一、簡介
初次開始使用 Selenium,于是用登錄作為練手項目。這是一個真實的登錄界面,包含驗證碼(驗證碼是難點:獲取與識別)。以下會講得很詳細,我怕我下次又忘記了。
我采用 Selenium + Python + 百度云 OCR,安裝等教程請自行百度,我就不多說了。
二、Selenium 打開瀏覽器并獲取相應元素(element)
import base64
from io import BytesIO

from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By

# Start Chrome through its WebDriver and open the login page.
driver = webdriver.Chrome()
url = "************"
driver.get(url)

# Fill in the two login fields (locators and values are redacted placeholders).
driver.find_element(By.NAME, "***").send_keys('***')
driver.find_element(By.NAME, "***").send_keys('***')

# Fetch the captcha image.
png = driver.find_element(By.CSS_SELECTOR, "****")
# In this test the image `src` is base64-generated, so the picture is
# obtained by decoding the attribute instead of downloading a URL.
img_url = png.get_attribute("src")
# Keep only the base64 payload after the first comma; `head` is the part
# before it and is not used.
head, context = img_url.split(",", 1)
img_data = base64.b64decode(context)
image = Image.open(BytesIO(img_data))
# Save the captcha to disk so the later recognition steps can read it.
image.save("capt.png")
這段代碼主要是利用 Selenium 打開網站,獲取到相應的 DOM 元素。find_element_by_id 等寫法已經統一換為 find_element(By.ID, "***") 等了。先將圖片保存下來,可以方便后續的識別。
三、驗證碼預處理
驗證碼一般無法直接識別,我們需要通過灰度化、二值化、降噪等手段,使得圖片更加清晰,提高圖片的識別準確率。
下列代碼需要提前引入以下包:(文章來源:http://www.zghlxwxcb.cn/news/detail-788427.html)
from PIL import Image
1.灰度化
img = img.convert('L') # Convert the image (e.g. from palette mode "P") to mode "L", i.e. grayscale.
2.二值化
# Binarisation threshold; tune it to the clarity of the captcha image.
count = 230
# Lookup table for Image.point(): grey levels below the threshold map to 0,
# every other level maps to 1.
table = [0 if i < count else 1 for i in range(256)]
img = img.point(table, '1')
img.save('captcha.png')
3.降噪
def depoint(img):
    """Whiten pixels that are mostly surrounded by bright pixels, in place.

    Every pixel that is not on the image border is visited row by row.
    When more than two of its four direct neighbours (up, down, left,
    right) are brighter than 245, the pixel is set to 255.  Pixels are
    updated during the scan, so a pixel whitened earlier counts as a
    bright neighbour for the pixels visited after it.

    Args:
        img: Image object exposing ``load()`` and ``size`` (a PIL image in
            this article).

    Returns:
        The same ``img`` object, modified in place.
    """
    pixdata = img.load()
    w, h = img.size
    for y in range(1, h - 1):
        for x in range(1, w - 1):
            neighbours = (
                pixdata[x, y - 1],
                pixdata[x, y + 1],
                pixdata[x - 1, y],
                pixdata[x + 1, y],
            )
            bright = sum(1 for value in neighbours if value > 245)
            if bright > 2:
                pixdata[x, y] = 255
    return img
四、百度云識別
需要提前注冊。(文章來源地址:http://www.zghlxwxcb.cn/news/detail-788427.html)
from aip import AipOcr
# Baidu Cloud OCR credentials (redacted placeholders).
APP_ID = '****'
API_KEY = '****'
SECRET_KEY = '****'

# Create the OCR client.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)


def get_file_content(file_path):
    """Return the raw bytes of the file at *file_path*."""
    with open(file_path, 'rb') as f:
        return f.read()


def discern_captcha():
    """Send ``end.png`` to the OCR service and return the recognised text.

    Each recognised entry is printed; the text of the last entry is
    returned.

    Returns:
        The last recognised string, or ``''`` when the response carries no
        ``words_result`` entries.
    """
    image = get_file_content('end.png')  # Image to recognise.
    # Recognition language; the service default is 'CHN_ENG' (mixed
    # Chinese and English).
    options = {'language_type': 'ENG'}
    # General text recognition; the high-accuracy endpoint is basicAccurate.
    result = client.basicGeneral(image, options)
    # Start from '' so an empty or missing 'words_result' cannot leave the
    # name unbound.  NOTE(review): presumably an error response omits
    # 'words_result' -- confirm against the service documentation.
    captcha = ''
    for word in result.get('words_result') or []:
        captcha = word['words']
        # NOTE(review): the message literal is reproduced unchanged; it
        # looks garbled by injected pinyin annotations -- confirm.
        print('識(shí)別結(jié)果:' + captcha)
    return captcha
五、完整代碼
import base64
from io import BytesIO
from time import sleep

from aip import AipOcr
from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
def depoint(img):
    """Denoise a binarised image in place so that recognition is more accurate.

    Every pixel that is not on the image border is visited row by row.
    When more than two of its four direct neighbours (up, down, left,
    right) are brighter than 245, the pixel is set to 255.  Pixels are
    updated during the scan, so a pixel whitened earlier counts as a
    bright neighbour for the pixels visited after it.

    Args:
        img: Image object exposing ``load()`` and ``size`` (callers in this
            file pass a PIL image).

    Returns:
        The same ``img`` object, modified in place.
    """
    pixdata = img.load()
    w, h = img.size
    for y in range(1, h - 1):
        for x in range(1, w - 1):
            neighbours = (
                pixdata[x, y - 1],
                pixdata[x, y + 1],
                pixdata[x - 1, y],
                pixdata[x + 1, y],
            )
            bright = sum(1 for value in neighbours if value > 245)
            if bright > 2:
                pixdata[x, y] = 255
    return img
def deal_image():
    """Fetch the captcha shown on the page and save a binarised copy of it.

    Reads the ``src`` attribute of the captcha element through the
    module-level ``driver``, decodes its base64 payload, writes the decoded
    image to ``capt.png`` and the grayscale, thresholded image to
    ``captcha.png``.

    Raises:
        ValueError: If ``src`` is missing or contains no comma separating
            a header from the base64 payload.
    """
    png = driver.find_element(By.CSS_SELECTOR, ".*******")
    # In this test the image `src` is base64-generated, so the picture is
    # obtained by decoding the attribute instead of downloading a URL.
    img_url = png.get_attribute("src") or ""
    # Everything after the first comma is the base64 payload.
    head, sep, context = img_url.partition(",")
    if not sep:
        raise ValueError("captcha src is not of the form '<header>,<base64 data>'")
    image = Image.open(BytesIO(base64.b64decode(context)))
    image.save("capt.png")

    # Convert to mode "L" (grayscale) before thresholding.
    img = image.convert('L')
    # Binarisation threshold; tune it to the clarity of the captcha image.
    count = 230
    # Lookup table for Image.point(): grey levels below the threshold map
    # to 0, every other level maps to 1.
    table = [0 if i < count else 1 for i in range(256)]
    img = img.point(table, '1')
    img.save('captcha.png')
def discern_captcha():
    """Recognise the captcha in ``end.png`` with the Baidu Cloud OCR API.

    Each recognised entry is printed; the text of the last entry is
    returned.

    Returns:
        The last recognised string, or ``''`` when the response carries no
        ``words_result`` entries.
    """
    # OCR credentials (redacted placeholders).
    APP_ID = '****'
    API_KEY = '****'
    SECRET_KEY = '****'
    client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

    # Read the pre-processed captcha image as raw bytes.
    with open('end.png', 'rb') as f:
        image = f.read()

    # Recognition language; the service default is 'CHN_ENG' (mixed
    # Chinese and English).
    options = {'language_type': 'ENG'}
    # General text recognition; the high-accuracy endpoint is basicAccurate.
    result = client.basicGeneral(image, options)

    # Start from '' so an empty or missing 'words_result' cannot leave the
    # name unbound.  NOTE(review): presumably an error response omits
    # 'words_result' -- confirm against the service documentation.
    captcha = ''
    for word in result.get('words_result') or []:
        captcha = word['words']
        # NOTE(review): the message literal is reproduced unchanged; it
        # looks garbled by injected pinyin annotations -- confirm.
        print('識(shí)別結(jié)果:' + captcha)
    return captcha
def ClickRandomCode(max_attempts=10):
    """Solve the captcha, submit the login form, and retry on a wrong captcha.

    Each attempt grabs and binarises the captcha (``deal_image``), denoises
    it twice (``depoint``), recognises it (``discern_captcha``), types the
    result into the ``randomCode`` field and clicks the login button.  If
    the page then shows the "wrong captcha" message, the captcha image is
    clicked and the next attempt starts.

    The retry used to call ``clear()`` on the captcha ``<img>``; the
    exception that raised was swallowed by a bare ``except``, so the retry
    never ran.  The ``randomCode`` input is cleared instead, and only a
    missing message element is caught.

    Args:
        max_attempts: Maximum number of captcha attempts before giving up.

    Returns:
        True when no "wrong captcha" message was seen after a submit,
        False when every attempt was rejected.
    """
    for _ in range(max_attempts):
        deal_image()
        # Two denoise passes; the result is written to end.png, which
        # discern_captcha() reads.
        image = depoint(depoint(Image.open('captcha.png')))
        image.save('end.png')
        randomCode = discern_captcha()

        code_input = driver.find_element(By.NAME, "randomCode")
        # Drop whatever a previous attempt typed before entering the new code.
        code_input.clear()
        code_input.send_keys(randomCode)
        driver.find_element(By.CSS_SELECTOR, ".card-z .el-button").click()
        sleep(2)  # The error or success message appears with a delay.

        try:
            text = driver.find_element(By.CSS_SELECTOR, ".el-message__content").text
        except NoSuchElementException:
            # No message element on the page: nothing to retry.
            print("ok")
            return True

        # NOTE(review): this literal must equal the page's message text
        # exactly; it looks garbled by injected pinyin annotations --
        # confirm against the real site before relying on the retry.
        if text != "驗(yàn)證碼錯(cuò)誤":
            print("登錄成功")
            return True

        print("驗(yàn)證碼錯(cuò)誤")
        # Presumably clicking the captcha image loads a fresh captcha.
        driver.find_element(By.CSS_SELECTOR, ".verification-code > img").click()

    print("captcha still rejected after %d attempts" % max_attempts)
    return False
def login(username, password):
    """Fill in the two login fields, then solve the captcha and submit.

    The parameter names were redacted in the original listing and are
    presumed to be the account name and its password; the NAME locators
    are redacted placeholders and must be replaced with the real field
    names.

    Args:
        username: Text typed into the first field.
        password: Text typed into the second field.
    """
    driver.find_element(By.NAME, "****").send_keys(username)
    driver.find_element(By.NAME, "***").send_keys(password)
    ClickRandomCode()
if __name__ == '__main__':
    # `driver` is assigned at module level on purpose: deal_image(),
    # ClickRandomCode() and login() all read it as a global.
    driver = webdriver.Chrome()
    url = "*****"
    driver.get(url)
    login("****", "****")
到了這里,關于「Selenium 初步使用之自動登錄(包含識別驗證碼),超級詳細」的文章就介紹完了。如果您還想了解更多內容,請在右上角搜索 TOY模板網 以前的文章或繼續瀏覽下面的相關文章,希望大家以后多多支持 TOY模板網!