-
-
[原创]使用自动化过腾讯滑块和阿里滑块
-
发表于: 2024-12-1 12:44 2127
-
案例一:tx滑块(playwright)
1 | 目标网站:aHR0cHM6Ly93d3cudXJidGl4LmhrL2xvZ2lu |
1、创建Chromium实例(headless 参数如果不设置为 False,默认以无头模式启动浏览器)
1 2 | async with async_playwright() as p: browser = await p.chromium.launch(headless = False , args = [ '--start-maximized' ]) |
2、最大化窗口
1 | context = await browser.new_context(viewport = { "width" : 1920 , "height" : 1080 }, no_viewport = True ) |
3、新建标签页
1 | page = await context.new_page() |
4、加载过检测js
1 | await page.add_init_script(js) #stealth.min.js |
5、监听response事件
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 | async def on_response(response): if '/cap_union_new_getcapbysig' in response.url and response.status = = 200 : #对背景图以及滑块图进行拦截然后保存 if 'img_index=1' in response.url: with open ( "bg_picture.jpg" , "wb" ) as f: f.write(requests.get(response.url).content) elif 'img_index=0' in response.url: with open ( "cut_picture.png" , "wb" ) as f: f.write(requests.get(response.url).content) print ( "response.url:" , response.url) if 'cap_union_new_verify' in response.url and response.status = = 200 : #滑块通过后获取参数 result = await response.text() print ( "response.url:" , response.url,result) page.on( 'response' ,on_response) |
6、打开网页、触发滑块
1 2 3 4 5 | await page.goto( 'aHR0cHM6Ly93d3cudXJidGl4LmhrL2xvZ2lu' ) await page.wait_for_timeout( 1500 ) await page.click( 'xpath=//*[@id="root"]/div/div[3]/div/div/div[5]/div/div' ) await page.wait_for_timeout( 500 ) await page.click( 'xpath=//*[@id="root"]/div/div[3]/div/div/div[8]/div[2]/div' ) |
7、识别坐标
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 | def get_gap_offset(): """ 识别坐标,滑块的图片需要切割 """ det = ddddocr.DdddOcr(det = False , ocr = False , show_ad = False ) img = Image. open ( 'cut_picture.png' ) region = img.crop(( 160 , 508 , 243 , 595 )) # region.save(f 'cut_picture.png' ) with open ( 'bg_picture.jpg' , 'rb' ) as f: target_bytes = f.read() with open ( 'cut_picture.png' , 'rb' ) as f: background_bytes = f.read() res = det.slide_match(target_bytes, background_bytes, simple_target = True ) print ( "识别到的坐标位置:" , res) distance = int (res[ 'target' ][ 0 ]) return distance |
8、找到滑动起始点,并滑动
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 | async def move_down(page): #定位iframe new_frame = page.frame_locator( 'iframe[id="tcaptcha_iframe_dy"]' ) #定位起始点 move_tag = new_frame.locator( 'xpath=//*[@id="tcOperation"]/div[6]' ) #找到这个元素在当前页面的坐标 box = await move_tag.bounding_box() print ( "目前点击的位置" ,box) # 讲鼠标移动到到其实元素的中心 await page.mouse.move(box[ "x" ] + box[ "width" ] / 2 , box[ "y" ] + box[ "height" ] / 2 ) # 按下鼠标 await page.mouse.down() #延时1.2s await page.wait_for_timeout( 1200 ) # 这里获取到x坐标中心点位置 x = box[ "x" ] + box[ "width" ] / 2 #识别到坐标后与网页上的比例 distance = int (get_gap_offset() / 1.97 ) - 30 #轨迹 move_distance = get_track_list(distance) print ( "最终坐标:" ,distance, "轨迹:" ,move_distance) for i in move_distance: x + = i await page.mouse.move(x, box[ "y" ]) await page.mouse.up() |
9、关闭窗口
1 | await browser.close() |
至此tx滑块的分析就结束了
然后我还写了一版selenium的,相比于playwright就会麻烦一些
案例二:tx滑块(selenium)
1 2 3 4 5 6 7 8 9 10 11 12 13 | 对于我们日常使用而言两者主要区别在于: 1 、selenium只支持同步,playwright可以支持异步的 2 、操作iframe,selenium来回切换iframe非常麻烦,而playwright只需要定位元素即可 3 、在监听请求这一点上,playwright的page.on非常好用,而selenium一般是借助browsermobproxy通过代理的方式进行拦截 使用方式:( 1 )https://github.com/lightbody/browsermob-proxy/releases,下载并解压 ( 2 )安装证书,参考链接https://www.bilibili.com/read/cv21263644/ ( 3 )调用方式 server = Server( 'browsermob-proxy-2.1.4/bin/browsermob-proxy' ) server.start() proxy = server.create_proxy(params = { 'trustAllServers' : 'true' }) option = ChromeOptions() option.add_argument( '--proxy-server={0}' . format ( self .proxy.proxy)) driver = webdriver.Chrome(options = option) |
这里就不细致讲解了,主要代码如下
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 | class Tencent(): def __init__( self ): server = Server( 'browsermob-proxy-2.1.4/bin/browsermob-proxy' ) server.start() self .proxy = server.create_proxy(params = { 'trustAllServers' : 'true' }) self .url = 'aHR0cHM6Ly93d3cudXJidGl4LmhrL2xvZ2lu' option = ChromeOptions() option.add_experimental_option( 'excludeSwitches' , [ 'enable-automation' ]) option.add_experimental_option( 'useAutomationExtension' , False ) option.add_argument( '--proxy-server={0}' . format ( self .proxy.proxy)) self .proxy.new_har(options = { 'captureContent' : True , 'captureHeaders' : True }) self .driver = webdriver.Chrome(options = option) self .driver.execute_cdp_cmd( 'Page.addScriptToEvaluateOnNewDocument' , { 'source' : 'Object.defineProperty(navigator,"webdriver",{get: () => undefined})' }) with open ( 'stealth.min.js' ) as f: js = f.read() self .driver.execute_cdp_cmd( "Page.addScriptToEvaluateOnNewDocument" , { "source" : js}) self .driver.maximize_window() self .det = ddddocr.DdddOcr(det = False , ocr = False , show_ad = False ) self .headers = { 'user-agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36' } def index( self ): """ 主流程 """ self .driver.get( self .url) time.sleep( 5 ) print ( "正在打开网页~~~" ) self .driver.find_element(by = By.XPATH, value = f '//*[@id="root"]/div/div[3]/div/div/div[5]/div/div' ).click() time.sleep( 1 ) self .driver.find_element(by = By.XPATH, value = f '//*[@id="root"]/div/div[3]/div/div/div[8]/div[2]/div' ).click() time.sleep( 5 ) self .driver.switch_to.frame( 'tcaptcha_iframe_dy' ) bg_style = self .driver.find_element( 'id' 
, 'slideBg' ).get_attribute( "style" ) cut_style = self .driver.find_element(by = By.XPATH, value = f '//*[@id="tcOperation"]/div[8]' ).get_attribute( "style" ) bg_url = re.findall( 'url\("(.*?)"\)' , str (bg_style))[ 0 ] cut_url = re.findall( 'url\("(.*?)"\)' , str (cut_style))[ 0 ] print ( "获取到背景图片url:" ,bg_url) print ( "获取到滑块图片url:" ,cut_url) with open ( "bg_picture.jpg" , "wb" ) as f: f.write(requests.get(bg_url).content) with open ( "cut_picture.png" , "wb" ) as f: f.write(requests.get(cut_url).content) def get_gap_offset( self ): """ 识别坐标 """ img = Image. open ( 'cut_picture.png' ) region = img.crop(( 160 , 508 , 243 , 595 )) # region.save(f 'cut_picture.png' ) with open ( 'bg_picture.jpg' , 'rb' ) as f: target_bytes = f.read() with open ( 'cut_picture.png' , 'rb' ) as f: background_bytes = f.read() res = self .det.slide_match(target_bytes, background_bytes, simple_target = True ) print ( "识别到的坐标位置:" ,res) distance = int (res[ 'target' ][ 0 ]) return distance def get_track( self , offset): ''' 计算滑块的移动轨迹 ''' offset - = 30 # 滑块并不是从0开始移动,有一个初始值 a = offset / 4 track = [a, a, a, a] return track def shake_mouse( self ): """ 模拟人手释放鼠标抖动 """ ActionChains( self .driver).move_by_offset(xoffset = - 2 , yoffset = 0 ).perform() ActionChains( self .driver).move_by_offset(xoffset = 2 , yoffset = 0 ).perform() def operate_slider( self , track): """ 拖动滑块 :param track: 运动轨迹 """ # 定位到拖动按钮 slider_bt = self .driver.find_element(by = By.XPATH,value = '//*[@id="tcOperation"]/div[6]' ) # 点击拖动按钮不放 ActionChains( self .driver).click_and_hold(slider_bt).perform() # 按正向轨迹移动 for i in track: ActionChains( self .driver).move_by_offset(xoffset = i, yoffset = 0 ).perform() time.sleep(random.random() / 100 ) # 每移动一次随机停顿0-1/100秒之间骗过了极验,通过率很高 time.sleep(random.random()) # 按逆向轨迹移动 back_tracks = [ - 1 , - 0.5 , - 1 ] for i in back_tracks: time.sleep(random.random() / 100 ) ActionChains( self .driver).move_by_offset(xoffset = i, yoffset = 0 ).perform() # 模拟人手抖动 self .shake_mouse() 
time.sleep(random.random()) # 松开滑块按钮 ActionChains( self .driver).release().perform() time.sleep( 2 ) def login( self ): ''' 实现主要的登陆逻辑 ''' self .index() distance = self .get_gap_offset() distance = int (distance / 1.97 ) track = self .get_track(distance) self .operate_slider(track) result = self .proxy.har for entry in result[ 'log' ][ 'entries' ]: if entry[ 'request' ][ 'url' ] = = 'https://t.captcha.qq.com/cap_union_new_verify' : print (entry[ 'request' ][ 'url' ],entry[ 'response' ][ 'content' ]) print (entry[ 'response' ][ 'content' ][ 'text' ]) |
案例三:ali滑块(playwright)
1 | 目标网站:aHR0cHM6Ly9wYXNzcG9ydC5kYW1haS5jbi9sb2dpbg== |
1、前面的初始化流程
1 2 3 4 5 6 7 8 9 10 | async with async_playwright() as p: browser = await p.chromium.launch(headless = False , args = [ '--start-maximized' ]) context = await browser.new_context(viewport = { "width" : 1920 , "height" : 1080 }, no_viewport = True ) context.set_default_timeout( 8000 ) page = await context.new_page() await page.add_init_script(js) print ( "打开网页~~~" ) await page.goto( 'aHR0cHM6Ly9wYXNzcG9ydC5kYW1haS5jbi9sb2dpbg==' ) await page.wait_for_timeout( 1000 ) page.on( 'response' , on_response) |
2、输入账号密码
1 2 3 4 5 6 7 8 9 | #这里需要注意这个iframe,前面的iframe和后面出滑块之后的iframe属于包含关系 new_frame = page.frame_locator( 'iframe[id="alibaba-login-box"]' ) await page.wait_for_timeout( 1000 ) await new_frame.locator( '#fm-login-id' ).fill( "正确的手机号码" ) await page.wait_for_timeout( 1000 ) await new_frame.locator( '#fm-login-password' ).fill( "错误的密码" ) await page.wait_for_timeout( 1000 ) await new_frame.get_by_role( "button" , name = "登录" ).click() await page.wait_for_timeout( 1000 ) |
3、强制弹出滑块,并判断
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 | #这里为了让它出滑块要先输出错误的密码,然后一直click,直到出滑块为止 while True : try : new_frame2 = new_frame.frame_locator( 'iframe[id="baxia-dialog-content"]' ) move_tag = new_frame2.locator( 'xpath=//*[@id="nc_1_n1z"]' ) number = await move_tag.count() if number> = 1 : box = await move_tag.bounding_box() print ( "目前点击的位置" , box) break else : print (f "没出滑块,重新点击" ) await page.wait_for_timeout( 1000 ) await new_frame.get_by_role( "button" , name = "登录" ).click() except : await new_frame.get_by_role( "button" , name = "登录" ).click() |
4、定位以及滑动
1 2 3 4 5 6 7 8 9 10 11 12 | async def move_down(page,box): await page.mouse.move(box[ "x" ] + box[ "width" ] / 2 , box[ "y" ] + box[ "height" ] / 2 ) await page.mouse.down() # 按下鼠标 await page.wait_for_timeout( 1200 ) x = box[ "x" ] + box[ "width" ] / 2 # 这里获取到x坐标中心点位置 move_distance = get_track_list( 265 ) print ( "轨迹:" ,move_distance) for i in move_distance: x + = i await page.mouse.move(x, box[ "y" ]) await page.mouse.up() await page.wait_for_timeout( 500 ) |
至此ali滑块的分析就结束了
结语
其实爬虫圈一直有个不好的习惯,就是认为协议>自动化。但是在面对企业与客户的需求时,我们所需要的是完整的解决方案,所以不管是协议还是自动化,只要能解决问题就是最好的方式,不存在谁大于谁。另外我也运营了一个星球,后面所有公众号文章的示例代码都会放到星球里面去,同时也会在星球解答大家的问题。
[招生]科锐逆向工程师培训(2024年11月15日实地,远程教学同时开班, 第51期)
最后于 2024-12-1 12:46
被Harden13编辑
,原因:
赞赏记录
参与人
雪币
留言
时间
沙漠炒哥
感谢你的积极参与,期待更多精彩内容!
2024-12-5 10:40
东方玻璃
感谢你的积极参与,期待更多精彩内容!
2024-12-1 13:11
科学上网
非常支持你的观点!
2024-12-1 12:54
赞赏
看原图
赞赏
雪币:
留言: