class spider:
    """Scraper that solves a JavaScript cookie challenge.

    Pages are fetched through a single shared ``requests`` session so
    that cookies — including the computed ``spvrscode`` anti-bot cookie —
    persist across requests.
    """

    def __init__(self):
        # One long-lived session: cookie jar is reused by every request.
        self.session = requests.Session()

    def getdata(self, url):
        """Fetch *url*, solving the JS challenge page if one is served.

        If the response does not contain the success marker, the inline
        ``<script>`` challenge is extracted, concatenated with the local
        ``MyProxy.js`` browser-environment shim, executed via execjs to
        obtain the ``spvrscode`` cookie value, and the request is retried
        with that cookie set.

        :param url: page URL to fetch.
        :return: the HTML text of the last response received.
        """
        headers = {
            'Connection': 'keep-alive',
            'Pragma': 'no-cache',
            'Cache-Control': 'no-cache',
            'sec-ch-ua': '" Not;A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"',
            'sec-ch-ua-mobile': '?0',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Sec-Fetch-Site': 'same-origin',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-User': '?1',
            'Sec-Fetch-Dest': 'document',
            'Accept-Language': 'zh-CN,zh;q=0.9',
        }
        response = self.session.get(url, headers=headers)
        html = response.text
        # The marker string (redacted in this source) only appears on the
        # real page, never on the JS-challenge interstitial.
        if "******" not in response.text:
            # Pull the inline challenge script out of the interstitial.
            pattern = re.compile("<script>(.*)</script>", re.S)
            jscode = pattern.findall(response.text)[0]
            # MyProxy.js provides the fake browser environment (window,
            # document, ...) the challenge code expects.
            with open("./MyProxy.js", "r", encoding="utf-8") as f:
                envcode = f.read()
            # Expose the challenge's result variable `b` to the caller.
            getcookie = "function getcookie(){return b;}"
            allcode = envcode + jscode + "\n" + getcookie
            # Persist the assembled script for offline debugging.
            with open("./allcode.js", "w", encoding="utf-8") as f:
                f.write(allcode)
            ctx = execjs.compile(allcode)
            spvrscode = ctx.call("getcookie")
            # Inject the solved cookie into the shared session jar.
            requests.utils.add_dict_to_cookiejar(
                self.session.cookies, {"spvrscode": spvrscode})
            # Retry (target URL redacted in this source) with the cookie set.
            response = self.session.get('***********', headers=headers)
            html = response.text
            print(self.session.cookies.values())
        return html
if __name__ == '__main__':
    # Manual smoke test: fetch one page (target URL redacted in this source).
    s = spider()
    s.getdata("https://***********")