python-selenium-spider/main.py

48 lines
1.6 KiB
Python
Raw Permalink Normal View History

2024-03-01 17:14:56 +08:00
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
import time
2024-03-02 03:21:47 +08:00
import run
2024-03-01 17:14:56 +08:00
2024-05-12 01:41:09 +08:00
# Directory handed to Chrome as its default download location and passed on
# to run.main().
download_dir = r"E:\Repository\python-selenium-spider\download"

# Proxy mapping passed to run.main(); both schemes use the same local SOCKS5
# endpoint.
proxies = dict.fromkeys(("http", "https"), "socks5://127.0.0.1:1080")
2024-03-02 03:21:47 +08:00
if __name__ == '__main__':
    # Script entry point: start a configured Chrome session, register the
    # stealth script for every new document, then hand the driver to
    # run.main().
    options = webdriver.ChromeOptions()
    options.add_argument('lang=zh-CN')
    options.add_argument(
        'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36')
    # Fixed profile directory for this Chrome instance.
    options.add_argument(r'--user-data-dir=E:\Repository\python-selenium-spider\tmp\UserData')
    # Without this option, locating elements sometimes misbehaves.
    options.add_argument('--disable-gpu')
    # options.add_argument('--headless')  # enable to run without a window

    # Make Chrome save downloads into download_dir.
    options.add_experimental_option("prefs", {
        "download.default_directory": download_dir,
    })

    # NOTE: the driver path and the stealth script path below are relative,
    # so they are resolved against the current working directory.
    service = webdriver.ChromeService(r"./driver/chromedriver.exe")
    driver = webdriver.Chrome(service=service, options=options)
    # driver.maximize_window()

    try:
        # Decode explicitly as UTF-8: the platform default encoding (a legacy
        # code page on Windows) can fail on, or mis-decode, a UTF-8 script.
        with open("./stealth.min.js", encoding="utf-8") as f:
            stealth_source = f.read()

        # Register the script so it is evaluated in every new document
        # before the page's own scripts run (presumably an anti-detection
        # shim -- its contents are not part of this file).
        driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": stealth_source,
        })

        run.main(driver=driver, download_dir=download_dir, proxies=proxies)
        # Block for 24 hours after run.main() returns.
        time.sleep(86400)
    finally:
        # NOTE(review): driver.quit() is disabled, so this script never
        # closes the browser session itself -- presumably so the window
        # stays open for inspection; confirm before re-enabling.
        pass
        # driver.quit()