项目初始化

This commit is contained in:
shikong 2024-03-01 17:14:56 +08:00
commit dfede5f6df
8 changed files with 99 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
/tmp
/driver
*.pyc

2
README.MD Normal file
View File

@ -0,0 +1,2 @@
### chrome driver 下载地址
https://googlechromelabs.github.io/chrome-for-testing/#stable

BIN
headless_result.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 583 KiB

38
main.py Normal file
View File

@ -0,0 +1,38 @@
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
import time
import utils
# Demo script: open a bot-detection test page in Chrome with the stealth
# script injected, then save two screenshots of it.

# Browser configuration: zh-CN language and a fixed desktop Chrome user agent.
options = webdriver.ChromeOptions()
options.add_argument('lang=zh-CN')
options.add_argument(
    'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36')
# options.add_argument('--user-data-dir=./UserData/Google/Chrome/Default')
options.add_argument('--disable-gpu')  # without this option, locating sometimes misbehaves
# options.add_argument('--headless')  # enable headless (no window) mode
service = webdriver.ChromeService(r"./driver/chromedriver.exe")
driver = webdriver.Chrome(service=service, options=options)
try:
    driver.maximize_window()
    # Minimal alternative to stealth.min.js (hides navigator.webdriver only):
    # driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
    #     "source": """
    #         Object.defineProperty(navigator, 'webdriver', {
    #             get: () => undefined
    #         })
    #     """
    # })
    # Register the stealth script so it is evaluated on every new document
    # before the page's own scripts. The encoding is explicit because the
    # platform default is locale-dependent.
    with open("./stealth.min.js", encoding="utf-8") as f:
        driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": f.read()
        })
    driver.get("https://bot.sannysoft.com/")
    time.sleep(5)  # fixed wait before measuring the page
    width = driver.execute_script("return document.documentElement.scrollWidth")
    height = driver.execute_script("return document.documentElement.scrollHeight")
    print(width, height)
    # Stitched screenshot produced by scrolling through the page.
    utils.screenshot(driver=driver, save_path="./screenshot.png")
    # Second screenshot: resize the window to the document's scroll size and
    # capture it in a single shot.
    driver.set_window_size(width, height)
    driver.save_screenshot('result.png')
    time.sleep(30)  # keep the browser open for a while before shutting down
finally:
    # Always end the session so the browser and chromedriver processes are
    # not left running, even if one of the steps above raised.
    driver.quit()

BIN
result.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

BIN
screenshot.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 384 KiB

7
stealth.min.js vendored Normal file

File diff suppressed because one or more lines are too long

49
utils/__init__.py Normal file
View File

@ -0,0 +1,49 @@
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.common.by import By
from PIL import Image
import time
import os
# JavaScript snippet: scroll the window to y = document.body.clientHeight.
_scroll_to_bottom = "window.scroll({top:document.body.clientHeight,left:0,behavior:'auto'});"
# JavaScript template: scroll the window to the y offset substituted for %d.
_scroll_to_y = "window.scroll({top:%d,left:0,behavior:'auto'});"
# Directory that receives the intermediate screenshot files.
_tmp_dir = "./tmp"
# Path template for one intermediate screenshot. After this first substitution
# it reads "./tmp/%s_%s.png" and is later filled with (file name, index/label).
_tmp_file = "%s/%%s_%%s.png" % _tmp_dir
# XPath selecting the <body> element; not referenced by the code visible here.
_body = "//body"
def screenshot(driver: WebDriver, save_path):
    """Save a stitched screenshot of the current page to ``save_path``.

    The page is captured one viewport at a time: the current view first, then
    one capture per additional full viewport height after scrolling, and
    finally a capture taken after scrolling to the bottom. The captures are
    merged vertically with ``merge_images``; intermediate files live in
    ``_tmp_dir`` and are deleted before returning.

    Args:
        driver: WebDriver whose current page is captured.
        save_path: Destination path of the merged image. Its base name is
            also used to name the intermediate files.

    NOTE(review): the overlap passed for the final capture is expressed in
    CSS pixels and ``merge_images`` multiplies it by 2 -- presumably to match
    a device pixel ratio of 2; confirm on displays with a different scale.
    """
    file_name = os.path.basename(save_path)
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(_tmp_dir, exist_ok=True)
    tmp_file_list = []
    try:
        # Strip 0 is the current viewport; it also serves as the accumulator
        # that every later strip is merged into.
        tmp_file = _tmp_file % (file_name, 0)
        driver.save_screenshot(tmp_file)
        tmp_file_list.append(tmp_file)
        body_h = driver.execute_script("return document.documentElement.scrollHeight")
        current_h = driver.execute_script("return document.documentElement.clientHeight")
        # One strip per additional full viewport that fits in the document.
        for i in range(1, int(body_h // current_h)):
            driver.execute_script(_scroll_to_y % (current_h * i))
            time.sleep(0.2)  # short pause after scrolling before capturing
            part_file = _tmp_file % (file_name, i)
            driver.save_screenshot(part_file)
            tmp_file_list.append(part_file)
            merge_images(tmp_file, part_file, tmp_file)
        # Final strip: scroll to the bottom and capture the end of the page.
        # It overlaps what is already stitched by the part of the viewport
        # not covered by the remainder (body_h % current_h).
        driver.execute_script(_scroll_to_bottom)
        end_file = _tmp_file % (file_name, "bottom")
        driver.save_screenshot(end_file)
        tmp_file_list.append(end_file)
        merge_images(tmp_file, end_file, save_path, int(current_h - int(body_h % current_h)))
    finally:
        # Remove the intermediate files.
        for tmp in tmp_file_list:
            try:
                os.remove(tmp)
            except FileNotFoundError:
                # save_screenshot reports I/O failures by returning False, so
                # a listed file may never have been written; a missing file
                # must not mask the exception that is already propagating.
                pass
def merge_images(image1: str, image2: str, output: str, overlap_size=0, *, overlap_scale=2):
    """Stack ``image2`` below ``image1`` and save the result to ``output``.

    Args:
        image1: Path of the upper image. Its width is used for the result.
        image2: Path of the lower image.
        output: Path the merged RGB image is written to. It may be the same
            path as ``image1``.
        overlap_size: Height by which the two images overlap, before scaling.
        overlap_scale: Factor applied to ``overlap_size`` to obtain the
            overlap in image pixels. The default of 2 keeps the previously
            hard-coded behaviour.
            NOTE(review): presumably this compensates for screenshots taken
            at a device pixel ratio of 2 -- confirm.
    """
    overlap_px = overlap_size * overlap_scale
    # Context managers release the underlying file handles deterministically.
    with Image.open(image1) as upper, Image.open(image2) as lower:
        width, upper_h = upper.size
        lower_h = lower.size[1]
        merged = Image.new("RGB", (width, upper_h + lower_h - overlap_px))
        merged.paste(upper, (0, 0))
        # Pasted second, so the lower image covers the overlapping rows.
        merged.paste(lower, (0, upper_h - overlap_px))
    # Save only after the inputs are closed, since ``output`` may be the same
    # file as ``image1``.
    merged.save(output)