python-selenium-spider/utils/__init__.py

50 lines
1.9 KiB
Python
Raw Permalink Normal View History

2024-03-01 17:14:56 +08:00
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.common.by import By
from PIL import Image
import time
import os
# JavaScript snippets passed to driver.execute_script().
# Scroll the window to the height of the document body (i.e. the page bottom).
_scroll_to_bottom = "window.scroll({top:document.body.clientHeight,left:0,behavior:'auto'});"
# Scroll the window to an absolute vertical offset; fill in with `% y`.
_scroll_to_y = "window.scroll({top:%d,left:0,behavior:'auto'});"

# Directory holding the intermediate per-viewport captures.
_tmp_dir = "./tmp"
# Template for an intermediate capture path; fill in with `% (name, index)`.
_tmp_file = _tmp_dir + "/%s_%s.png"

# XPath selecting the page's <body> element.
_body = "//body"
def screenshot(driver: WebDriver, save_path):
    """Capture a full-page screenshot by stitching viewport-sized captures.

    The page is captured one viewport at a time while scrolling down, each
    capture is appended below the running merged image, and a final capture
    taken at the very bottom is merged with the appropriate overlap so the
    last partial viewport is not duplicated. The result is written to
    ``save_path``; intermediate files live in ``_tmp_dir`` and are removed
    before returning, whether or not an error occurred.

    Args:
        driver: Selenium WebDriver with the target page already loaded.
        save_path: Destination path of the stitched image. Its basename is
            also used to name the intermediate files.

    Raises:
        Whatever the driver, the filesystem or ``merge_images`` raise; the
        temporary files are cleaned up first.
    """
    file_name = os.path.basename(save_path)
    # exist_ok avoids the check-then-create race of exists() followed by mkdir().
    os.makedirs(_tmp_dir, exist_ok=True)

    tmp_file_list = []
    try:
        # Every temp path is registered *before* it is written so that a
        # partially written file is still cleaned up if the capture fails.
        tmp_file = _tmp_file % (file_name, 0)
        tmp_file_list.append(tmp_file)
        driver.save_screenshot(tmp_file)

        body_h = driver.execute_script("return document.documentElement.scrollHeight")
        current_h = driver.execute_script("return document.documentElement.clientHeight")
        # NOTE(review): offsets below are in CSS pixels; this assumes the
        # screenshot's pixel height equals clientHeight (device pixel ratio
        # of 1) -- confirm for HiDPI setups.

        # Capture every further *full* viewport and append it to tmp_file.
        for i in range(1, int(body_h / current_h)):
            driver.execute_script(_scroll_to_y % (current_h * i))
            # Pause after scrolling before capturing (presumably to let the
            # page finish rendering).
            time.sleep(0.5)
            part_file = _tmp_file % (file_name, i)
            tmp_file_list.append(part_file)
            driver.save_screenshot(part_file)
            merge_images(tmp_file, part_file, tmp_file)

        # Final capture at the very bottom of the page.
        driver.execute_script(_scroll_to_bottom)
        # Same pause as in the loop, so the last capture is treated like the others.
        time.sleep(0.5)
        end_file = _tmp_file % (file_name, "bottom")
        tmp_file_list.append(end_file)
        driver.save_screenshot(end_file)

        # The bottom capture repeats the part of the previous viewport that
        # is not the page's trailing remainder; drop that many rows.
        overlap = int(current_h - int(body_h % current_h))
        merge_images(tmp_file, end_file, save_path, overlap)
    finally:
        # Delete temporary files. A path whose capture never reached the disk
        # must not abort the cleanup of the others or mask the original error.
        for tmp in tmp_file_list:
            try:
                os.remove(tmp)
            except FileNotFoundError:
                pass
2024-03-02 16:15:32 +08:00
2024-03-01 17:14:56 +08:00
def merge_images(image1: str, image2: str, output: str, overlap_size=0):
    """Stack ``image2`` below ``image1`` and save the result to ``output``.

    The merged canvas is RGB, as wide as ``image1`` and as tall as both
    images together minus ``overlap_size``. ``image2`` is pasted
    ``overlap_size`` rows above the bottom edge of ``image1``, so it
    overwrites that many trailing rows of ``image1``.

    Args:
        image1: Path of the upper image.
        image2: Path of the lower image.
        output: Path the merged image is written to; may equal ``image1``.
        overlap_size: Number of pixel rows by which the two images overlap.
    """
    # Context managers guarantee both source files are closed, even if
    # building the merged image fails.
    with Image.open(image1) as img1, Image.open(image2) as img2:
        width, height1 = img1.size
        height2 = img2.size[1]
        merge = Image.new("RGB", (width, height1 + height2 - overlap_size))
        merge.paste(img1, (0, 0))
        merge.paste(img2, (0, height1 - overlap_size))
    # Save only after the inputs are closed: output may be the same file as image1.
    merge.save(output)