From e385e9845d32db1a31d0377eebb06c98727f0b41 Mon Sep 17 00:00:00 2001
From: shikong <919411476@qq.com>
Date: Sun, 12 May 2024 02:52:40 +0800
Subject: [PATCH] Batch crawl fanbox
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
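Note: download_file_link() and download_images() previously looped over all
collected links and opened each page themselves; after this patch they operate
on a single post page that the caller has already opened, and main() drives
the loop with one driver.get() per post.

The file-attachment probe in the new code relies on find_element() raising
inside a bare try/except. For comparison, a minimal sketch of the same probe
written with an explicit wait (WebDriverWait and the 5-second timeout are
illustrative assumptions, not part of this patch):

    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    def find_file_block(driver, timeout=5):
        # Return the FileContent wrapper if the post has a file attachment,
        # or None once the wait times out.
        try:
            return WebDriverWait(driver, timeout).until(
                EC.presence_of_element_located(
                    (By.XPATH, "//div[contains(@class, 'FileContent__Wrapper')]")))
        except TimeoutException:
            return None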
 run/__init__.py | 110 +++++++++++++++++++-----------------------------
 1 file changed, 43 insertions(+), 67 deletions(-)

diff --git a/run/__init__.py b/run/__init__.py
index 119e429..4b0e347 100644
--- a/run/__init__.py
+++ b/run/__init__.py
@@ -47,29 +47,17 @@ def has_next(driver: WebDriver):
     return has_next
 
 
-def download_file_link(driver: WebDriver, links):
-    for link in links:
-        print(link)
-        driver.get(link)
-
-        time.sleep(1)
-        # get all open window handles
-        # all_windows = driver.window_handles
-        # for window in all_windows:
-        #     if window != original_window:
-        #         driver.switch_to.window(window)
-        try:
-            el = driver.find_element(By.XPATH, "//div[contains(@class, 'FileContent__Wrapper')]")
-            if el is not None:
-                print(el)
-                a = el.find_element(By.TAG_NAME, "a")
-                if a is not None:
-                    a.click()
-        except Exception as e:
-            print(e)
-        time.sleep(5)
-        # driver.back()
-        # driver.switch_to(original_window)
+def download_file_link(driver: WebDriver):
+    try:  # the caller has already opened the post page
+        el = driver.find_element(By.XPATH, "//div[contains(@class, 'FileContent__Wrapper')]")
+        if el is not None:
+            print(el)
+            a = el.find_element(By.TAG_NAME, "a")
+            if a is not None:
+                a.click()
+                time.sleep(1)
+    except Exception:
+        pass  # post has no file attachment
 
 
 def download_image(url: str, download_dir: str, proxies):
@@ -86,55 +74,37 @@ def download_image(url: str, download_dir: str, proxies):
     print(f"Download finished {url}, saved to: {save_path}")
 
 
-def download_images(driver: WebDriver, links, download_dir: str, proxies):
+def download_images(driver: WebDriver, link, download_dir: str, proxies):
     print(f"Max concurrent downloads: {workers}")
     with ThreadPoolExecutor(max_workers=workers) as worker:
-        img_list = []
-        for link in links:
-            part_img_list = []
-
-            print(link)
-            # https://monpetit17.fanbox.cc/posts/5183527
-            # https://api.fanbox.cc/post.info?postId=5183527
-            driver.get(link)
-            print("Page opened")
-
-            time.sleep(1)
-            try:
-                el = driver.find_element(By.XPATH, "//div[contains(@class, 'FileContent__Wrapper')]")
-                if el is not None:
-                    continue
-            except Exception as e:
-                pass
-
-            # html = driver.find_element(By.TAG_NAME, "html")
-            # height = html.size['height']
-            # for _ in range(500, height, 500):
-            #     driver.execute_script("window.scrollBy(0, 500)")
-            #     time.sleep(1.5)
-            for _ in range(0, 10):
-                driver.execute_script("window.scrollBy(0, 500)")
-                time.sleep(0.2)
-
-            img_link_elements = driver.find_elements(By.XPATH, "//a[contains(@class, 'PostImage__Anchor')]")
-
-            sub_dir = link.split("/")[-1]
-            real_download_dir = os.path.join(download_dir, sub_dir)
-            if not os.path.exists(real_download_dir):
-                os.makedirs(real_download_dir)
-
-            for element in img_link_elements:
-                href = element.get_attribute("href")
-                print(href)
-                part_img_list.append(href)
-
-                worker.submit(download_image, href, real_download_dir, proxies)
-
-            print("Got %d image URLs" % len(part_img_list))
-            print(part_img_list)
-            img_list += part_img_list
-
-    print("%d pages, %d images in total" % (len(links), len(img_list)))
+        part_img_list = []
+
+        try:
+            el = driver.find_element(By.XPATH, "//div[contains(@class, 'FileContent__Wrapper')]")
+            if el is not None:
+                return  # file-only post, handled by download_file_link
+        except Exception:
+            pass
+
+        for _ in range(0, 15):  # scroll stepwise so lazy-loaded images enter the DOM
+            driver.execute_script("window.scrollBy(0, 500)")
+            time.sleep(0.2)
+
+        img_link_elements = driver.find_elements(By.XPATH, "//a[contains(@class, 'PostImage__Anchor')]")
+
+        sub_dir = link.split("/")[-1]
+        real_download_dir = os.path.join(download_dir, sub_dir)
+        if not os.path.exists(real_download_dir):
+            os.makedirs(real_download_dir)
+
+        for element in img_link_elements:
+            href = element.get_attribute("href")
+            print(href)
+            part_img_list.append(href)
+            worker.submit(download_image, href, real_download_dir, proxies)
+
+        print("Got %d image URLs" % len(part_img_list))
+        print(part_img_list)
 
 
 def main(driver: WebDriver, download_dir: str, proxies):
@@ -161,8 +131,11 @@ def main(driver: WebDriver, download_dir: str, proxies):
         url = page_url % start_page
         links += find_link(driver, url)
 
-    download_file_link(driver, links)
-    download_images(driver, links, download_dir, proxies)
+    for link in links:
+        driver.get(link)
+        time.sleep(1)
+        download_file_link(driver)
+        download_images(driver, link, download_dir, proxies)
 
     links = []
 
@@ -175,4 +148,7 @@ def main(driver: WebDriver, download_dir: str, proxies):
                 links.append(sub_page)
 
     print("Got sub-page links", links)
-    download_images(driver, links, download_dir, proxies)
+    for link in links:
+        driver.get(link)
+        time.sleep(1)
+        download_images(driver, link, download_dir, proxies)
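
Usage note: a minimal sketch of how main() might be driven after this change.
Only main(driver, download_dir, proxies) comes from the patched module; the
Chrome driver and the proxy dict are illustrative assumptions:

    from selenium import webdriver

    import run  # the patched package (run/__init__.py)

    if __name__ == "__main__":
        driver = webdriver.Chrome()  # any Selenium WebDriver should work
        # Hypothetical local proxy address; adjust or drop as needed.
        proxies = {"http": "http://127.0.0.1:7890",
                   "https": "http://127.0.0.1:7890"}
        try:
            run.main(driver, download_dir="downloads", proxies=proxies)
        finally:
            driver.quit()

The try/finally mirrors the fact that main() navigates many pages: quitting
the driver in finally ensures the browser is torn down even if a page fails
to load partway through the crawl.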