批量爬取 fanbox
This commit is contained in:
parent
7456e3dd82
commit
4accaec286
@@ -76,7 +76,7 @@ def download_image(url: str, download_dir: str, proxies):
|
|||||||
print(f"开始下载:{url}")
|
print(f"开始下载:{url}")
|
||||||
headers = {
|
headers = {
|
||||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
|
||||||
"Referer": "https://monpetit17.fanbox.cc/",
|
"Referer": "https://www.fanbox.cc/",
|
||||||
"Cookie": "p_ab_id=0; p_ab_id_2=1; p_ab_d_id=1743506050; _gcl_au=1.1.1973053698.1715103653; cf_clearance=9nglHcFCr9D17iz8392jJQIJi4oN8TSpBsvlS4oPjvs-1715103654-1.0.1.1-DLbaQVWB8O8lyUZyJ5P8ToQD7Bx7dt5r_7KUz9aFqIcEM5GiAunuXwAQRu5BJ3c3zBOt242Oy13YvXz1omV5Dw; FANBOXSESSID=35206256_01xWtpm33E6tjKYTAdLoVaZq5xceRVNB; privacy_policy_agreement=6; privacy_policy_notification=0; _gid=GA1.2.1663028388.1715442061; __cf_bm=YRkVSwzNtFz96eLrGB3KhENnEVv7lkYdc898q2AF3G0-1715447846-1.0.1.1-yJ.D2R_c.jS8SErn4fAmIg6fShYSfc2h_m4vQrehVA5UpRV7rIsjnYCGTZLro7JW9nh1r0Hu853rOcvoy6hSrA; cf_clearance=QQWAR7NhgwYZjtKExO9v0IE2eHNkQweSMLRVugqL3mE-1715447848-1.0.1.1-JhfPisIW0GNy135ks_mIObi9.X.FmmorhRl_Fows5nrHEQuPBt2S7CY_lnB4vCipSp4Xq.QrKW5oscwSCMk_Hw; _gat_gtag_UA_1830249_145=1; _ga_D9TLP3EFER=GS1.1.1715442066.2.1.1715448098.22.0.0; _ga=GA1.1.530858074.1715103653"
|
"Cookie": "p_ab_id=0; p_ab_id_2=1; p_ab_d_id=1743506050; _gcl_au=1.1.1973053698.1715103653; cf_clearance=9nglHcFCr9D17iz8392jJQIJi4oN8TSpBsvlS4oPjvs-1715103654-1.0.1.1-DLbaQVWB8O8lyUZyJ5P8ToQD7Bx7dt5r_7KUz9aFqIcEM5GiAunuXwAQRu5BJ3c3zBOt242Oy13YvXz1omV5Dw; FANBOXSESSID=35206256_01xWtpm33E6tjKYTAdLoVaZq5xceRVNB; privacy_policy_agreement=6; privacy_policy_notification=0; _gid=GA1.2.1663028388.1715442061; __cf_bm=YRkVSwzNtFz96eLrGB3KhENnEVv7lkYdc898q2AF3G0-1715447846-1.0.1.1-yJ.D2R_c.jS8SErn4fAmIg6fShYSfc2h_m4vQrehVA5UpRV7rIsjnYCGTZLro7JW9nh1r0Hu853rOcvoy6hSrA; cf_clearance=QQWAR7NhgwYZjtKExO9v0IE2eHNkQweSMLRVugqL3mE-1715447848-1.0.1.1-JhfPisIW0GNy135ks_mIObi9.X.FmmorhRl_Fows5nrHEQuPBt2S7CY_lnB4vCipSp4Xq.QrKW5oscwSCMk_Hw; _gat_gtag_UA_1830249_145=1; _ga_D9TLP3EFER=GS1.1.1715442066.2.1.1715448098.22.0.0; _ga=GA1.1.530858074.1715103653"
|
||||||
}
|
}
|
||||||
save_path = os.path.join(download_dir, url.split("/")[-1])
|
save_path = os.path.join(download_dir, url.split("/")[-1])
|
||||||
@@ -148,24 +148,31 @@ def main(driver: WebDriver, download_dir: str, proxies):
|
|||||||
|
|
||||||
links = []
|
links = []
|
||||||
|
|
||||||
|
base_url = "https://www.fanbox.cc/@laserflip/posts"
|
||||||
|
page_url = base_url + "?page=%d"
|
||||||
|
post_url = base_url + "/%s"
|
||||||
|
|
||||||
start_page = 1
|
start_page = 1
|
||||||
url = "https://monpetit17.fanbox.cc/posts?page=%d" % start_page
|
url = page_url % start_page
|
||||||
links += find_link(driver, url)
|
links += find_link(driver, url)
|
||||||
|
|
||||||
while has_next(driver):
|
while has_next(driver):
|
||||||
start_page += 1
|
start_page += 1
|
||||||
url = "https://monpetit17.fanbox.cc/posts?page=%d" % start_page
|
url = page_url % start_page
|
||||||
links += find_link(driver, url)
|
links += find_link(driver, url)
|
||||||
|
|
||||||
|
download_file_link(driver, links)
|
||||||
|
download_images(driver, links, download_dir, proxies)
|
||||||
|
|
||||||
|
links = []
|
||||||
|
|
||||||
for sub_dir in os.listdir(download_dir):
|
for sub_dir in os.listdir(download_dir):
|
||||||
f = os.path.join(download_dir, sub_dir)
|
f = os.path.join(download_dir, sub_dir)
|
||||||
if os.path.isfile(f):
|
if os.path.isfile(f):
|
||||||
continue
|
continue
|
||||||
if len(os.listdir(f)) == 0:
|
if len(os.listdir(f)) == 0:
|
||||||
sub_page = "https://monpetit17.fanbox.cc/posts/%s" % sub_dir
|
sub_page = post_url % sub_dir
|
||||||
links.append(sub_page)
|
links.append(sub_page)
|
||||||
|
|
||||||
print("获取子页面链接", links)
|
print("获取子页面链接", links)
|
||||||
|
|
||||||
download_file_link(driver, links)
|
|
||||||
download_images(driver, links, download_dir, proxies)
|
download_images(driver, links, download_dir, proxies)
|
||||||
|
Loading…
Reference in New Issue
Block a user