【代码】Python3爬取ZFile站点的所有文件

前言

本文使用 Python3 递归爬取 ZFile 站点上的所有文件，并按站点原有的目录结构保存到本地。

下载依赖

pip3 install requests
pip3 install loguru

源代码

以下是完整的爬虫脚本：
import json
import os
import time
from urllib.parse import quote

import requests
from loguru import logger

# Local root directory under which every downloaded file is stored.
base_src = "./cosersets"
# Path of the log file; the sink added below uses a minimum level of INFO.
log_src = "./log.log"
logger.add(log_src, level="INFO", format="{time} {level} {message}")
# Route HTTP and HTTPS traffic through the local proxy by exporting the
# conventional proxy environment variables.
os.environ.update(
    dict.fromkeys(("http_proxy", "https_proxy"), "http://127.0.0.1:7890")
)


def get_url(src):
    """Build the listing-API URL for the remote directory ``src``.

    The path is percent-encoded before it is placed in the query string so
    that names containing reserved characters (``&``, ``#``, ``+``, ``?``,
    ``%``, spaces, non-ASCII text) are sent as part of the ``path`` value
    instead of terminating it or being parsed as extra parameters.  ``/`` is
    left unescaped, so plain paths such as ``"/"`` produce the same URL as
    before.

    Args:
        src: Remote directory path, e.g. ``"/"`` or ``"/album/set 01"``.

    Returns:
        The full listing URL as a string.
    """
    encoded_path = quote(src, safe="/")
    return f"https://www.cosersets.com/api/list/1?path={encoded_path}&password=&orderBy=&orderDirection="


# Seconds to wait for the server to connect or to send the next bytes.
# Without a timeout a stalled connection would block the crawl forever.
_REQUEST_TIMEOUT = 30
# Number of bytes written to disk per chunk while streaming a download.
_DOWNLOAD_CHUNK_SIZE = 64 * 1024


def _download_file(file_url, local_file):
    """Stream ``file_url`` into ``local_file``; return True if it was saved.

    The body is written to ``<local_file>.part`` and only renamed onto
    ``local_file`` after the transfer finished, so an interrupted download
    never leaves a truncated file that a later run would treat as complete.
    A response with an HTTP error status is logged and skipped instead of
    having its error page stored as the file's content.

    Raises:
        requests.RequestException: On transport failures (connection errors,
            timeouts); the partial file is removed before re-raising.
        OSError: If the local file cannot be written or renamed.
    """
    partial_file = f"{local_file}.part"
    with requests.get(file_url, stream=True, timeout=_REQUEST_TIMEOUT) as response:
        if not response.ok:
            logger.error(f"下载失败(HTTP {response.status_code}): {file_url}")
            return False
        try:
            with open(partial_file, "wb") as f:
                for chunk in response.iter_content(chunk_size=_DOWNLOAD_CHUNK_SIZE):
                    f.write(chunk)
        except BaseException:
            # Clean up on any interruption (including Ctrl-C), then let the
            # original exception propagate unchanged.
            try:
                os.remove(partial_file)
            except OSError:
                pass
            raise
    logger.debug("请求对象已销毁")
    os.replace(partial_file, local_file)
    return True


def get_response_list(url):
    """Recursively mirror the remote directory listed at ``url`` to disk.

    The listing endpoint is read as JSON with a top-level ``code`` and a
    ``data.files`` list whose entries carry ``type``, ``path``, ``name`` and
    (for files) ``url``.  Folders are crawled recursively; files are saved
    below ``base_src`` unless a non-empty local copy already exists.

    Remote and local paths are formed by concatenating ``path`` and ``name``
    directly, which assumes the API returns ``path`` with a trailing slash
    (TODO confirm against the server).

    Args:
        url: Listing URL, normally produced by ``get_url``.

    Raises:
        requests.RequestException: If a request fails at transport level.
        json.JSONDecodeError: If the listing response is not valid JSON.
        KeyError: If the listing JSON lacks one of the fields read here.
    """
    # The listing response gets its own name and scope so it is always the
    # object that is closed, independent of any download performed below.
    with requests.get(url, timeout=_REQUEST_TIMEOUT) as listing_response:
        logger.debug("创建请求对象")
        response_object = json.loads(listing_response.text)
    logger.debug("请求对象已销毁")

    if response_object["code"] != 0:
        # Report the failure instead of silently skipping the directory.
        logger.warning(f"接口返回非0状态码({response_object['code']}), 已跳过: {url}")
        return

    for file_object in response_object["data"]["files"]:
        logger.debug(f"响应对象: {file_object}")
        entry_type = file_object["type"]

        if entry_type == "FOLDER":
            remote_path = f"{file_object['path']}{file_object['name']}"
            logger.info(f"进入到子级URL目录: {remote_path}")
            get_response_list(get_url(remote_path))
            logger.info(f"回到了父级URL目录: {url}")
        elif entry_type == "FILE":
            local_dir = f"{base_src}{file_object['path']}"
            local_file = f"{local_dir}{file_object['name']}"

            if not os.path.exists(local_dir):
                # exist_ok guards against the directory appearing between
                # the check above and the creation.
                os.makedirs(local_dir, exist_ok=True)
                logger.info(f"新建本地目录: {local_dir}")

            # A zero-byte file is treated as a failed earlier attempt and is
            # downloaded again.
            if os.path.exists(local_file) and os.path.getsize(local_file) != 0:
                logger.info(f"文件已存在跳过下载: {local_file}")
                continue

            logger.info(f"正在下载到本地: {local_file}")
            if _download_file(file_object["url"], local_file):
                logger.info(f"下载到本地完成: {local_file}")


# Script entry point: start crawling from the root directory of the site.
if __name__ == "__main__":
    root_listing_url = get_url("/")
    get_response_list(root_listing_url)

完成