1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
| import time
import requests from bs4 import BeautifulSoup import lxml
# Region names that the crawl loop interpolates into the
# "/prefix/{province}{number_segment}" request path.
province_list = [
    "山东", "江苏", "安徽", "四川", "陕西", "湖北", "北京", "天津",
    "上海", "广东", "广西", "浙江", "河南", "甘肃", "吉林", "辽宁",
    "内蒙古", "新疆", "黑龙江", "福建", "河北", "重庆", "海南", "江西",
    "山西", "湖南", "青海", "贵州", "宁夏", "云南", "西藏",
]

# Three-digit number segments grouped by carrier name.  Only the segment
# values are interpolated into request paths; the carrier names serve as
# grouping keys.
operator_list = {
    "中国联通": [
        130, 131, 132, 145, 146, 155, 156, 166, 167, 171, 175, 176, 185,
        186, 196,
    ],
    "中国电信": [
        133, 141, 149, 153, 162, 170, 173, 174, 177, 180, 181, 189, 190,
        191, 193, 199,
    ],
    "中国移动": [
        134, 135, 136, 137, 138, 139, 147, 148, 150, 151, 152, 157, 158,
        159, 165, 172, 178, 182, 183, 184, 187, 188, 195, 197, 198,
    ],
    "中国广电": [192],
}

# Scheme and host that every "/prefix/..." request is issued against.
base_url = "https://telphone.cn"
# Seconds to wait for the server before giving up on a single request;
# without a timeout one stalled connection would block the crawl forever.
_REQUEST_TIMEOUT = 10

# File that collected number prefixes are appended to, one per line.
_OUTPUT_PATH = "number-prefix.txt"


def _prefix_slugs(session, path_suffix):
    """Return the second path component of every listed link on a prefix page.

    Fetches ``{base_url}/prefix/{path_suffix}``, takes the first ``<section>``
    element and reads the ``href`` of each anchor matching
    ``.list-box__grid .list-box__item a``.  For an href shaped like
    ``/a/b/`` the value collected is ``b``.

    Args:
        session: A ``requests.Session`` used to issue the GET request.
        path_suffix: Text appended after ``/prefix/`` in the request URL.

    Returns:
        A list of strings, in page order.  The list is empty when the
        request fails, the server answers with an error status, or the page
        contains no ``<section>``; a message is printed in those cases so
        one bad page does not abort the whole crawl.
    """
    url = f"{base_url}/prefix/{path_suffix}"
    try:
        response = session.get(url, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"请求失败, 已跳过: {url} ({exc})")
        return []

    section = BeautifulSoup(response.text, "lxml").select_one("section")
    if section is None:
        print(f"页面结构异常, 已跳过: {url}")
        return []

    slugs = []
    for anchor in section.select(".list-box__grid .list-box__item a"):
        # Strip the surrounding slashes instead of slicing [1:-1], so an href
        # without a trailing slash does not lose its last character.
        parts = anchor.get("href", "").strip("/").split("/")
        if len(parts) > 1:
            slugs.append(parts[1])
    return slugs


# Crawl every (number segment, province) pair, follow each city link found on
# the province page, and append every number prefix listed on the city page
# to the output file.  One session is reused for all requests and the output
# file is opened once for the whole run.
with requests.Session() as _session, \
        open(_OUTPUT_PATH, "a", encoding="utf-8") as _output:
    for number_segment_list in operator_list.values():
        for number_segment in number_segment_list:
            for province in province_list:
                province_page = f"{province}{number_segment}"
                for city_slug in _prefix_slugs(_session, province_page):
                    # Keep only the text before the first "1".
                    # NOTE(review): presumably the slug is
                    # "<city name><number segment>" and every segment starts
                    # with 1 — confirm against the live page markup.
                    city = city_slug.split("1")[0]
                    city_page = f"{city}{number_segment}"
                    for number_prefix in _prefix_slugs(_session, city_page):
                        _output.write(f"{number_prefix}\n")
                        print(f"写入文件完成: {province} - {city} - {number_prefix}")
                    # Push this city's lines to disk so an interrupted run
                    # keeps everything collected so far.
                    _output.flush()
                    # Pause between city pages to limit the request rate.
                    print("等待1s")
                    time.sleep(1)
print("全部完成")
|