【代码】Python3爬取中国大陆手机号段

前言

本文给出一个 Python3 脚本：使用 requests 与 BeautifulSoup，按运营商、号段、省份、城市逐级抓取 telphone.cn 上的中国大陆手机号段，并将结果逐行写入 number-prefix.txt。

源代码

完整脚本如下：
import time

import requests
from bs4 import BeautifulSoup
import lxml

# Province-level regions to crawl, in crawl order.
province_list = [
    "山东",
    "江苏",
    "安徽",
    "四川",
    "陕西",
    "湖北",
    "北京",
    "天津",
    "上海",
    "广东",
    "广西",
    "浙江",
    "河南",
    "甘肃",
    "吉林",
    "辽宁",
    "内蒙古",
    "新疆",
    "黑龙江",
    "福建",
    "河北",
    "重庆",
    "海南",
    "江西",
    "山西",
    "湖南",
    "青海",
    "贵州",
    "宁夏",
    "云南",
    "西藏",
]

# Maps each operator name to the three-digit number segments it owns.
# Despite the "_list" suffix this is a dict; insertion order is the crawl order.
operator_list = {
    "中国联通": [
        130, 131, 132, 145, 146, 155, 156, 166,
        167, 171, 175, 176, 185, 186, 196,
    ],
    "中国电信": [
        133, 141, 149, 153, 162, 170, 173, 174,
        177, 180, 181, 189, 190, 191, 193, 199,
    ],
    "中国移动": [
        134, 135, 136, 137, 138, 139, 147, 148, 150,
        151, 152, 157, 158, 159, 165, 172, 178, 182,
        183, 184, 187, 188, 195, 197, 198,
    ],
    "中国广电": [192],
}

# Root of the site whose "/prefix/..." pages are crawled.
base_url = "https://telphone.cn"

# index = requests.get("").text
# print(index)

# NOTE(review): the pasted source had lost all indentation. The nesting below
# is reconstructed from the original "traverse ..." comments and from where
# each variable is used. The 1-second pause is assumed to follow each city
# page -- confirm against the original script.


def _prefix_links(url):
    """Fetch *url* and return the <a> tags listed in its first <section>.

    Returns an empty list when the page has no <section> element, so callers
    can iterate over the result without an IndexError. Network errors raised
    by ``requests`` (including timeouts) propagate to the caller.
    """
    # Without a timeout a stalled connection would hang the crawl forever.
    html = requests.get(url, timeout=10).text
    sections = BeautifulSoup(html, 'lxml').select("section")
    if not sections:
        return []
    return sections[0].select(".list-box__grid .list-box__item a")


def _second_path_component(href):
    """Return the second path component of an href shaped like '/a/b/'."""
    # Drop the first and last character (presumably the surrounding slashes),
    # then take the component after the first remaining "/".
    return href[1:-1].split("/")[1]


# Walk every operator; only the number segments are needed, not the names.
for number_segment_list in operator_list.values():
    # Walk every number segment of this operator.
    for number_segment in number_segment_list:
        # Walk every province.
        for province in province_list:
            province_links = _prefix_links(
                f"{base_url}/prefix/{province}{number_segment}"
            )
            # Walk every city linked from the province page.
            for a_province in province_links:
                # Presumably the link target is "<city><segment>"; every
                # segment starts with "1", so cutting at the first "1" keeps
                # only the city name.
                city = _second_path_component(
                    a_province.attrs["href"]
                ).split("1")[0]
                city_links = _prefix_links(
                    f"{base_url}/prefix/{city}{number_segment}"
                )
                prefixes = [
                    _second_path_component(a_city.attrs["href"])
                    for a_city in city_links
                ]
                # Append the whole city page in one go instead of reopening
                # the file for every prefix; the explicit encoding keeps the
                # output independent of the platform default.
                with open("number-prefix.txt", "a", encoding="utf-8") as f:
                    f.writelines(f"{prefix}\n" for prefix in prefixes)
                for number_prefix in prefixes:
                    print(f"写入文件完成: {province} - {city} - {number_prefix}")
                # Throttle requests between city pages.
                print("等待1s")
                time.sleep(1)
print("全部完成")

完成