代码如下:
import requests
import os
import json
import math
# Request headers captured from a browser session. None of the requests.get
# calls visible in this file pass them, so they are currently unused.
# NOTE(review): the Cookie value looks like a captured login session and is
# hard-coded in source - consider loading it from the environment instead.
# Each value is written as a triple-quoted literal and then stripped: without
# the strip, the surrounding line breaks would be part of the header value,
# which is not a valid HTTP header value.
headers = {
    'Cookie': '''
BIDUPSID=2F56B41C608DF5A236E69D17F52E62DF; PSTM=1529402270; BAIDUID=85D43369E6DD3618039C668117839489:FG=1; BDUSS=hTODh1UktSNndvYy1EdE94b2FrfmlrRUlPS0lBYTRteUJpdFNBVHFxU0VvMWxiQUFBQUFBJCQAAAAAAAAAAAEAAAB23DcU6eTQxNPmuOgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIQWMluEFjJbaE; pgv_pvi=6708710400; BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0; PSINO=7; H_PS_PSSID=26523_1443_21116_18559_20930; MCITY=-289%3A; M_LG_UID=339205238; M_LG_SALT=5c64ed8d91517fe31fc30e360e206f1c; validate=81692
'''.strip(),
    'User-Agent': '''
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36
'''.strip(),
}
# Every city collected so far; filled in by build_cities() and get_cities().
cities = []
# Number of results per page, used to work out how many pages to request.
page_size = 10
# Build a city record and register it in the shared list.
def build_cities(city):
    """Append a trimmed copy of ``city`` to the module-level ``cities`` list.

    Only the ``name``, ``code`` and ``num`` entries are copied, in that
    order. A missing key raises ``KeyError``.
    """
    record = {field: city[field] for field in ("name", "code", "num")}
    cities.append(record)
# Fetch the city list, then download and save every result page per city.
def get_cities():
    """Collect all cities from the search endpoint and save their paged data.

    The first request returns cities under ``content`` and per-province
    city lists under ``more_city``. Every city is added to the module-level
    ``cities`` list. Then, for each city, every result page is fetched with
    ``get_estate`` and written to disk with ``save_file``.

    Raises:
        requests.RequestException: if the city-list request fails, times
            out, or returns an HTTP error status.
    """
    url = r"https://map.baidu.com/?newmap=1&reqflag=pcmap&biz=1&from=webmap&da_par=direct&pcevaname=pc4.1&qt=s&da_src=searchBox.button&wd=%E5%B0%8F%E5%8C%BA&c=1&src=0&wd2=&pn=100&sug=0&l=5&b=(8026451.220000001,1789787.88;15186259.22,7548763.88)&from=webmap&biz_forward={%22scaler%22:1,%22styles%22:%22pl%22}&sug_forward=&auth=y4=Y=EX0KFJVeL6JVaOHL44WUGVBZxNcuxHBVExVLREtIKlBT8Hca6a4cAZz1cv3uVtPWv3GuVtWvPYuxt8zv7u@ZPuVteuxtfiKKvAut@@YwB&tn=B_NORMAL_MAP&nn=0&u_loc=13091452,3834529&ie=utf-8&t=1530410690760"
    # A timeout keeps a stalled connection from hanging the crawl forever.
    response = requests.get(url, timeout=30)
    # Fail loudly on an HTTP error instead of parsing an error page.
    response.raise_for_status()
    data = response.json()
    # Cities listed directly under "content" (per the original note: the
    # municipalities other than Chongqing, i.e. the first three hot cities).
    for city in data["content"]:
        build_cities(city)
    # Remaining cities, grouped by province; stored as returned by the server.
    for province in data["more_city"]:
        cities.extend(province['city'])
    for city in cities:
        print("正在获取" + city["name"] + "的数据...")
        total = city["num"]
        # Ceiling division without going through floating point.
        page_count = int(-(-total // page_size))
        # Fetch and persist one page at a time; pages are not kept in memory.
        for page in range(page_count):
            print("正在获取第" + str(page + 1) + "页的数据...")
            page_data = get_estate(city["code"], page)
            print("正在写入文件...")
            save_file(city["name"], str(page + 1), page_data)
            print("写入文件成功...")
# Fetch one page of residential-community search results for a city.
def get_estate(code, pn, timeout=30):
    """Return the decoded JSON body of one result page for a city.

    Args:
        code: City code; converted with ``str`` and placed in the ``c``
            query parameter.
        pn: Page index; converted with ``str`` and placed in the ``pn``
            query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
    """
    # The URL is assembled by concatenation on purpose: it contains literal
    # braces and pre-encoded characters that must be sent unchanged.
    url = (
        r"https://map.baidu.com/?newmap=1&reqflag=pcmap&biz=1&from=webmap&da_par=direct&pcevaname=pc4.1&qt=con&from=webmap&contp=1&da_src=pcmappg.clearify&on_gel=0&wd=%E5%B0%8F%E5%8C%BA&c="
        + str(code)
        + "&biz_forward={%22scaler%22:1,%22styles%22:%22pl%22}&pn="
        + str(pn)
        + "&auth=y4=Y=EX0KFJVeL6JVaOHL44WUGVBZxNcuxHBVExVzHVt2JjzSXGa@R@LayYx1cv3uVtPWv3GuBt8yR9xAw8zv7u@ZPuVteuztgjLLwBvt@@YwD&tn=B_NORMAL_MAP&nn=0&u_loc=13091452,3834529&ie=utf-8&l=5&t=1530410422816"
    )
    # Without a timeout a stalled connection would block indefinitely.
    response = requests.get(url, timeout=timeout)
    # Surface HTTP errors instead of saving an error payload as data.
    response.raise_for_status()
    return response.json()
# Write one page of data to data/<city>/<page>.json.
def save_file(city, page, data):
    """Serialize ``data`` as JSON into ``data/<city>/<page>.json``.

    The directory is created if it does not exist. The path is relative to
    the current working directory.

    Args:
        city: Directory name under ``data/``.
        page: File name without extension, as a string.
        data: JSON-serializable object to write.
    """
    path = "data/" + city + "/"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(path, exist_ok=True)
    # ensure_ascii=False writes non-ASCII text as-is, so the encoding is
    # pinned to UTF-8 rather than left to the platform default.
    with open(path + page + ".json", "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False)
# Script entry point: start the crawl only when run directly, not on import.
if __name__ == "__main__":
    get_cities()
代码没有做异常处理,因此在网络不佳的情况下可能会报错。
欢迎在评论区留下您的见解~