爬取景区源码

news/2025/1/18 9:04:17/

import requests
import json
import csv
import os

# Comment-list endpoint queried by this script (one POST per result page).
postUrl = "https://m.ctrip.com/restapi/soa2/13444/json/getCommentCollapseList"

# Add the poiId and display name of every place to scrape here.
urls = [
    ["75487323", "凤凰雾涧江景民宿"],
    ["11052220", "古童临江客栈"],
    ["55918524", "凤凰等你来三生三世艺术民宿"],
    ["15911807", "凤凰虎耳草屋江景民宿"],
    ["42687808", "凤凰素履莲花 璞树漫居江景度假民宿"],
    ["66065748", "凤凰金水岸 慕名主题文创体验民宿"],
    ["64225492", "凤凰雪晴集 人文半山民宿"],
    ["7078046", "等你来 倾城轻奢民宿"],
    ["6842040", "凤凰沱水人家精品民宿"],
    ["17541312", "凤凰云桥自在江景精品民宿"],
]
# Alternative target list, kept for reference:
# urls = [
#     # ['76865', '星海广场'],
#     ['75628', '棒棰岛'],
#     ['75633', '大连森林动物园'],
#     ['60514877', '三寰牧场'],
#     ['75635', '劳动公园'],
#     ['23035466', '东港音乐喷泉广场'],
#     ['79494', '海之韵广场'],
#     ['87618', '金石滩度假区'],
#     ['87748', '滨海路'],
#     ['87647', '滨海国家地质公园'],
#     ['24845945', '莲花山观景台'],
#     ['92196', '白玉山景区'],
#     ['13301914', '大连天门山国家森林公园'],
# ]

# Number of comments requested per page ("pageSize" in the request body).
PAGE_SIZE = 10
# Upper bound on pages fetched per place (at most MAX_PAGES * PAGE_SIZE rows).
# NOTE(review): the original comment spoke of a 2000-comment cap and 9 items
# per page, but the code used 300 pages of 10; the code's values are kept.
MAX_PAGES = 300
# Seconds to wait for the server before giving up on a single request.
REQUEST_TIMEOUT = 30
# Directory the per-place CSV files are written to.
OUTPUT_DIR = './dalian/'


def build_payload(poi_id, page_index):
    """Return the JSON-serialisable request body for one page of comments.

    Args:
        poi_id: The place identifier sent as "poiId".
        page_index: 1-based page number sent as "pageIndex".
    """
    return {
        "arg": {
            "channelType": 2,
            "collapseType": 0,
            "commentTagId": 0,
            "pageIndex": page_index,
            "pageSize": PAGE_SIZE,
            "poiId": poi_id,
            "sourceType": 1,
            "sortType": 3,
            "starType": 0,
        },
        "head": {
            "cid": "09031069112760102754",
            "ctok": "",
            "cver": "1.0",
            "lang": "01",
            "sid": "8888",
            "syscode": "09",
            "auth": "",
            "xsid": "",
            "extension": [],
        },
    }


def count_pages(total_count):
    """Return how many pages to fetch for *total_count* comments.

    Rounds up so a trailing partial page is not dropped (plain truncation
    yields 0 pages for fewer than PAGE_SIZE comments), then caps the result
    at MAX_PAGES.
    """
    pages = (int(total_count or 0) + PAGE_SIZE - 1) // PAGE_SIZE
    return min(pages, MAX_PAGES)


def extract_comments(page):
    """Return the "content" of every item in a decoded response page.

    A page whose "result" or "items" entry is missing or null yields an
    empty list, so short or empty pages no longer raise IndexError.
    """
    result = page.get('result') or {}
    items = result.get('items') or []
    return [item['content'] for item in items]


def fetch_page(poi_id, page_index):
    """POST one page request to ``postUrl`` and return the decoded JSON body."""
    response = requests.post(
        postUrl,
        data=json.dumps(build_payload(poi_id, page_index)),
        timeout=REQUEST_TIMEOUT,  # never hang forever on a stalled connection
    )
    return json.loads(response.text)


def scrape_place(poi_id, name):
    """Download the comments of one place into ``OUTPUT_DIR/<name>.csv``.

    The first response supplies "totalCount", from which the number of pages
    is derived; that first response is reused as page 1 instead of being
    requested a second time.
    """
    print("正在爬取景点：", name)

    first_page = fetch_page(poi_id, 1)
    total_page = count_pages(first_page['result']['totalCount'])
    print("总页数:", total_page, "爬取中")

    # The output directory is not guaranteed to exist on a fresh checkout.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    path = OUTPUT_DIR + str(name) + '.csv'

    xuhao = 0  # running row number written to the first CSV column
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['序号', '景区ID', '景区名称', '评论'])
        for page in range(1, total_page + 1):
            page_data = first_page if page == 1 else fetch_page(poi_id, page)
            for content in extract_comments(page_data):
                row = [xuhao, poi_id, name, content]
                writer.writerow(row)
                print(row)
                xuhao += 1

    print(name, "爬取完成")


def main():
    """Scrape every place listed in ``urls``, one CSV file per place."""
    for poi_id, name in urls:
        scrape_place(poi_id, name)


if __name__ == "__main__":
    main()

爬取景区源码

相关文章

【容器化应用程序设计和开发】2.2 容器编排和Kubernetes调度

超全！超详细！2023大厂前端面试题手册

laravel-admin安装wang-Editor3之坑

k8s之审计日志

关于C#中委托的思考与实例

【计算几何】判断一条线段和一段圆弧是否相交 C++代码实现

MySQL库和表

win10远程桌面控制Ubuntu服务器 - 内网穿透实现公网远程