Python实现批量采集商品数据的示例详解
目录
本次目的、知识点、开发环境、代码
本次目的
python批量采集某商品数据
知识点
requests 发送请求
re 解析网页数据
json 类型数据提取
csv 表格数据保存
开发环境
python 3.8
pycharm
requests
pymysql
代码
导入模块
import csv
import json
import logging
import random
import re
import time

import pymysql
import requests
核心代码
logger = logging.getLogger(__name__)

# Search endpoint without the trailing result offset; the offset is appended
# per page. The same string doubles as the ``referer`` header value.
SEARCH_URL = "https://s.taobao.com/search?q=%E4%B8%9D%E8%A2%9C&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20220323&ie=utf8&bcoffset=1&ntoffset=1&p4ppushleft=2%2C48&s="

# Output file; opened in append mode so repeated runs accumulate rows.
CSV_PATH = "淘宝.csv"

# Column names written as the CSV header (and used by the ``goods`` table).
CSV_COLUMNS = ["title", "pic_url", "detail_url", "view_price", "item_loc", "view_sales", "nick"]

# Keys read from each auction dict, in CSV column order. Only the first one
# differs from its column name (``raw_title`` -> ``title``).
AUCTION_KEYS = ["raw_title", "pic_url", "detail_url", "view_price", "item_loc", "view_sales", "nick"]

# Seconds to wait for the server before giving up on a page.
REQUEST_TIMEOUT = 10

# The page embeds its data as ``g_page_config = {...};`` on a single line.
PAGE_CONFIG_RE = re.compile(r"g_page_config = (.*);")

INSERT_SQL = (
    "insert into goods(title, pic_url, detail_url, view_price, item_loc, view_sales, nick) "
    "values (%s, %s, %s, %s, %s, %s, %s)"
)


def save_sql(title, pic_url, detail_url, view_price, item_loc, view_sales, nick):
    """Insert one product row into the ``goods`` table.

    A new connection is opened for every call and always closed again, even
    when the insert fails. Values are passed as query parameters so that
    quotes or SQL fragments inside scraped text cannot break or alter the
    statement.

    Args:
        title: Product title.
        pic_url: Product image URL.
        detail_url: Product detail page URL.
        view_price: Displayed price.
        item_loc: Item location.
        view_sales: Displayed sales text.
        nick: Seller nickname.

    Raises:
        pymysql.MySQLError: If connecting or inserting fails.
    """
    # The "xxxx" values are placeholders and must be replaced before running.
    connection = pymysql.connect(
        host="xxx.xxx.xxx.xxx",  # database host
        port=3306,  # database port
        user="xxxx",  # database account
        password="xxxx",  # database password
        db="xxxx",  # database (schema) name, not a table name
    )
    try:
        with connection.cursor() as cursor:
            cursor.execute(
                INSERT_SQL,
                (title, pic_url, detail_url, view_price, item_loc, view_sales, nick),
            )
        # Persist the insert.
        connection.commit()
    finally:
        connection.close()


# Request headers copied from a browser session. The cookie is a captured
# login session: it expires and must be replaced with a fresh one.
headers = {
    "cookie": "miid=4137864361077413341; tracknick=%5Cu5218%5Cu6587%5Cu9F9978083283; thw=cn; hng=CN%7Czh-CN%7CCNY%7C156; cna=MNI4GicXYTQCAa8APqlAWWiS; enc=%2FWC5TlhZCGfEq7Zm4Y7wyNToESfZVxhucOmHkanuKyUkH1YNHBFXacrDRNdCFeeY9y5ztSufV535NI0AkjeX4g%3D%3D; t=ad15767ffa6febb4d2a8709edebf63d3; lgc=%5Cu5218%5Cu6587%5Cu9F9978083283; sgcookie=E100EcWpAN49d4Uc3MkldEc205AxRTa81RfV4IC8X8yOM08mjVtdhtulkYwYybKSRnCaLHGsk1mJ6lMa1TO3vTFmr7MTW3mHm92jAsN%2BOA528auARfjf2rnOV%2Bx25dm%2BYC6l; uc3=nk2=ogczBg70hCZ6AbZiWjM%3D&vt3=F8dCvCogB1%2F5Sh1kqHY%3D&lg2=Vq8l%2BKCLz3%2F65A%3D%3D&id2=UNGWOjVj4Vjzwg%3D%3D; uc4=nk4=0%40oAWoex2a2MA2%2F2I%2FjFnivZpTtTp%2F2YKSTg%3D%3D&id4=0%40UgbuMZOge7ar3lxd0xayM%2BsqyxOW; _cc_=W5iHLLyFfA%3D%3D; _m_h5_tk=ac589fc01c86be5353b640607e791528_1647451667088; _m_h5_tk_enc=7d452e4e140345814d5748c3e31fc355; xlly_s=1; x5sec=7b227365617263686170703b32223a223264393234316334363365353038663531353163633366363036346635356431434c61583635454745506163324f2f6b2b2b4b6166686f4d4d7a45774e7a4d794d6a59324e4473784d4b6546677037382f2f2f2f2f77453d227d; JSESSIONID=1F7E942AC30122D1C7DBA22C429521B9; tfstk=cKKGBRTY1F71aDbHPcs6LYjFVa0dZV2F6iSeY3hEAYkCuZxFizaUz1sbK1hS_r1..; l=eBEVp-O4gnqzSzLbBOfwnurza77OIIRAguPzaNbMiOCPO75p5zbNW60wl4L9CnGVhsTMR3lRBzU9BeYBqo44n5U62j-la1Hmn; isg=BDw8SnVxcvXZcEU4ugf-vTadDdruNeBfG0WXdBa9WicK4dxrPkd97hHTxQmZqRi3",
    "referer": SEARCH_URL,
    # These two values contain literal double quotes, hence the single-quoted
    # Python strings.
    "sec-ch-ua": '" Not A;Brand";v="99", "Chromium";v="99", "Google Chrome";v="99"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "same-origin",
    "sec-fetch-user": "?1",
    "upgrade-insecure-requests": "1",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.82 Safari/537.36",
}


def parse_auctions(html):
    """Return the list of auction dicts embedded in a search result page.

    Args:
        html: Text of the search result page.

    Returns:
        The value at ``mods.itemlist.data.auctions`` inside the page's
        ``g_page_config`` JSON, or an empty list when the marker is missing,
        the JSON is invalid, or the expected keys are absent (for example
        when a login or verification page is returned instead of results).
    """
    match = PAGE_CONFIG_RE.search(html)
    if match is None:
        return []
    try:
        config = json.loads(match.group(1))
        return config["mods"]["itemlist"]["data"]["auctions"]
    except (ValueError, KeyError, TypeError):
        # ValueError covers json.JSONDecodeError; TypeError covers a level
        # that is null / not a dict.
        return []


def auction_to_row(auction):
    """Return one auction's field values in CSV column order.

    Raises:
        KeyError: If the auction lacks one of the expected fields.
    """
    return [auction[key] for key in AUCTION_KEYS]


def main():
    """Scrape 100 search result pages into MySQL and the CSV file."""
    with open(CSV_PATH, mode="a", encoding="utf-8", newline="") as f:
        csv_writer = csv.writer(f)
        # Append mode starts at end of file, so position 0 means the file is
        # empty; only then write the header, to avoid repeating it each run.
        if f.tell() == 0:
            csv_writer.writerow(CSV_COLUMNS)

        for page in range(1, 101):
            # NOTE(review): offsets start at 44, so the results at offset 0
            # are never requested - confirm whether that is intended.
            url = f"{SEARCH_URL}{44 * page}"
            try:
                response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
            except requests.RequestException:
                logger.exception("Request for page %d failed", page)
            else:
                auctions = parse_auctions(response.text)
                if not auctions:
                    logger.warning("No auctions found on page %d", page)
                for auction in auctions:
                    try:
                        row = auction_to_row(auction)
                    except KeyError as exc:
                        logger.warning("Skipping auction without field %s", exc)
                        continue
                    print(*row)
                    try:
                        save_sql(*row)
                    except pymysql.MySQLError:
                        # As before, a row that cannot be stored in the
                        # database is not written to the CSV either.
                        logger.exception("Could not store row in MySQL")
                        continue
                    csv_writer.writerow(row)
                # Make the page's rows durable even if a later page crashes.
                f.flush()
            # Random pause between pages to throttle the request rate.
            time.sleep(random.randint(3, 5))


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    main()
效果展示
到此，这篇关于《Python实现批量采集商品数据的示例详解》的文章就介绍到这了。更多相关Python采集商品数据的内容，请搜索脚本之家以前的文章或继续浏览下面的相关文章，希望大家以后多多支持脚本之家！
X 关闭
X 关闭
- 1. 5G资费不大降!三大运营商谁提供的5G网速最快?中国信通院给出答案
- 2. 联想拯救者Y70发布最新预告:售价2970元起 迄今最便宜的骁龙8+旗舰
- 3. 亚马逊开始大规模推广掌纹支付技术 顾客可使用“挥手付”结账
- 4. 现代和起亚上半年出口20万辆新能源汽车同比增长30.6%
- 5. 如何让居民5分钟使用到各种设施?沙特“线性城市”来了
- 6. AMD实现连续8个季度的增长 季度营收首次突破60亿美元利润更是翻倍
- 7. 转转集团发布2022年二季度手机行情报告:二手市场“飘香”
- 8. 充电宝100Wh等于多少毫安?铁路旅客禁止、限制携带和托运物品目录
- 9. 好消息!京东与腾讯续签三年战略合作协议 加强技术创新与供应链服务
- 10. 名创优品拟通过香港IPO全球发售4100万股 全球发售所得款项有什么用处?