b站视频爬取案例
import requests from lxml import etree import json if __name__ == '__main__': # UA伪装 head= { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0", "Referer": "https://search.bilibili.com/all?", "Cookie": "buvid_fp_plain=undefined; DedeUserID=121991448; DedeUserID__ckMd5=9e0883af22947c47; CURRENT_BLACKGAP=0; buvid4=B52817A7-548A-24AF-F6D1-671539F8D84A46089-022112911-pIOydL%2Bx%2FtJ0KaVCrqVHDnapk68hRLXn0o6mnH1vuNcEyHPeCVTwrg%3D%3D; is-2022-channel=1; enable_web_push=DISABLE; header_theme_version=CLOSE; rpdid=|(m))m~uRmm0J'u~|RJlulkm; FEED_LIVE_VERSION=V_WATCHLATER_PIP_WINDOW2; PVID=1; fingerprint=d26d60ed0ec0d0f7a6b5056693a8acbb; buvid_fp=d26d60ed0ec0d0f7a6b5056693a8acbb; CURRENT_QUALITY=80; buvid3=7F1C4251-972B-A6E5-38AD-63626520698301889infoc; b_nut=1733312401; _uuid=CC9A4FE3-ECB2-A4DD-710310-83446BD14310A02817infoc; home_feed_column=5; browser_resolution=1707-946; bp_t_offset_121991448=1012262974930288640; b_lsid=C3B8510D4_193F8977A84; bmg_af_switch=1; bmg_src_def_domain=i1.hdslb.com; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MzUzMDE2OTgsImlhdCI6MTczNTA0MjQzOCwicGx0IjotMX0.9UDClgiB9GUJu1e6ZsyN6e-qeKtxbDOL1V8OdLFuIJo; bili_ticket_expires=1735301638; SESSDATA=43e92030%2C1750594499%2C7d892%2Ac1CjDdpJOFi23g70Eic2Tw3nEMZnLYzkVoT_5Nm4swH5FTg8uq5rFABX3tQAXj3SfJX0USVkFXWGRFSlhwS0hHWHdoc0tzLVBhVEZJWWI4OWdxeVZJLTBUdldEY2lZS1dOUmFJSTJwYklaM0ktVjJYb0lJZW9vdTcyWUlLOXJBM0h0VERtWWF1RzhBIIEC; bili_jct=d581670c772ec7051f16a05ec30fcb8b; sid=79g6fnjf; CURRENT_FNVAL=4048" } # 1、指定url url ="https://www.bilibili.com/video/BV1N84y1P7en/?spm_id_from=333.337.search-card.all.click&vd_source=15b581d46c0893c3904b6158b122aca5" # 2、发送请求 response = requests.get(url, headers = head) # 3、获取响应的数据 res_text = response.text # 4、数据解析 tree = etree.HTML(res_text) with open("bili2.html", "w", encoding="utf-8") as f: f.write(res_text) base_info = "".join(tree.xpath("/html/head/script[4]/text()"))[20:] info_dict = json.loads(base_info) video_url = info_dict["data"]["dash"]['video'][0]["baseUrl"] audio_url = info_dict["data"]["dash"]['audio'][0]["baseUrl"] video_content = requests.get(video_url, headers=head).content audio_content = requests.get(audio_url, headers=head).content with open("video2.wmv", "wb") as f: f.write(video_content) with open("audio2.mp4", "wb") as fp: fp.write(audio_content)
b站的视频,分为视频和音频两部分
b站的音视频存储部分在`window.__playinfo__` 变量中
打开f12,点击network,然后使用快捷键CTRL + F 搜索 `window.__playinfo__` ,找到类似于下面的数据:
其中`window.__playinfo__` 中有 video 属性 和 audio属性,分别对应视频和音频,然后用提取工具分别提取这两个属性中的 `baseUrl` 属性的值
提出取来后,分别保存 `.wmv` 和 `mp4` 文件到本地,然后就可以用音视频合成的工具,将刚刚保存到两个视频和音频文件合成即可。