目錄
1、開發工具
2、第三方庫
3、實現思路
4.單個爬取B站視頻
5.批量爬取B站視頻
6.查找所需數據
結尾
1、開發工具
Python3.9
pycharm
requests
和其他python內置庫
2、第三方庫
安裝第三方庫(代碼中還用到了 bs4,需要一同安裝;合并視頻和音頻還需要在系統中安裝 ffmpeg,并確保 ffmpeg 命令在 PATH 中可用)
pip install requests beautifulsoup4
3、實現思路
1.用requests發送get請求,獲得視頻和音頻的下載鏈接
2.將下載到的B站視頻和音頻分別保存到本地
3.使用ffmpeg來合并視頻和音頻。
4.并將合并后的視頻保存到本地。
4.單個爬取B站視頻
import os
import requests
import json
import re
from bs4 import BeautifulSoup
import subprocess
from detail_video import video_bvid
# video_bvid is the ID of the single video to download.
# NOTE: this assignment overrides the video_bvid imported from detail_video
# above; replace the placeholder with a real ID before running.
video_bvid = 'your-single-bvid'
class BilibiliVideoAudio:
    """Download the separate video and audio streams of one Bilibili video and merge them.

    The video page is fetched with ``requests``; the title is read from the
    ``<meta name="title">`` tag and the stream URLs from the
    ``window.__playinfo__`` JSON embedded in the page. The two streams are
    saved under ``DOWNLOAD_DIR`` and can then be muxed with ``ffmpeg``.
    """

    # Directory that download_video_audio() writes into.
    DOWNLOAD_DIR = 'D:\\video'
    # Seconds to wait for the server before giving up, so a stalled
    # connection cannot hang the script forever.
    REQUEST_TIMEOUT = 30
    # Media files are written to disk in pieces of this many bytes.
    CHUNK_SIZE = 1024 * 1024
    # DOTALL lets the embedded JSON span several lines.
    _PLAYINFO_RE = re.compile(r'window\.__playinfo__=({.*?})\s*</script>', re.DOTALL)

    def __init__(self, bvid):
        """Remember the video ID and prepare the HTTP headers used for every request.

        Args:
            bvid: The video ID that is inserted into the video page URL.
        """
        self.bvid = bvid
        # Browser-like headers; presumably copied from a real browser session.
        self.headers = {
            "referer": "https://search.bilibili.com/all?keyword=%E4%B8%BB%E6%92%AD%E8%AF%B4%E8%81%94%E6%92%AD&from_source=webtop_search&spm_id_from=333.1007&search_source=5&page=4&o=90",
            "origin": "https://search.bilibili.com",
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
            # "br" is intentionally not advertised: requests can only decode
            # Brotli responses when an optional Brotli library is installed,
            # and an undecoded body would break the UTF-8 decoding below.
            'Accept-Encoding': 'gzip, deflate'
        }

    def get_video_audio(self):
        """Fetch the video page and extract the title and the stream URLs.

        Returns:
            A dict with the keys ``'title'``, ``'video_url'`` and ``'audio_url'``.

        Raises:
            requests.RequestException: If the page request fails or returns an
                HTTP error status.
            ValueError: If the title or the play info cannot be found in the page.
        """
        # Build the video page URL and request the page content.
        url = f'https://www.bilibili.com/video/{self.bvid}/?spm_id_from=333.337.search-card.all.click&vd_source=14378ecd144bed421affe1fe0ddd8981'
        response = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        content = response.content.decode('utf-8')
        soup = BeautifulSoup(content, 'html.parser')

        # Read the video title from the <meta name="title"> tag.
        meta_tag = soup.head.find('meta', attrs={'name': 'title'}) if soup.head else None
        if meta_tag is None or not meta_tag.get('content'):
            raise ValueError(f"no title meta tag found in the page for {self.bvid}")
        title = meta_tag['content']

        # Read the video and audio stream URLs from the embedded play info.
        video_url, audio_url = self._parse_playinfo(content)
        return {
            'title': title,
            'video_url': video_url,
            'audio_url': audio_url
        }

    @classmethod
    def _parse_playinfo(cls, content):
        """Return ``(video_url, audio_url)`` from the ``window.__playinfo__`` JSON in *content*.

        The first entry of the ``dash.video`` and ``dash.audio`` lists is used.

        Raises:
            ValueError: If the play info is missing or does not have the
                expected structure.
        """
        match = cls._PLAYINFO_RE.search(content)
        if match is None:
            raise ValueError("window.__playinfo__ was not found in the page")
        try:
            dash = json.loads(match.group(1))['data']['dash']
            return dash['video'][0]['base_url'], dash['audio'][0]['base_url']
        except (ValueError, KeyError, IndexError, TypeError) as err:
            raise ValueError(f"unexpected window.__playinfo__ structure: {err!r}") from err

    def download_video_audio(self, url, filename):
        """Download *url* into ``DOWNLOAD_DIR`` under a sanitized *filename*.

        Errors are reported on stdout instead of being raised, so one failed
        download does not abort the caller.

        Args:
            url: Direct URL of the video or audio stream.
            filename: Desired file name; invalid characters are replaced first.

        Returns:
            The path of the saved file, or ``None`` if the download failed.
        """
        # Clean the file name so it contains no characters the file system rejects.
        filename = self.sanitize_filename(filename)
        download_path = os.path.join(self.DOWNLOAD_DIR, filename)
        # Write to a temporary name first so an interrupted download never
        # leaves a truncated file under the final name.
        partial_path = download_path + '.part'
        try:
            os.makedirs(self.DOWNLOAD_DIR, exist_ok=True)
            # Stream the body so large media files are not held in memory.
            with requests.get(url, headers=self.headers, stream=True,
                              timeout=self.REQUEST_TIMEOUT) as resp:
                resp.raise_for_status()
                with open(partial_path, mode='wb') as file:
                    for chunk in resp.iter_content(chunk_size=self.CHUNK_SIZE):
                        file.write(chunk)
            os.replace(partial_path, download_path)
        except Exception as e:
            print(f"Failed to download {filename}: {e}")
            try:
                if os.path.exists(partial_path):
                    os.remove(partial_path)
            except OSError:
                pass
            return None
        print("{:*^30}".format(f"下載完成:{filename}"))
        return download_path

    def sanitize_filename(self, filename):
        """Return *filename* with every invalid character replaced by a space.

        The replaced characters are ``" * < > ? \\ | / : ,``.
        """
        invalid_chars_regex = r'[\"*<>?\\|/:,]'
        sanitized_filename = re.sub(invalid_chars_regex, ' ', filename)
        return sanitized_filename

    def merge_video_audio(self, video_path, audio_path, output_path):
        """Merge a video file and an audio file into *output_path* with ffmpeg.

        The streams are copied without re-encoding and an existing output
        file is overwritten. Failures are reported on stdout, not raised.

        Returns:
            ``True`` if ffmpeg succeeded, ``False`` otherwise.
        """
        command = [
            'ffmpeg',
            '-y',              # overwrite the output file if it already exists
            '-i', video_path,  # input video path
            '-i', audio_path,  # input audio path
            '-c', 'copy',      # copy the original streams, no transcoding
            output_path        # output video path
        ]
        try:
            subprocess.run(command, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers the case where the ffmpeg executable is not installed.
            print(f"合并失敗: {e}")
            return False
        print(f"視頻和音頻合并完成:{output_path}")
        return True
def main():
    """Download the video identified by ``video_bvid`` and merge it with its audio.

    The two streams are saved in ``D:\\video`` and the merged file is written
    to ``D:\\processed_videos``. Any failure is reported on stdout.
    """
    download_dir = 'D:\\video'
    processed_videos_path = 'D:\\processed_videos'
    try:
        # Only a single bvid is processed.
        bilibili = BilibiliVideoAudio(video_bvid)
        video_audio_info = bilibili.get_video_audio()
        # The downloader sanitizes file names before saving, so the paths
        # handed to ffmpeg must be built from the sanitized title as well;
        # otherwise they point at files that were never written.
        title = bilibili.sanitize_filename(video_audio_info['title'])
        video_url = video_audio_info['video_url']
        audio_url = video_audio_info['audio_url']

        # Make sure both the download and the output directory exist.
        os.makedirs(download_dir, exist_ok=True)
        os.makedirs(processed_videos_path, exist_ok=True)

        video_filename = f"{title}.mp4"
        audio_filename = f"{title}.mp3"
        output_filename = f"{title} - combined.mp4"
        video_file_path = os.path.join(download_dir, video_filename)
        audio_file_path = os.path.join(download_dir, audio_filename)
        output_file_path = os.path.join(processed_videos_path, output_filename)

        bilibili.download_video_audio(video_url, video_filename)  # download the video stream
        bilibili.download_video_audio(audio_url, audio_filename)  # download the audio stream

        # The downloader reports errors without raising, so check that both
        # files are really there before starting ffmpeg.
        missing = [path for path in (video_file_path, audio_file_path)
                   if not os.path.isfile(path)]
        if missing:
            raise FileNotFoundError(f"download did not produce: {', '.join(missing)}")

        bilibili.merge_video_audio(video_file_path, audio_file_path, output_file_path)  # merge video and audio
    except Exception as ex:
        print(f"Failed to process video/audio for {video_bvid}: {ex}")
# Script entry point: main() is called unconditionally when this module is loaded.
main()
5.批量爬取B站視頻
# 批量爬取b站上的視頻
import os
import requests
import json
import re
from bs4 import BeautifulSoup
import subprocess
from detail_video import video_bvid
class BilibiliVideoAudio:
    """Download the separate video and audio streams of one Bilibili video and merge them.

    The video page is fetched with ``requests``; the title is read from the
    ``<meta name="title">`` tag and the stream URLs from the
    ``window.__playinfo__`` JSON embedded in the page. The two streams are
    saved under ``DOWNLOAD_DIR`` and can then be muxed with ``ffmpeg``.
    """

    # Directory that download_video_audio() writes into.
    DOWNLOAD_DIR = 'D:\\video'
    # Seconds to wait for the server before giving up, so a stalled
    # connection cannot hang the script forever.
    REQUEST_TIMEOUT = 30
    # Media files are written to disk in pieces of this many bytes.
    CHUNK_SIZE = 1024 * 1024
    # DOTALL lets the embedded JSON span several lines.
    _PLAYINFO_RE = re.compile(r'window\.__playinfo__=({.*?})\s*</script>', re.DOTALL)

    def __init__(self, bvid):
        """Remember the video ID and prepare the HTTP headers used for every request.

        Args:
            bvid: The video ID that is inserted into the video page URL.
        """
        self.bvid = bvid
        # Browser-like headers; presumably copied from a real browser session.
        self.headers = {
            "referer": "https://search.bilibili.com/all?keyword=%E4%B8%BB%E6%92%AD%E8%AF%B4%E8%81%94%E6%92%AD&from_source=webtop_search&spm_id_from=333.1007&search_source=5&page=4&o=90",
            "origin": "https://search.bilibili.com",
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
            # "br" is intentionally not advertised: requests can only decode
            # Brotli responses when an optional Brotli library is installed,
            # and an undecoded body would break the UTF-8 decoding below.
            'Accept-Encoding': 'gzip, deflate'
        }

    def get_video_audio(self):
        """Fetch the video page and extract the title and the stream URLs.

        Returns:
            A dict with the keys ``'title'``, ``'video_url'`` and ``'audio_url'``.

        Raises:
            requests.RequestException: If the page request fails or returns an
                HTTP error status.
            ValueError: If the title or the play info cannot be found in the page.
        """
        # Build the video page URL and request the page content.
        url = f'https://www.bilibili.com/video/{self.bvid}/?spm_id_from=333.337.search-card.all.click&vd_source=14378ecd144bed421affe1fe0ddd8981'
        response = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        content = response.content.decode('utf-8')
        soup = BeautifulSoup(content, 'html.parser')

        # Read the video title from the <meta name="title"> tag.
        meta_tag = soup.head.find('meta', attrs={'name': 'title'}) if soup.head else None
        if meta_tag is None or not meta_tag.get('content'):
            raise ValueError(f"no title meta tag found in the page for {self.bvid}")
        title = meta_tag['content']

        # Read the video and audio stream URLs from the embedded play info.
        video_url, audio_url = self._parse_playinfo(content)
        return {
            'title': title,
            'video_url': video_url,
            'audio_url': audio_url
        }

    @classmethod
    def _parse_playinfo(cls, content):
        """Return ``(video_url, audio_url)`` from the ``window.__playinfo__`` JSON in *content*.

        The first entry of the ``dash.video`` and ``dash.audio`` lists is used.

        Raises:
            ValueError: If the play info is missing or does not have the
                expected structure.
        """
        match = cls._PLAYINFO_RE.search(content)
        if match is None:
            raise ValueError("window.__playinfo__ was not found in the page")
        try:
            dash = json.loads(match.group(1))['data']['dash']
            return dash['video'][0]['base_url'], dash['audio'][0]['base_url']
        except (ValueError, KeyError, IndexError, TypeError) as err:
            raise ValueError(f"unexpected window.__playinfo__ structure: {err!r}") from err

    def download_video_audio(self, url, filename):
        """Download *url* into ``DOWNLOAD_DIR`` under a sanitized *filename*.

        Errors are reported on stdout instead of being raised, so one failed
        download does not abort the caller.

        Args:
            url: Direct URL of the video or audio stream.
            filename: Desired file name; invalid characters are replaced first.

        Returns:
            The path of the saved file, or ``None`` if the download failed.
        """
        # Clean the file name so it contains no characters the file system rejects.
        filename = self.sanitize_filename(filename)
        download_path = os.path.join(self.DOWNLOAD_DIR, filename)
        # Write to a temporary name first so an interrupted download never
        # leaves a truncated file under the final name.
        partial_path = download_path + '.part'
        try:
            os.makedirs(self.DOWNLOAD_DIR, exist_ok=True)
            # Stream the body so large media files are not held in memory.
            with requests.get(url, headers=self.headers, stream=True,
                              timeout=self.REQUEST_TIMEOUT) as resp:
                resp.raise_for_status()
                with open(partial_path, mode='wb') as file:
                    for chunk in resp.iter_content(chunk_size=self.CHUNK_SIZE):
                        file.write(chunk)
            os.replace(partial_path, download_path)
        except Exception as e:
            print(f"Failed to download {filename}: {e}")
            try:
                if os.path.exists(partial_path):
                    os.remove(partial_path)
            except OSError:
                pass
            return None
        print("{:*^30}".format(f"下載完成:{filename}"))
        return download_path

    def sanitize_filename(self, filename):
        """Return *filename* with every invalid character replaced by a space.

        The replaced characters are ``" * < > ? \\ | / : ,``.
        """
        invalid_chars_regex = r'[\"*<>?\\|/:,]'
        sanitized_filename = re.sub(invalid_chars_regex, ' ', filename)
        return sanitized_filename

    def merge_video_audio(self, video_path, audio_path, output_path):
        """Merge a video file and an audio file into *output_path* with ffmpeg.

        The streams are copied without re-encoding and an existing output
        file is overwritten. Failures are reported on stdout, not raised.

        Returns:
            ``True`` if ffmpeg succeeded, ``False`` otherwise.
        """
        command = [
            'ffmpeg',
            '-y',              # overwrite the output file if it already exists
            '-i', video_path,  # input video path
            '-i', audio_path,  # input audio path
            '-c', 'copy',      # copy the original streams, no transcoding
            output_path        # output video path
        ]
        try:
            subprocess.run(command, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers the case where the ffmpeg executable is not installed.
            print(f"合并失敗: {e}")
            return False
        print(f"視頻和音頻合并完成:{output_path}")
        return True
def main(bvids=None):
    """Download and merge a batch of Bilibili videos.

    Bilibili serves the video and the audio as separate streams, so for each
    video both streams are downloaded into ``D:\\video`` and then merged into
    ``D:\\processed_videos``. A failure for one video is reported on stdout
    and does not stop the remaining videos.

    Args:
        bvids: Iterable of video IDs to process. When omitted, the built-in
            list below is used.
    """
    if bvids is None:
        bvids = [
            "BV187411i7zw", "BV1wi4y1E7E6", "BV1Gz4y1X7vh", "BV1Lh411d7Lw",  # 0-3
            "BV1mJ411D7QB", "BV1Z5411w7Xb", "BV1op4y167kS", "BV1Mp4y1p7Ck",  # 4-7
            "BV1nJ41187Zy", "BV1qb4y1Z7JK", "BV1f5411379u", "BV1kt4y1Q792",  # 8-11
            "BV1Qy4y1e7kk", "BV1T7411T7q6", "BV1k64y1k7QL", "BV1J5411c7Rw",  # 12-15
            "BV1Db4y1y7yL", "BV1cC4y1878T", "BV11y4y1z7bY", "BV1LJ411S7ML",  # 16-19
            "BV1X54y1L7mt", "BV1S64y1D7HM", "BV1rK4y1d7mZ", "BV1b64y1y7AE",  # 20-23
            "BV1TK411F7MU", "BV1HN411f7Em", "BV1QA411x7KB", "BV1pM4y1K7Ao",  # 24-27
            "BV1os4y1s7Aw", "BV1sv411e71L", "BV1xZ4y1A7gn", "BV1E3411B7Q3",  # 28-31
            "BV1664y1d78D", "BV1xv41177MR", "BV13q4y1S7y1", "BV1kJ411H7y8",  # 32-35
            "BV1Cq4y1Z7pM", "BV1Jf4y147U7", "BV1az4y117h4", "BV1gy4y1h7wS",  # 36-39
        ]

    download_dir = 'D:\\video'
    processed_videos_path = 'D:\\processed_videos'
    # Make sure both the download and the output directory exist.
    os.makedirs(download_dir, exist_ok=True)
    os.makedirs(processed_videos_path, exist_ok=True)

    # Each video is fetched, downloaded and merged in a single pass, so
    # nothing is requested or downloaded twice.
    for bvid in bvids:
        try:
            bilibili = BilibiliVideoAudio(bvid)
            video_audio_info = bilibili.get_video_audio()
            # The downloader sanitizes file names before saving, so the paths
            # handed to ffmpeg must be built from the sanitized title as well.
            title = bilibili.sanitize_filename(video_audio_info['title'])
            video_url = video_audio_info['video_url']
            audio_url = video_audio_info['audio_url']

            video_filename = f"{title}.mp4"
            audio_filename = f"{title}.mp3"
            output_filename = f"{title} - combined.mp4"
            video_file_path = os.path.join(download_dir, video_filename)
            audio_file_path = os.path.join(download_dir, audio_filename)
            output_file_path = os.path.join(processed_videos_path, output_filename)

            bilibili.download_video_audio(video_url, video_filename)  # download the video stream
            bilibili.download_video_audio(audio_url, audio_filename)  # download the audio stream

            # The downloader reports errors without raising, so check that
            # both files are really there before starting ffmpeg.
            missing = [path for path in (video_file_path, audio_file_path)
                       if not os.path.isfile(path)]
            if missing:
                raise FileNotFoundError(f"download did not produce: {', '.join(missing)}")

            bilibili.merge_video_audio(video_file_path, audio_file_path, output_file_path)  # merge video and audio
        except Exception as ex:
            print(f"Failed to process video/audio for {bvid}: {ex}")
# Script entry point: main() is called unconditionally when this module is loaded.
main()
6.查找所需數據
1)bvid:視頻頁面地址 https://www.bilibili.com/video/ 之后的那一段視頻ID
2)referer / origin / User-Agent / Accept-Encoding:可在瀏覽器開發者工具中查看該頁面請求的請求頭

結尾
希望大家喜歡我的分享!!!文章來源:http://www.zghlxwxcb.cn/news/detail-836530.html
文章來源地址http://www.zghlxwxcb.cn/news/detail-836530.html
到了這里,關于Python爬蟲--爬取嗶哩嗶哩(B站)短視頻平臺視頻的文章就介紹完了。如果您還想了解更多內容,請在右上角搜索TOY模板網以前的文章或繼續瀏覽下面的相關文章,希望大家以后多多支持TOY模板網!