Skip to content

Commit dc30daf

Browse files
authored
Create huya.py
1 parent ca47afe commit dc30daf

File tree

1 file changed

+64
-0
lines changed

1 file changed

+64
-0
lines changed

moumoubaimifan/huya/huya.py

Lines changed: 64 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,64 @@
1+
import requests
2+
from requests.models import requote_uri
3+
from bs4 import BeautifulSoup
4+
import time
5+
import random
6+
import json
7+
import re
8+
9+
# Text file that hands scraped entries from one stage to the next:
# get_list() appends "title|href" lines to it and get_video_url() reads them back.
url_file_name = 'D:\\url.txt'
10+
11+
def get_list(max_pages=500, timeout=10):
    """Scrape the Huya video listing and append the results to ``url_file_name``.

    Each listing page is fetched and parsed; for every video entry found, one
    ``title|href`` line is appended to the file, with the title sanitized by
    ``validate_title``.

    Args:
        max_pages: Maximum number of listing pages to request (pages are
            numbered from 1). Defaults to the previously hard-coded 500.
        timeout: Per-request timeout in seconds, so a stalled connection
            cannot hang the crawl indefinitely.

    Raises:
        requests.RequestException: If a listing page cannot be fetched.
        OSError: If the output file cannot be opened or written.
    """
    page_url = 'https://v.huya.com/g/all?set_id=31&order=hot&page={}'

    # Open the output file once for the whole crawl rather than once per video.
    with open(url_file_name, 'a', encoding='utf-8') as f:
        for p in range(max_pages):
            html = requests.get(page_url.format(p + 1), timeout=timeout)
            soup = BeautifulSoup(html.text, 'html.parser')
            ul = soup.find('ul', class_='vhy-video-list w215 clearfix')
            if ul is None:
                # No video list on this page. Presumably we ran past the last
                # page or the page layout changed; stop instead of crashing
                # with AttributeError on ul.find_all.
                break

            for li in ul.find_all('li'):
                a = li.find('a', class_='video-wrap statpid')
                if a is None:
                    # List item without the expected video link; skip it.
                    continue
                href = a.get('href')
                if not href:
                    continue
                # Strip characters that are not allowed in file names.
                title = validate_title(a.get('title') or '')
                f.write(title + '|' + href + '\n')

            # Persist this page's entries before sleeping.
            f.flush()
            print("已经抓取了 {} 页".format(p + 1))
            # Random short pause (0.1-0.9 s) between page requests.
            time.sleep(random.randint(1, 9) / 10)
27+
28+
# Characters removed from titles: / \ : * ? " < > | plus ASCII control
# characters (newlines, tabs, ...). Control characters would break the
# one-entry-per-line "title|href" file and are not valid in Windows file names.
_INVALID_TITLE_CHARS = re.compile(r'[/\\:*?"<>|\x00-\x1f]')


def validate_title(title):
    """Return *title* with characters unsuitable for a file name removed.

    Args:
        title: The raw video title, or ``None`` when the source element had
            no title attribute.

    Returns:
        The title with every ``/ \\ : * ? " < > |`` and ASCII control
        character deleted; an empty string when *title* is ``None``.
    """
    if title is None:
        return ''
    return _INVALID_TITLE_CHARS.sub('', title)
32+
33+
def get_video_url(timeout=10):
    """Resolve the download URL for every entry stored in ``url_file_name``.

    Each line of the file is expected to look like ``title|href``. The video
    id is derived from the href by dropping the ``.html`` suffix and the
    ``/play/`` prefix, then the player API is queried and the first URL of the
    first item in the response is taken.

    Entries that are malformed, or whose lookup fails (network error, invalid
    JSON, unexpected response shape), are reported and skipped so that one bad
    entry does not abort the whole run.

    Args:
        timeout: Per-request timeout in seconds.

    Returns:
        A list of ``{'title': ..., 'url': ...}`` dicts, one per resolved video.

    Raises:
        OSError: If ``url_file_name`` cannot be opened.
    """
    api_url = 'https://v-api-player-ssl.huya.com/?r=vhuyaplay%2Fvideo&vid={}&format=mp4%2Cm3u8'

    with open(url_file_name, 'r', encoding='utf-8') as urls_file:
        url_lines = urls_file.readlines()

    video_urls = []
    for line in url_lines:
        # Line format: video title | page address
        title, sep, href = line.rstrip('\r\n').partition('|')
        href = href.strip()
        if not sep or not href:
            # Blank or malformed line; nothing to look up.
            continue

        # Drop the suffix regardless of the line ending (the last line of the
        # file may have no trailing newline, or the file may use CRLF).
        if href.endswith('.html'):
            href = href[:-len('.html')]
        video_id = href.replace('/play/', '')

        try:
            resp = requests.get(api_url.format(video_id), timeout=timeout)
            resp.raise_for_status()
            data = json.loads(resp.text)
            url = data['result']['items'][0]['transcode']['urls'][0]
        except (requests.RequestException, ValueError,
                KeyError, IndexError, TypeError) as err:
            # Message: "failed to get the video URL, skipped: <title> (<error>)"
            print('获取视频地址失败，已跳过：{} ({})'.format(title, err))
            continue

        video_urls.append({'title': title, 'url': url})

    return video_urls
50+
51+
def save_video(video_urls, path_template='D:\\{}.mp4', timeout=30,
               chunk_size=1024 * 1024):
    """Download every video in *video_urls* to disk.

    The response body is streamed to the file in chunks, so a large video is
    never held in memory in full. A failed download is reported and skipped;
    the remaining videos are still processed.

    Args:
        video_urls: Iterable of dicts with ``'title'`` and ``'url'`` keys, as
            returned by ``get_video_url``.
        path_template: ``str.format`` template for the output path; ``{}`` is
            replaced by the video title. Defaults to the previously
            hard-coded ``D:\\<title>.mp4``.
        timeout: Timeout in seconds passed to ``requests.get``.
        chunk_size: Number of bytes requested per streamed chunk.
    """
    for item in video_urls:
        title = item.get('title')
        print('正在下载：{}'.format(title))
        try:
            # stream=True defers the body download until iter_content is consumed.
            with requests.get(item.get('url'), stream=True,
                              timeout=timeout) as resp:
                # Do not save an HTTP error page under an .mp4 name.
                resp.raise_for_status()
                with open(path_template.format(title), 'wb') as f:
                    for chunk in resp.iter_content(chunk_size=chunk_size):
                        if chunk:
                            f.write(chunk)
        except (requests.RequestException, OSError) as err:
            # Message: "download failed, skipped: <title> (<error>)".
            # NOTE: if the failure happens mid-transfer, a partially written
            # file may be left at the target path.
            print('下载失败，已跳过：{} ({})'.format(title, err))
            continue
    print('全部下载完成了')
60+
61+
if __name__ == "__main__":
    # Stage 1: crawl the listing pages and append "title|href" lines to url_file_name.
    get_list()
    # Stages 2 and 3: resolve each entry's video URL, then download the videos.
    save_video(get_video_url())

0 commit comments

Comments
 (0)