|
| 1 | +import requests |
| 2 | +from requests.models import requote_uri |
| 3 | +from bs4 import BeautifulSoup |
| 4 | +import time |
| 5 | +import random |
| 6 | +import json |
| 7 | +import re |
| 8 | + |
| 9 | +url_file_name = 'D:\\url.txt' |
| 10 | + |
def get_list(pages=500):
    """Scrape the Huya "hot" video listing and record every video found.

    For each listing page, one ``title|href`` line per video is appended to
    the file named by the module-level ``url_file_name``.

    Args:
        pages: Number of listing pages to fetch, starting at page 1.
            Defaults to 500, the count the script was originally written for.
    """
    list_url = 'https://v.huya.com/g/all?set_id=31&order=hot&page={}'
    for page in range(1, pages + 1):
        html = requests.get(list_url.format(page), timeout=30)
        soup = BeautifulSoup(html.text, 'html.parser')
        ul = soup.find('ul', class_='vhy-video-list w215 clearfix')
        if ul is None:
            # No video list in the response: there is nothing to parse, so
            # stop instead of failing with AttributeError on None.
            print("第 {} 页没有视频列表,停止抓取".format(page))
            break

        lines = []
        for li in ul.find_all('li'):
            a = li.find('a', class_='video-wrap statpid')
            if a is None:
                continue
            href = a.get('href')
            title = a.get('title')
            if not href or not title:
                continue
            # Remove characters that are not allowed in file names
            # (this also removes '|', which is used as the field separator).
            lines.append(validate_title(title) + '|' + href + '\n')

        # Open the output file once per page rather than once per entry.
        if lines:
            with open(url_file_name, 'a', encoding='utf-8') as f:
                f.writelines(lines)

        print("已经抓取了 {} 页".format(page))
        # Short random pause (0.1-0.9 s) between page requests.
        time.sleep(random.randint(1, 9) / 10)
| 27 | + |
# Characters that cannot appear in a Windows file name: the reserved
# punctuation / \ : * ? " < > | plus the ASCII control characters 0-31.
_INVALID_TITLE_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def validate_title(title):
    """Return *title* with characters that are invalid in file names removed.

    Besides the reserved punctuation, ASCII control characters (newline, tab,
    ...) are stripped as well, so the result always fits on a single line.

    Args:
        title: The raw video title string.

    Returns:
        The cleaned title; may be empty if every character was removed.
    """
    return _INVALID_TITLE_CHARS.sub("", title)
| 32 | + |
def get_video_url():
    """Resolve the download URL of every video listed in ``url_file_name``.

    Each line of the file is expected to look like ``title|/play/<id>.html``.
    The video id is sent to the Huya player API and the first transcode URL of
    the first returned item is taken as the download address.

    Lines that are blank or malformed, and videos whose address cannot be
    obtained, are skipped (with a message) instead of aborting the whole run.

    Returns:
        A list of ``{'title': ..., 'url': ...}`` dicts, in file order.
    """
    api_url = ('https://v-api-player-ssl.huya.com/'
               '?r=vhuyaplay%2Fvideo&vid={}&format=mp4%2Cm3u8')

    with open(url_file_name, 'r', encoding='utf-8') as urls_file:
        url_lines = urls_file.readlines()

    video_urls = []
    for line in url_lines:
        # Line format: video title | address
        title, sep, href = line.rstrip('\r\n').partition('|')
        if not sep or not href:
            continue

        video_id = href.replace('/play/', '')
        if video_id.endswith('.html'):
            video_id = video_id[:-len('.html')]

        try:
            response = requests.get(api_url.format(video_id), timeout=30)
            data = json.loads(response.text)
            url = data['result']['items'][0]['transcode']['urls'][0]
        except (requests.RequestException, ValueError,
                KeyError, IndexError, TypeError):
            # Request failed, body was not JSON, or it had no usable URL.
            print('跳过 {}:无法获取视频地址'.format(title))
            continue

        video_urls.append({'title': title, 'url': url})

    return video_urls
| 50 | + |
def save_video(video_urls):
    """Download every video in *video_urls* to ``D:\\<title>.mp4``.

    Args:
        video_urls: Iterable of dicts with a ``'title'`` and a ``'url'`` key,
            as produced by ``get_video_url``.

    A download that fails (connection error, timeout, HTTP error status) is
    reported and skipped so the remaining videos are still fetched. A failure
    in the middle of a transfer can leave a partial file behind.
    """
    for item in video_urls:
        title = item.get('title')
        print('正在下载:{}'.format(title))
        try:
            # Stream the body so a large video is never held fully in memory.
            with requests.get(item.get('url'), stream=True,
                              timeout=30) as response:
                # Do not save an HTTP error page under a .mp4 name.
                response.raise_for_status()
                with open('D:\\{}.mp4'.format(title), 'wb') as f:
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        f.write(chunk)
        except requests.RequestException as err:
            print('下载失败:{} ({})'.format(title, err))
    print('全部下载完成了')
| 60 | + |
if __name__ == "__main__":
    # Step 1: scrape the listing pages into the URL file.
    get_list()
    # Steps 2 and 3: resolve each entry to a download URL, then download it.
    save_video(get_video_url())
0 commit comments