这里分别提供了单线程(同步)下载和异步下载两种方式。
只需在列表 urlData 中添加需要下载的链接即可。
注意:批量下载时请尽量加上延迟,避免触发网站的反爬机制导致 IP 被封。
# -*- coding: utf-8 -*-
import requests
from tqdm import tqdm
import os
import base64
from cryptography.fernet import Fernet
import aiohttp
import asyncio
import uuid
import uvloop
from pymysql import *
# Switch asyncio to uvloop's event loop when uvloop can be imported;
# otherwise keep the default asyncio event loop.
# NOTE(review): uvloop is also imported unconditionally in the import list
# above, so this guard only matters once that import is dropped.
try:
    import uvloop
except ImportError:
    pass
else:
    asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
# 单线程下载
def downloadByUrl(url, downloadPath, fileName):
    """Download ``url`` synchronously, resuming a partially downloaded file.

    The size of any file already present at the destination is used as the
    resume offset and sent to the server in a ``Range`` header.

    Args:
        url: Address of the file to download.
        downloadPath: Directory that receives the file.
        fileName: Name of the file to create inside ``downloadPath``.

    Returns:
        Total size of the remote file in bytes, taken from the
        ``content-length`` header of the initial response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the response carries no ``content-length`` header.
    """
    # os.path.join gives the same result as plain concatenation when
    # downloadPath ends with a separator, and also works when it does not.
    dst = os.path.join(downloadPath, fileName)

    # Probe request: only the headers are needed. The context manager
    # releases the connection without ever reading the body.
    with requests.get(url, stream=True) as probe:
        probe.raise_for_status()
        file_size = int(probe.headers['content-length'])

    first_byte = os.path.getsize(dst) if os.path.exists(dst) else 0
    if first_byte >= file_size:
        # Nothing left to fetch.
        return file_size

    # Open-ended range: the end offset of a Range header is inclusive, so
    # "first-file_size" would ask for one byte past the end of the file.
    header = {"Range": f"bytes={first_byte}-"}
    with requests.get(url, headers=header, stream=True) as req:
        req.raise_for_status()
        # 206 means the server honoured the range. Any other success status
        # carries the whole file, so the partial file must be overwritten
        # instead of appended to.
        resumed = req.status_code == 206
        if not resumed:
            first_byte = 0
        with tqdm(total=file_size, initial=first_byte,
                  unit='B', unit_scale=True, desc=dst) as pbar, \
                open(dst, 'ab' if resumed else 'wb') as f:
            for chunk in req.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
                    # The last chunk is usually shorter than chunk_size.
                    pbar.update(len(chunk))
    return file_size
# 异步下载
async def async_download_from_url(url, downloadPath, fileName):
    """Download ``url`` asynchronously, resuming a partially downloaded file.

    A first call to ``fetch`` without headers is used only to read the
    ``content-length`` header; a second call with a ``Range`` header streams
    the remaining bytes into the destination file.

    Args:
        url: Address of the file to download.
        downloadPath: Directory that receives the file.
        fileName: Name of the file to create inside ``downloadPath``.

    Returns:
        Total size of the remote file in bytes, both when the file was
        already complete and after a download has finished.

    Raises:
        KeyError: If the response carries no ``content-length`` header.
    """
    # os.path.join gives the same result as plain concatenation when
    # downloadPath ends with a separator, and also works when it does not.
    totalPath = os.path.join(downloadPath, fileName)

    async with aiohttp.connector.TCPConnector(
            limit=300, force_close=True, enable_cleanup_closed=True) as tc:
        async with aiohttp.ClientSession(connector=tc) as session:
            req = await fetch(session, url, totalPath)
            file_size = int(req.headers['content-length'])
            print(f"获取视频总长度:{file_size}")

            first_byte = (
                os.path.getsize(totalPath) if os.path.exists(totalPath) else 0
            )
            if first_byte >= file_size:
                # Nothing left to fetch.
                return file_size

            # Open-ended range: the end offset of a Range header is
            # inclusive, so "first-file_size" would overshoot by one byte.
            header = {"Range": f"bytes={first_byte}-"}
            pbar = tqdm(
                total=file_size, initial=first_byte,
                unit='B', unit_scale=True, desc=totalPath)
            try:
                await fetch(session, url, totalPath, pbar=pbar, headers=header)
            finally:
                # Closing again is harmless if fetch already closed the bar;
                # this guarantees it is closed when fetch raises.
                pbar.close()
            return file_size
async def fetch(session, url, dst, pbar=None, headers=None):
    """GET ``url`` through ``session``; stream the body to ``dst`` if ``headers`` is set.

    Without ``headers`` the call is a probe: the request is sent and the
    response object is returned so the caller can inspect its headers. The
    response has already been released at that point, so its body must not
    be read.

    With ``headers`` the response body is written to ``dst`` in 1 KiB chunks.

    Args:
        session: Object whose ``get(url, headers=...)`` returns an async
            context manager yielding the response (e.g. an aiohttp session).
        url: Address to request.
        dst: Path of the file the body is written to.
        pbar: Optional progress bar; advanced by the number of bytes written
            and closed when the transfer ends, successfully or not.
        headers: Request headers; a truthy value selects download mode.

    Returns:
        The response object in probe mode, ``None`` in download mode.
    """
    if not headers:
        async with session.get(url) as req:
            # Fail early instead of reporting the size of an error page.
            req.raise_for_status()
            return req

    async with session.get(url, headers=headers) as req:
        req.raise_for_status()
        # A 200 reply to a Range request means the server ignored the range
        # and is sending the whole file. Appending it to the partial file
        # would corrupt it, so start over from an empty file.
        restart = req.status == 200 and any(
            str(name).lower() == "range" for name in headers)
        if restart and pbar is not None:
            pbar.reset()
        try:
            with open(dst, 'wb' if restart else 'ab') as f:
                while True:
                    chunk = await req.content.read(1024)
                    if not chunk:
                        break
                    f.write(chunk)
                    if pbar is not None:
                        # The last chunk is usually shorter than 1024 bytes.
                        pbar.update(len(chunk))
        finally:
            if pbar is not None:
                pbar.close()
if __name__ == '__main__':
    # Destination directory (create it before running the script).
    downloadPath = "/Users/renyuxin/Downloads/"
    # List of mp4 links (only links ending in .mp4 download correctly).
    urlData = [
        "http://ryx-smart.oss-cn-beijing.aliyuncs.com/app/video/girl-video/31350AAACBA0460B81E06E8C5D40A04D.mp4",
        "http://ryx-smart.oss-cn-beijing.aliyuncs.com/app/video/girl-video/45F972192C1E4332B575DD22FB197306.mp4",
    ]

    # Method 1: synchronous download, one URL after another.
    for item in urlData:
        # A fresh UUID-based name per file.
        fileName = f"{uuid.uuid1()}.mp4"
        # Download the current item (not always the first URL in the list).
        downloadByUrl(item, downloadPath, fileName)
    print(" =============================================== ")

    # Method 2: asynchronous download, all URLs in flight at once.
    # NOTE: every URL is requested concurrently; add throttling here if the
    # server rate-limits or bans aggressive clients.
    async def _download_all(urls):
        """Download every URL concurrently; return results or exceptions."""
        jobs = [
            async_download_from_url(link, downloadPath, f"{uuid.uuid1()}.mp4")
            for link in urls
        ]
        # return_exceptions=True keeps one failed download from cancelling
        # the report for the others.
        return await asyncio.gather(*jobs, return_exceptions=True)

    # An explicit loop avoids get_event_loop() with no running loop, which
    # is deprecated; new_event_loop() still honours the installed policy.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        outcomes = loop.run_until_complete(_download_all(urlData))
        for link, outcome in zip(urlData, outcomes):
            if isinstance(outcome, BaseException):
                # Report failures instead of swallowing them silently.
                print(f"download failed: {link}: {outcome!r}")
    finally:
        # Always release the loop once all downloads have finished.
        loop.run_until_complete(loop.shutdown_asyncgens())
        loop.close()