Resumable transfer of a large file works as follows: the request carries a Range header that specifies a byte range, and the server returns only the data in that range, so an interrupted transfer can continue from where it stopped instead of starting over.
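As a minimal illustration of the mechanism (the URL below is a placeholder), a ranged GET request and the headers that come back look roughly like this:

import requests

# Ask the server for the first 1024 bytes of the file only.
resp = requests.get('http://xxx.com/bigfile.zip',
                    headers={'Range': 'bytes=0-1023'})
print(resp.status_code)                    # 206 Partial Content if the Range header was honored
print(resp.headers.get('Content-Range'))   # e.g. 'bytes 0-1023/104857600'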
The two examples below show how to implement resumable transfer for large files.
For uploads, if the transfer is interrupted partway through, the following code resumes the upload from the last position that was successfully sent.
import os
import requests

class FileUploader():
    def __init__(self, url):
        self.url = url
        self.chunk_size = 1024 * 1024 * 10  # 10 MB per request

    def upload(self, file_path):
        with open(file_path, 'rb') as f:
            size = os.path.getsize(file_path)
            start = 0
            # Read the position where the previous upload stopped.
            if os.path.exists(f'{file_path}.cur'):
                with open(f'{file_path}.cur', 'r') as cf:
                    start = int(cf.read())
            f.seek(start)
            while start < size:
                end = min(start + self.chunk_size, size)
                data = f.read(self.chunk_size)
                headers = {'Content-Range': f'bytes {start}-{end-1}/{size}'}
                response = requests.put(self.url, headers=headers, data=data)
                if response.status_code in (200, 201):
                    # Persist the new position so an interrupted upload can resume.
                    with open(f'{file_path}.cur', 'w') as cf:
                        cf.write(str(end))
                    start = end
                else:
                    print(f'Error! status_code: {response.status_code}')
                    break  # stop instead of resending a stale chunk

if __name__ == '__main__':
    uploader = FileUploader('http://xxx.com/file')
    uploader.upload('bigfile.zip')
In the code above, the FileUploader class uploads the file in chunks of chunk_size bytes. Each request carries a Content-Range header that specifies the byte range of the chunk; before uploading, the class reads the last successfully uploaded position from the .cur file, and after each successful chunk it writes the new position back to that file.
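The upload example assumes a server endpoint that accepts PUT requests with a Content-Range header and writes each chunk at the declared offset. Such an endpoint is not shown in the example above; a minimal server-side sketch, assuming Flask and a hypothetical destination path, might look like this:

import os
import re
from flask import Flask, request

app = Flask(__name__)
UPLOAD_PATH = 'uploaded_bigfile.zip'  # hypothetical destination path

@app.route('/file', methods=['PUT'])
def upload_chunk():
    # Parse 'bytes start-end/total' from the Content-Range header.
    content_range = request.headers.get('Content-Range', '')
    m = re.match(r'bytes (\d+)-(\d+)/(\d+)', content_range)
    if not m:
        return 'Missing or malformed Content-Range', 400
    start = int(m.group(1))
    # Write the chunk at the offset the client declared.
    mode = 'r+b' if os.path.exists(UPLOAD_PATH) else 'wb'
    with open(UPLOAD_PATH, mode) as f:
        f.seek(start)
        f.write(request.get_data())
    return 'OK', 200

if __name__ == '__main__':
    app.run(port=8000)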
import os
import requests

class FileDownloader():
    def __init__(self, url, file_path):
        self.url = url
        self.file_path = file_path
        self.chunk_size = 1024 * 1024 * 10  # 10 MB per read

    def download(self):
        headers = {}
        if os.path.exists(self.file_path):
            # Resume: ask the server for everything after the bytes we already have.
            start = os.path.getsize(self.file_path)
            headers = {'Range': f'bytes={start}-'}
            print(f'Resumable mode: start download from {start}')
        else:
            start = 0
            print('Normal mode: start download from beginning')
        response = requests.get(self.url, headers=headers, stream=True)
        # A 206 response means the server honored Range; content-length is the remaining bytes.
        total_size = start + int(response.headers.get('content-length', 0))
        with open(self.file_path, 'ab') as f:
            for chunk in response.iter_content(chunk_size=self.chunk_size):
                if chunk:
                    f.write(chunk)
                    f.flush()
        print(f'Download success! total size: {total_size} bytes')

if __name__ == '__main__':
    downloader = FileDownloader('http://xxx.com/bigfile.zip', 'bigfile.zip')
    downloader.download()
In the second example, the FileDownloader class checks whether a partially downloaded file already exists; if so, it uses the file's current size as the resume offset in a Range header and appends the remaining bytes to the file. Putting the two examples together gives complete resumable upload and download for large files.
import os
import requests

class FileUploader():
    def __init__(self, url):
        self.url = url
        self.chunk_size = 1024 * 1024 * 10  # 10 MB per request

    def upload(self, file_path):
        with open(file_path, 'rb') as f:
            size = os.path.getsize(file_path)
            start = 0
            # Read the position where the previous upload stopped.
            if os.path.exists(f'{file_path}.cur'):
                with open(f'{file_path}.cur', 'r') as cf:
                    start = int(cf.read())
            f.seek(start)
            while start < size:
                end = min(start + self.chunk_size, size)
                data = f.read(self.chunk_size)
                headers = {'Content-Range': f'bytes {start}-{end-1}/{size}'}
                response = requests.put(self.url, headers=headers, data=data)
                if response.status_code in (200, 201):
                    # Persist the new position so an interrupted upload can resume.
                    with open(f'{file_path}.cur', 'w') as cf:
                        cf.write(str(end))
                    start = end
                else:
                    print(f'Error! status_code: {response.status_code}')
                    break  # stop instead of resending a stale chunk

class FileDownloader():
    def __init__(self, url, file_path):
        self.url = url
        self.file_path = file_path
        self.chunk_size = 1024 * 1024 * 10  # 10 MB per read

    def download(self):
        headers = {}
        if os.path.exists(self.file_path):
            # Resume: ask the server for everything after the bytes we already have.
            start = os.path.getsize(self.file_path)
            headers = {'Range': f'bytes={start}-'}
            print(f'Resumable mode: start download from {start}')
        else:
            start = 0
            print('Normal mode: start download from beginning')
        response = requests.get(self.url, headers=headers, stream=True)
        # A 206 response means the server honored Range; content-length is the remaining bytes.
        total_size = start + int(response.headers.get('content-length', 0))
        with open(self.file_path, 'ab') as f:
            for chunk in response.iter_content(chunk_size=self.chunk_size):
                if chunk:
                    f.write(chunk)
                    f.flush()
        print(f'Download success! total size: {total_size} bytes')

if __name__ == '__main__':
    uploader = FileUploader('http://xxx.com/file')
    uploader.upload('bigfile.zip')
    downloader = FileDownloader('http://xxx.com/bigfile.zip', 'bigfile.zip')
    downloader.download()
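After a resumed transfer completes, it is worth verifying that the reassembled file is intact. One common approach, not shown in the examples above, is to compare a hash of the local file with one published by the server; a minimal sketch of the local side:

import hashlib

def file_md5(path, chunk_size=1024 * 1024):
    # Stream the file in chunks so large files are not loaded into memory at once.
    md5 = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            md5.update(chunk)
    return md5.hexdigest()

print(file_md5('bigfile.zip'))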