断点续传脚本如何做?

wen 实用脚本 50

本文目录导读:

断点续传脚本如何做?

  1. 核心原理
  2. 服务端支持
  3. Python 客户端实现
  4. JavaScript 前端实现
  5. 多线程断点续传(Python 进阶版)
  6. 注意事项

我来详细介绍断点续传的实现方法,主要从 HTTP 协议层面和文件操作层面来讲解。

核心原理

断点续传的核心是 HTTP Range 请求头,它允许客户端请求文件的某一部分而不是整个文件。

服务端支持

首先确保服务端支持 Range 请求:

# Python Flask 示例
from flask import Flask, Response, request
app = Flask(__name__)


@app.route('/download/<filename>')
def download_file(filename):
    """Serve ``/path/to/<filename>``, honouring an HTTP ``Range`` request header.

    With a ``Range`` header the response is ``206 Partial Content`` carrying
    only the requested bytes; without one it is ``200`` with the whole file.
    Both responses advertise ``Accept-Ranges: bytes`` so clients can tell that
    resuming is supported. Unknown or unsafe file names yield ``404``.

    NOTE(review): ``parse_range``, ``read_file_range`` and ``read_file`` are
    not defined in this snippet. ``parse_range`` is presumably expected to
    return an inclusive ``(start, end)`` pair, since the length is computed
    as ``end - start + 1`` -- confirm against the real helpers.
    """
    # Local import: this snippet uses os.path but only imports flask names.
    import os

    # The name comes straight from the URL; refuse anything that is not a
    # plain file name so it cannot point outside the download directory.
    if filename in ('.', '..') or os.path.basename(filename) != filename:
        return Response('Not Found', 404)

    file_path = f'/path/to/{filename}'
    if not os.path.isfile(file_path):
        # Without this check os.path.getsize() raises and Flask answers 500.
        return Response('Not Found', 404)
    file_size = os.path.getsize(file_path)

    # Read the Range header, e.g. "bytes=0-100".
    range_header = request.headers.get('Range', None)
    if range_header:
        start, end = parse_range(range_header, file_size)
        # Read only the requested slice of the file.
        data = read_file_range(file_path, start, end)
        return Response(
            data,
            206,  # Partial Content
            headers={
                'Content-Range': f'bytes {start}-{end}/{file_size}',
                'Content-Length': str(end - start + 1),
                'Accept-Ranges': 'bytes',
            }
        )

    # No Range header: return the complete file.
    return Response(
        read_file(file_path),
        200,
        headers={
            'Content-Length': str(file_size),
            'Accept-Ranges': 'bytes',
        }
    )

Python 客户端实现

import os
import requests
from typing import Optional
class ResumeDownloader:
    """Download a URL to a local file, resuming from a partial ``.tmp`` file.

    Bytes are written to ``save_path + '.tmp'``. The temporary file is moved
    to ``save_path`` only once at least the expected number of bytes is on
    disk, so an interrupted run never leaves a truncated file at
    ``save_path``.
    """

    def __init__(self, url: str, save_path: str):
        self.url = url
        self.save_path = save_path
        self.temp_path = save_path + '.tmp'  # partial data lives here until complete
        self.downloaded_size = 0

    def get_file_size(self) -> Optional[int]:
        """Return the remote size in bytes from a HEAD request, or None if unknown."""
        try:
            # requests.head() does not follow redirects unless asked to, which
            # would report the size of the redirect response instead of the file.
            response = requests.head(self.url, allow_redirects=True, timeout=30)
            response.raise_for_status()
            content_length = response.headers.get('Content-Length')
            return int(content_length) if content_length else None
        except (requests.RequestException, ValueError):
            # Network/HTTP failure or a non-numeric Content-Length header.
            return None

    def get_local_size(self) -> int:
        """Return the number of bytes already stored in the temporary file."""
        try:
            return os.path.getsize(self.temp_path)
        except OSError:
            return 0

    def _finalize(self) -> None:
        """Move the finished temporary file to its final location."""
        # os.replace overwrites an existing target on every platform,
        # whereas os.rename fails on Windows when the target exists.
        os.replace(self.temp_path, self.save_path)

    def download(self):
        """Download the file, continuing from any existing partial data.

        Returns:
            True when the complete file is at ``save_path``; False when the
            remote size is unknown, the server answered with an unexpected
            status, or the stream ended before all bytes arrived.

        Raises:
            requests.RequestException: if the connection fails mid-transfer.
                The partial data stays in the temporary file, so calling
                ``download()`` again resumes from there.
        """
        total_size = self.get_file_size()
        if not total_size:
            print("无法获取文件大小")
            return False

        self.downloaded_size = self.get_local_size()
        if self.downloaded_size > total_size:
            # More local bytes than the remote file has: the partial data is
            # stale or corrupt, so start over.
            os.remove(self.temp_path)
            self.downloaded_size = 0
        elif self.downloaded_size == total_size:
            # A previous run fetched everything but stopped before the rename.
            print("文件已完全下载")
            self._finalize()
            return True

        target_dir = os.path.dirname(self.temp_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        headers = {
            'Range': f'bytes={self.downloaded_size}-'
        }
        # Stream the body; the context manager releases the connection.
        with requests.get(
            self.url,
            headers=headers,
            stream=True,
            timeout=30
        ) as response:
            if response.status_code == 206:  # Partial Content
                print(f"开始断点续传,从 {self.downloaded_size} 字节继续")
                mode = 'ab' if self.downloaded_size > 0 else 'wb'
            elif response.status_code == 200:
                # The server ignored Range and is sending the whole file:
                # discard the partial data instead of giving up.
                print("服务器不支持断点续传,重新下载完整文件")
                self.downloaded_size = 0
                mode = 'wb'
            else:
                print(f"服务器不支持断点续传,状态码: {response.status_code}")
                return False

            with open(self.temp_path, mode) as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
                        self.downloaded_size += len(chunk)
                        progress = (self.downloaded_size / total_size) * 100
                        print(f"\r下载进度: {progress:.2f}%", end='')

        if self.downloaded_size < total_size:
            # The stream ended early; keep the temp file so the next call resumes.
            print(f"\n下载不完整: {self.downloaded_size}/{total_size} 字节")
            return False

        print("\n下载完成!")
        self._finalize()
        return True
# Usage example: build a downloader for one URL/target pair and run it.
downloader = ResumeDownloader(
    'https://example.com/large-file.zip',
    './downloads/large-file.zip',
)
downloader.download()

JavaScript 前端实现

/**
 * Resumable browser download.
 *
 * Received bytes are persisted chunk by chunk in IndexedDB, so a later call
 * to download() can request only the missing tail and still assemble the
 * complete file. The byte counter in localStorage (`download_<url>`) is
 * updated only after the matching chunk has been stored.
 */
class ResumeDownload {
    constructor(url, savePath) {
        this.url = url;
        this.savePath = savePath;
        this.db = null; // IndexedDB handle, opened lazily by openDb()
    }

    /** Return the remote size in bytes (NaN when Content-Length is missing). */
    async getFileSize() {
        const response = await fetch(this.url, { method: 'HEAD' });
        return parseInt(response.headers.get('content-length'), 10);
    }

    /** Return the byte count last recorded in localStorage, or 0. */
    async getDownloadedSize() {
        const saved = localStorage.getItem(`download_${this.url}`);
        return saved ? parseInt(saved, 10) : 0;
    }

    /** Open (and create on first use) the database that holds the chunks. */
    async openDb() {
        if (this.db) return this.db;
        this.db = await new Promise((resolve, reject) => {
            const request = indexedDB.open('resume-download', 1);
            request.onupgradeneeded = () => {
                request.result.createObjectStore('chunks');
            };
            request.onsuccess = () => resolve(request.result);
            request.onerror = () => reject(request.error);
        });
        return this.db;
    }

    /** Key range covering every stored chunk of this URL, ordered by offset. */
    chunkRange() {
        return IDBKeyRange.bound([this.url, 0], [this.url, Infinity]);
    }

    /** Persist one chunk under the key [url, byte offset]. */
    async storeChunk(offset, bytes) {
        const db = await this.openDb();
        await new Promise((resolve, reject) => {
            const tx = db.transaction('chunks', 'readwrite');
            tx.objectStore('chunks').put(bytes, [this.url, offset]);
            tx.oncomplete = () => resolve();
            tx.onerror = () => reject(tx.error);
            tx.onabort = () => reject(tx.error);
        });
    }

    /** Load all stored chunks of this URL in offset order. */
    async loadChunks() {
        const db = await this.openDb();
        return new Promise((resolve, reject) => {
            const request = db
                .transaction('chunks', 'readonly')
                .objectStore('chunks')
                .getAll(this.chunkRange());
            request.onsuccess = () => resolve(request.result);
            request.onerror = () => reject(request.error);
        });
    }

    /** Drop all stored chunks of this URL. */
    async clearChunks() {
        const db = await this.openDb();
        await new Promise((resolve, reject) => {
            const tx = db.transaction('chunks', 'readwrite');
            tx.objectStore('chunks').delete(this.chunkRange());
            tx.oncomplete = () => resolve();
            tx.onerror = () => reject(tx.error);
            tx.onabort = () => reject(tx.error);
        });
    }

    /**
     * Download the file, continuing after the bytes already stored.
     *
     * @returns {Promise<Blob|undefined>} the complete file, or undefined when
     *     the server answered with an unexpected status.
     */
    async download() {
        const progressKey = `download_${this.url}`;
        const totalSize = await this.getFileSize();
        const sizeKnown = Number.isFinite(totalSize) && totalSize > 0;

        // The stored chunks, not the localStorage counter, decide where to
        // resume: only bytes that were actually persisted can be reused.
        let chunks = await this.loadChunks();
        let downloadedSize = chunks.reduce((sum, part) => sum + part.length, 0);

        if (sizeKnown && downloadedSize > totalSize) {
            // More stored bytes than the remote file has: stale data.
            await this.clearChunks();
            chunks = [];
            downloadedSize = 0;
        }
        localStorage.setItem(progressKey, downloadedSize.toString());

        if (sizeKnown && downloadedSize >= totalSize) {
            console.log('文件已完全下载');
            return new Blob(chunks);
        }

        const response = await fetch(this.url, {
            headers: {
                'Range': `bytes=${downloadedSize}-`
            }
        });

        if (response.status === 200) {
            // The server ignored Range and sends the whole file again, so the
            // stored prefix must be dropped to avoid duplicating it.
            await this.clearChunks();
            chunks = [];
            downloadedSize = 0;
        } else if (response.status !== 206) {
            console.error(`服务器不支持断点续传,状态码: ${response.status}`);
            return;
        }

        const reader = response.body.getReader();
        while (true) {
            const { done, value } = await reader.read();
            if (done) break;
            // Persist first, then advance the counter, so the recorded
            // progress never runs ahead of the stored bytes.
            await this.storeChunk(downloadedSize, value);
            chunks.push(value);
            downloadedSize += value.length;
            localStorage.setItem(progressKey, downloadedSize.toString());
            if (sizeKnown) {
                const progress = (downloadedSize / totalSize) * 100;
                console.log(`进度: ${progress.toFixed(2)}%`);
            }
        }

        // Combine old and new chunks; the caller can save the Blob with the
        // File API or upload it to a server.
        const blob = new Blob(chunks);
        console.log('下载完成!');
        return blob;
    }
}

多线程断点续传(Python 进阶版)

import threading
import requests
import os
class MultiThreadResumeDownload:
    """Download one file as several byte ranges fetched by parallel threads.

    Each thread requests its own ``Range`` and writes it at the matching
    offset of a pre-sized target file. ``download()`` recreates the target
    file on every call, so progress is not carried over between runs.
    """

    def __init__(self, url, save_path, num_threads=4):
        self.url = url
        self.save_path = save_path
        self.num_threads = num_threads
        self.lock = threading.Lock()  # guards self.progress and self.errors
        self.progress = 0
        self.file_size = 0  # remote size, cached so it is fetched once
        self.errors = []    # (thread_id, exception) for every failed part

    def get_file_size(self):
        """Return the remote size in bytes from a HEAD request (0 if unknown)."""
        # requests.head() does not follow redirects unless asked to.
        response = requests.head(self.url, allow_redirects=True, timeout=30)
        return int(response.headers.get('Content-Length', 0))

    def _part_ranges(self, file_size):
        """Split ``file_size`` bytes into inclusive ``(start, end)`` ranges."""
        # Never use more parts than there are bytes, and always at least one.
        parts = max(1, min(self.num_threads, file_size))
        part_size = file_size // parts
        ranges = []
        for index in range(parts):
            start = index * part_size
            # The last part also takes the remainder of the division.
            end = file_size - 1 if index == parts - 1 else start + part_size - 1
            ranges.append((start, end))
        return ranges

    def download_part(self, start_byte, end_byte, thread_id):
        """Fetch bytes ``start_byte..end_byte`` (inclusive) into the target file.

        Raises:
            RuntimeError: if the server does not answer ``206 Partial Content``;
                writing a full ``200`` body at this offset would corrupt the file.
        """
        # Resolve the total once per part instead of issuing a HEAD request
        # for every chunk just to print the percentage.
        total = self.file_size or self.get_file_size()
        self.file_size = total

        headers = {'Range': f'bytes={start_byte}-{end_byte}'}
        with requests.get(self.url, headers=headers, stream=True, timeout=30) as response:
            if response.status_code != 206:
                raise RuntimeError(
                    f"服务器不支持断点续传,状态码: {response.status_code}"
                )
            # Every thread has its own file object and therefore its own file
            # position, so the writes need no lock and can run in parallel.
            with open(self.save_path, 'r+b') as f:
                f.seek(start_byte)
                for chunk in response.iter_content(chunk_size=8192):
                    if not chunk:
                        continue
                    f.write(chunk)
                    with self.lock:
                        self.progress += len(chunk)
                        done = self.progress
                    if total:
                        print(f"\r线程 {thread_id}: 进度 {done/total*100:.2f}%", end='')

    def _run_part(self, start_byte, end_byte, thread_id):
        """Thread entry point: run one part and record a failure, if any."""
        try:
            self.download_part(start_byte, end_byte, thread_id)
        except Exception as exc:  # thread boundary: collect, report in download()
            with self.lock:
                self.errors.append((thread_id, exc))

    def download(self):
        """Download the whole file; return True on success, False otherwise."""
        file_size = self.get_file_size()
        if file_size <= 0:
            print("无法获取文件大小")
            return False

        self.file_size = file_size
        self.progress = 0
        self.errors = []

        # Create the target file at its final size so each part can seek
        # to its own offset.
        with open(self.save_path, 'wb') as f:
            f.truncate(file_size)

        threads = [
            threading.Thread(target=self._run_part, args=(start, end, index))
            for index, (start, end) in enumerate(self._part_ranges(file_size))
        ]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()

        if self.errors:
            for thread_id, exc in self.errors:
                print(f"\n线程 {thread_id} 下载错误: {exc}")
            return False
        print("\n多线程下载完成!")
        return True

注意事项

服务器支持

  • 检查 Accept-Ranges: bytes 响应头
  • 服务器返回 206 Partial Content 状态码

文件完整性

  • 下载完成后验证文件哈希值(MD5/SHA256)
  • 使用临时文件,完成后重命名

错误处理

def safe_download(self):
    retry_count = 0
    max_retries = 3
    while retry_count < max_retries:
        try:
            self.download()
            break
        except requests.exceptions.ConnectionError:
            retry_count += 1
            print(f"连接失败,重试 {retry_count}/{max_retries}")
            time.sleep(2)
        except Exception as e:
            print(f"下载错误: {e}")
            break

缓存策略

  • 续传请求时带上 If-Range 头(值为首次下载时记录的 ETag 或 Last-Modified):文件未变更时服务端返回 206 继续续传;文件已变更时返回 200 和完整内容,此时应丢弃已下载的部分重新下载
  • 记录下载元数据(URL、已下载大小、时间戳)

这个实现涵盖了断点续传的核心功能,可以根据具体需求进行调整和扩展。

抱歉,评论功能暂时关闭!