本文目录导读:

是的,实用脚本完全可以批量编码,以下给出几种常见场景的批量编码脚本方案,涵盖文件名转码、文件内容转码、视频/音频转码等需求。
批量转换文件编码(如 GBK → UTF-8)
Python 脚本
import os
import chardet
import codecs
def detect_encoding(file_path):
    """Guess the text encoding of a file using chardet.

    The complete file is read as raw bytes and passed to ``chardet.detect``.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The ``'encoding'`` entry of chardet's result dict. Callers in this
        script treat a falsy value as "no encoding detected".
    """
    with open(file_path, 'rb') as stream:
        payload = stream.read()
    return chardet.detect(payload)['encoding']
def batch_convert_encoding(folder_path, target_encoding='utf-8'):
    """Re-encode, in place, every matching text file below ``folder_path``.

    Files are selected by extension, their current encoding is guessed with
    ``detect_encoding`` and their content is rewritten in ``target_encoding``.
    Errors are reported per file and never abort the batch.

    Args:
        folder_path: Root directory; it is walked recursively.
        target_encoding: Name of the encoding to convert to.

    Returns:
        None. Progress and failures are printed.
    """
    extensions = ('.txt', '.csv', '.html', '.py', '.json', '.xml')  # extend as needed
    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            if not file.endswith(extensions):
                continue
            file_path = os.path.join(root, file)
            try:
                source_encoding = detect_encoding(file_path)
                if not source_encoding:
                    continue
                # Compare canonical codec names so that aliases such as
                # 'utf8' / 'UTF-8' are recognised as the same encoding.
                if codecs.lookup(source_encoding).name == codecs.lookup(target_encoding).name:
                    continue
                # Work on bytes: text mode would translate line endings and
                # silently change files that use CRLF.
                with open(file_path, 'rb') as f:
                    raw = f.read()
                # Encode before the file is reopened for writing, so a
                # character that does not exist in the target encoding
                # cannot leave a truncated file behind.
                converted = raw.decode(source_encoding).encode(target_encoding)
                if converted == raw:
                    # Nothing would change (e.g. pure ASCII -> UTF-8).
                    continue
                with open(file_path, 'wb') as f:
                    f.write(converted)
                print(f"✅ 已转换: {file_path} ({source_encoding} → {target_encoding})")
            except Exception as e:
                # Deliberately broad: one bad file must not stop the batch.
                print(f"❌ 失败: {file_path} - {e}")
# Usage example: run the batch conversion on ./your_folder with UTF-8 as
# the target encoding.
batch_convert_encoding('./your_folder', 'utf-8')
安装依赖
pip install chardet
批量重命名文件(编码转换 + 规范化)
import os
import re
def batch_rename_with_encoding(folder_path, wrong_encoding='latin-1', real_encoding='gbk'):
    """Repair garbled (mojibake) file names directly inside ``folder_path``.

    A name is repaired by encoding it back to bytes with ``wrong_encoding``
    (the encoding it was mistakenly decoded with) and decoding those bytes
    with ``real_encoding`` (the encoding it was originally written in).
    Hidden files, sub-directories, names that cannot be round-tripped and
    names that would not change are left alone. An existing file is never
    overwritten.

    Back up the folder first and adjust the two encodings to the kind of
    garbling you actually have.

    Args:
        folder_path: Directory whose immediate files are examined
            (not recursive).
        wrong_encoding: Encoding the names were wrongly decoded with.
        real_encoding: Encoding the names were originally stored in.

    Returns:
        None. Renames, skipped collisions and failures are printed.

    Raises:
        LookupError: If either encoding name is unknown to Python.
        OSError: If ``folder_path`` cannot be listed.
    """
    for filename in os.listdir(folder_path):
        if filename.startswith('.'):  # skip hidden files
            continue
        old_path = os.path.join(folder_path, filename)
        if not os.path.isfile(old_path):
            continue
        try:
            new_name = filename.encode(wrong_encoding).decode(real_encoding)
        except (UnicodeEncodeError, UnicodeDecodeError):
            # The name is not garbled in the assumed way; leave it untouched.
            continue
        if new_name == filename:
            # Plain ASCII names survive the round trip unchanged.
            continue
        new_path = os.path.join(folder_path, new_name)
        if os.path.exists(new_path):
            # os.rename would silently replace the existing file on POSIX.
            print(f"跳过(目标已存在): {filename} → {new_name}")
            continue
        try:
            os.rename(old_path, new_path)
        except (OSError, ValueError) as e:
            print(f"❌ 失败: {filename} - {e}")
            continue
        print(f"重命名: {filename} → {new_name}")
# 使用前请备份!实际中根据乱码类型调整编码参数
批量转码视频/音频(FFmpeg + Shell)
Linux/macOS Shell 脚本
#!/bin/bash
# Batch-transcode every .mp4 in $input_folder to H.264 video + AAC audio,
# writing "<name>_converted.mp4" into $output_folder.
input_folder="./videos"
output_folder="./converted"
mkdir -p "$output_folder"
# Without nullglob an input folder with no .mp4 files would leave the literal
# pattern "./videos/*.mp4" in $file and ffmpeg would be run on it.
shopt -s nullglob
for file in "$input_folder"/*.mp4; do
    # File name without directory and without the .mp4 extension
    filename=$(basename "$file" .mp4)
    # Re-encode to H.264 + AAC (128 kbit/s audio). No quality option is
    # given, so libx264's default settings apply; this is a lossy re-encode.
    # -nostdin stops ffmpeg from reading the terminal or a pipe, so it can
    # never block on a prompt in the middle of the batch.
    if ffmpeg -nostdin -i "$file" -c:v libx264 -c:a aac -b:a 128k "$output_folder/${filename}_converted.mp4"; then
        echo "已完成: $filename"
    else
        # Report the failure instead of claiming success.
        echo "转码失败: $filename" >&2
    fi
done
echo "批量转码完成!"
Windows Powershell 等效命令
# Batch-transcode every .mp4 in $inputFolder to H.264 video + AAC audio,
# writing "<name>_converted.mp4" into $outputFolder.
$inputFolder = "C:\videos"
$outputFolder = "C:\converted"
# -Force makes this succeed when the folder already exists; Out-Null keeps
# the returned directory object out of the console output.
New-Item -ItemType Directory -Force -Path $outputFolder | Out-Null
Get-ChildItem -Path $inputFolder -Filter *.mp4 -File | ForEach-Object {
    $outputName = $_.BaseName + "_converted.mp4"
    $outputPath = Join-Path $outputFolder $outputName
    # Same ffmpeg options as the shell version (including the 128 kbit/s
    # audio bitrate); -nostdin prevents ffmpeg from waiting on a prompt.
    ffmpeg -nostdin -i $_.FullName -c:v libx264 -c:a aac -b:a 128k $outputPath
    # Only report success when ffmpeg actually exited with code 0.
    if ($LASTEXITCODE -eq 0) {
        Write-Host "已完成: $($_.Name)"
    } else {
        Write-Warning "转码失败: $($_.Name)"
    }
}
综合实用脚本:批量编码检测 + 转码 + 报告
import os, codecs, json
from collections import defaultdict
import chardet
class BatchEncoder:
    """Detect, report and convert the encodings of text files in a tree.

    Files are selected by file-name suffix. ``scan`` builds a histogram of
    detected encodings; ``batch_convert`` rewrites files in a target
    encoding, optionally keeping the original as ``<name>.bak``.
    """

    def __init__(self, extensions=('.txt', '.csv', '.html', '.md', '.json', '.xml', '.py')):
        """Create an encoder.

        Args:
            extensions: File-name suffixes of the files to process.
        """
        self.extensions = extensions
        # Detected encoding name -> number of files; keeps accumulating
        # across successive scan() calls.
        self.stats = defaultdict(int)

    def _iter_files(self, folder):
        """Yield ``(file_name, file_path)`` for every matching file below ``folder``."""
        suffixes = tuple(self.extensions)
        for root, _dirs, files in os.walk(folder):
            for file in files:
                if file.endswith(suffixes):
                    yield file, os.path.join(root, file)

    def scan(self, folder):
        """Scan ``folder`` recursively and report the encoding distribution.

        Returns:
            A plain dict mapping each detected encoding name (or
            ``'unknown'``) to the number of files counted so far.
        """
        for _file, file_path in self._iter_files(folder):
            self.stats[self._detect_encoding(file_path)] += 1
        return dict(self.stats)

    def _detect_encoding(self, file_path):
        """Return chardet's guess for the first 100000 bytes, or ``'unknown'``."""
        with open(file_path, 'rb') as f:
            return chardet.detect(f.read(100000))['encoding'] or 'unknown'

    def batch_convert(self, folder, target='utf-8', backup=True):
        """Convert every matching file below ``folder`` to ``target``.

        Files already in the target encoding, and files whose bytes would
        not change (e.g. pure ASCII converted to UTF-8), are skipped and
        not counted. Per-file errors are collected instead of raised.

        Args:
            folder: Root directory; it is walked recursively.
            target: Name of the encoding to convert to.
            backup: When true, the original file is renamed to
                ``<path>.bak`` before the converted file is written.

        Returns:
            A tuple ``(success, failures)``: the number of files rewritten
            and a list of ``(file_path, error_message)`` pairs.
        """
        success = 0
        failures = []
        for file, file_path in self._iter_files(folder):
            try:
                src_enc = self._detect_encoding(file_path)
                # Canonical codec names make aliases ('utf8', 'UTF-8')
                # compare equal. An undetected encoding ('unknown') raises
                # LookupError here and is recorded as a failure.
                if codecs.lookup(src_enc).name == codecs.lookup(target).name:
                    continue
                with open(file_path, 'rb') as f:
                    binary = f.read()
                # Decode AND encode before touching the file on disk, so an
                # unconvertible character cannot leave the file renamed
                # away or truncated.
                converted = binary.decode(src_enc).encode(target)
                if converted == binary:
                    continue
                if backup:
                    os.rename(file_path, file_path + '.bak')
                # Binary write: text mode would translate line endings
                # (turning "\r\n" into "\r\r\n" on Windows).
                with open(file_path, 'wb') as f:
                    f.write(converted)
                print(f"✅ {file}: {src_enc} → {target}")
                success += 1
            except Exception as e:
                # Deliberately broad: one bad file must not stop the batch.
                failures.append((file_path, str(e)))
                print(f"❌ {file}: {e}")
        return success, failures
# Usage example: print the encoding distribution for /your/path, then
# convert the files to UTF-8 with backups enabled and print the totals.
encoder = BatchEncoder()
print("编码分布统计:", encoder.scan('/your/path'))
# s is the success count, f the list of failures returned by batch_convert.
s, f = encoder.batch_convert('/your/path', target='utf-8', backup=True)
print(f"成功: {s}, 失败: {len(f)}")
实用建议
| 场景 | 推荐方案 | 注意事项 |
|---|---|---|
| 文本文件编码统一 | Python + chardet | 先备份,部分文件可能无法正确检测 |
| 文件名乱码修复 | 自定义 Python 脚本 | 需了解实际编码情况(GBK/Shift-JIS等) |
| 视频/音频转码 | FFmpeg 命令行 | 避免多次转码造成质量损失 |
| 批量图片格式/编码 | ImageMagick (convert) | 使用 -quality 参数控制压缩 |
| 大规模生产环境 | 使用成熟的命令行工具(如 iconv / enca) | 结合 find + parallel 实现并行 |
核心原则:批量操作前务必先备份,或先在一个测试文件夹中试用,因为原地转换会覆盖原文件(转错后难以恢复),且自动检测可能出错。