本文目录导读:

- IMAP协议提取附件(Python + imaplib)
- POP3协议提取(适用于简单场景)
- 使用Outlook API(Exchange/Office 365)
- 使用Shell脚本(Linux/Mac)
- 安装和使用建议
我来为您介绍几种实用的邮件附件自动提取脚本方案,涵盖不同邮件协议和场景。
IMAP协议提取附件(Python + imaplib)
#!/usr/bin/env python3
"""
自动提取邮箱附件脚本 - IMAP协议
"""
import imaplib
import email
import os
from email.header import decode_header
import re
from datetime import datetime, timedelta
class EmailAttachmentExtractor:
    """Download e-mail attachments from an IMAP mailbox into a local directory.

    The instance only stores credentials; the network session is opened by
    connect(), which extract_attachments() calls for you.
    """

    # IMAP date literals require English month abbreviations, so they are
    # spelled out here instead of relying on the locale-dependent
    # strftime('%b').
    _IMAP_MONTHS = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')

    def __init__(self, email_address, password, imap_server='imap.gmail.com'):
        """Store the account details; no connection is made here."""
        self.email_address = email_address
        self.password = password
        self.imap_server = imap_server
        self.mail = None

    def connect(self):
        """Open an IMAP-over-SSL session and log in.

        Returns:
            True on success; False if connecting or logging in failed
            (the error is printed, not raised).
        """
        try:
            self.mail = imaplib.IMAP4_SSL(self.imap_server)
            self.mail.login(self.email_address, self.password)
            return True
        except Exception as e:
            print(f"连接失败: {e}")
            return False

    def decode_mime_words(self, text):
        """Decode an RFC 2047 (MIME encoded-word) header value to str.

        Args:
            text: raw header value; None (header absent) yields ''.

        Returns:
            The decoded text. If the header cannot be parsed at all,
            str(text) is returned unchanged.
        """
        if text is None:
            return ''
        try:
            decoded_parts = decode_header(text)
        except Exception:
            # Malformed encoded-words: fall back to the raw header text.
            return str(text)

        result = []
        for part, charset in decoded_parts:
            if not isinstance(part, bytes):
                result.append(part)
            elif charset:
                try:
                    result.append(part.decode(charset, errors='replace'))
                except LookupError:
                    # Charset label unknown to Python: best-effort UTF-8.
                    result.append(part.decode('utf-8', errors='replace'))
            else:
                result.append(part.decode('utf-8', errors='ignore'))
        return ''.join(result)

    def sanitize_filename(self, filename):
        """Replace path separators, reserved and control characters with '_'."""
        return re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', filename)

    def extract_attachments(self, save_dir='downloads',
                            max_emails=50, days_ago=7,
                            allowed_extensions=None):
        """Save attachments of recent INBOX messages into save_dir.

        Args:
            save_dir: target directory (created if missing).
            max_emails: maximum number of messages to process, newest first.
            days_ago: only messages from the last N days are searched.
            allowed_extensions: extensions to keep, e.g. ['pdf', 'xlsx'];
                matching is case-insensitive and a leading dot is accepted.
                None or empty keeps every attachment.

        Returns:
            Number of attachments written, or None if the connection or
            the search failed.
        """
        if not self.connect():
            return

        wanted_exts = None
        if allowed_extensions:
            wanted_exts = {str(ext).lower().lstrip('.')
                           for ext in allowed_extensions}

        extracted_count = 0
        try:
            os.makedirs(save_dir, exist_ok=True)
            self.mail.select('INBOX')

            cutoff = datetime.now() - timedelta(days=days_ago)
            since_date = (f"{cutoff.day:02d}-"
                          f"{self._IMAP_MONTHS[cutoff.month - 1]}-"
                          f"{cutoff.year:04d}")
            status, messages = self.mail.search(None, f'(SINCE {since_date})')
            if status != 'OK':
                print("搜索邮件失败")
                return

            # Reverse the server's ID list so the newest come first, then cap.
            email_ids = messages[0].split()
            email_ids.reverse()
            email_ids = email_ids[:max_emails]

            for email_id in email_ids:
                try:
                    status, msg_data = self.mail.fetch(email_id, '(RFC822)')
                    if status != 'OK':
                        continue
                    msg = email.message_from_bytes(msg_data[0][1])
                    subject = self.decode_mime_words(msg['Subject'])
                    print(f"处理邮件: {subject}")
                    if not msg.is_multipart():
                        continue
                    for part in msg.walk():
                        if self._save_attachment(part, save_dir, wanted_exts):
                            extracted_count += 1
                except Exception as e:
                    # One bad message must not abort the whole run.
                    print(f"处理邮件 {email_id} 时出错: {e}")
                    continue
        finally:
            # Always release the session, including on early return or error.
            self._disconnect()

        print(f"\n提取完成!共保存 {extracted_count} 个附件到 '{save_dir}' 目录")
        return extracted_count

    def _save_attachment(self, part, save_dir, wanted_exts):
        """Write one MIME part to save_dir if it is a wanted attachment.

        Returns True when a file was written, False when the part was skipped.
        """
        if part.get_content_maintype() == 'multipart':
            return False
        if part.get('Content-Disposition') is None:
            return False
        raw_name = part.get_filename()
        if not raw_name:
            return False

        filename = self.sanitize_filename(self.decode_mime_words(raw_name))

        if wanted_exts is not None:
            # A name without a dot has no extension and never matches.
            _, dot, ext = filename.rpartition('.')
            if not dot or ext.lower() not in wanted_exts:
                return False

        payload = part.get_payload(decode=True)
        if payload is None:
            # Nothing decodable to write; avoid leaving an empty file behind.
            return False

        stem, suffix = os.path.splitext(filename)
        candidate = filename
        counter = 1
        while True:
            filepath = os.path.join(save_dir, candidate)
            try:
                # Mode 'x' fails if the file exists, so an existing file is
                # never overwritten and there is no check-then-open race.
                with open(filepath, 'xb') as f:
                    f.write(payload)
                break
            except FileExistsError:
                candidate = f"{stem}_{counter}{suffix}"
                counter += 1

        print(f" ✓ 已保存: {candidate}")
        return True

    def _disconnect(self):
        """Log out of the IMAP session, tolerating an already broken link."""
        if self.mail is None:
            return
        try:
            self.mail.logout()
        except (imaplib.IMAP4.error, OSError) as e:
            print(f"断开连接时出错: {e}")
        finally:
            self.mail = None
# 使用示例
if __name__ == "__main__":
    # Account details: replace the placeholders with real values.
    account = (
        'your_email@gmail.com',  # mailbox address
        'your_password',         # password or app-specific password
        'imap.gmail.com',        # IMAP server host
    )
    # Extraction options, passed straight through as keyword arguments.
    options = dict(
        save_dir='downloaded_attachments',
        max_emails=100,
        days_ago=30,
        allowed_extensions=['pdf', 'docx', 'xlsx', 'zip'],  # types to keep
    )
    # Build the extractor and run one extraction pass.
    extractor = EmailAttachmentExtractor(*account)
    extractor.extract_attachments(**options)
POP3协议提取(适用于简单场景)
#!/usr/bin/env python3
"""
POP3协议提取附件脚本
"""
import poplib
import email
import os
from email.header import decode_header
import re
def _decode_pop3_filename(raw_name):
    """Decode an RFC 2047 attachment name and replace reserved characters."""
    try:
        pieces = decode_header(raw_name)
    except Exception:
        # Malformed encoded-words: keep the raw name.
        pieces = [(raw_name, None)]

    decoded = []
    for piece, charset in pieces:
        if isinstance(piece, bytes):
            try:
                # Honour the declared charset (e.g. GBK), not just UTF-8.
                piece = piece.decode(charset or 'utf-8', errors='ignore')
            except LookupError:
                piece = piece.decode('utf-8', errors='ignore')
        decoded.append(str(piece))
    return re.sub(r'[<>:"/\\|?*]', '_', ''.join(decoded))


def extract_attachments_pop3(email_address, password, pop3_server='pop.gmail.com',
                             port=995, save_dir='pop3_downloads', max_emails=20):
    """Save attachments of the newest messages from a POP3-over-SSL mailbox.

    Args:
        email_address: login name.
        password: password or app-specific password.
        pop3_server: POP3 host.
        port: POP3 SSL port.
        save_dir: target directory (created if missing).
        max_emails: number of most recent messages to inspect.

    Returns:
        Number of attachments written, or None if connecting or logging in
        failed (the error is printed, not raised).
    """
    try:
        mail = poplib.POP3_SSL(pop3_server, port)
    except Exception as e:
        print(f"连接失败: {e}")
        return

    try:
        mail.user(email_address)
        mail.pass_(password)
        num_messages = len(mail.list()[1])
        print(f"连接成功!邮箱有 {num_messages} 封邮件")
    except Exception as e:
        print(f"连接失败: {e}")
        # Do not leak the socket when authentication fails.
        mail.close()
        return

    extracted_count = 0
    try:
        os.makedirs(save_dir, exist_ok=True)
        start = max(1, num_messages - max_emails + 1)
        # The range walks from the highest message number down to `start`.
        for i in range(num_messages, start - 1, -1):
            try:
                _response, lines, _octets = mail.retr(i)
                msg = email.message_from_bytes(b'\n'.join(lines))
                if not msg.is_multipart():
                    continue
                for part in msg.walk():
                    if part.get_content_maintype() == 'multipart':
                        continue
                    if part.get('Content-Disposition') is None:
                        continue
                    raw_name = part.get_filename()
                    if not raw_name:
                        continue
                    payload = part.get_payload(decode=True)
                    if payload is None:
                        # Nothing decodable to write for this part.
                        continue

                    filename = _decode_pop3_filename(raw_name)
                    stem, suffix = os.path.splitext(filename)
                    candidate = filename
                    counter = 1
                    while True:
                        filepath = os.path.join(save_dir, candidate)
                        try:
                            # Mode 'x' refuses to overwrite an existing file.
                            with open(filepath, 'xb') as f:
                                f.write(payload)
                            break
                        except FileExistsError:
                            candidate = f"{stem}_{counter}{suffix}"
                            counter += 1
                    print(f"已保存: {candidate}")
                    extracted_count += 1
            except Exception as e:
                # One bad message must not abort the whole run.
                print(f"处理邮件 {i} 时出错: {e}")
                continue
    finally:
        try:
            mail.quit()
        except (poplib.error_proto, OSError):
            # Session already broken: just drop the socket.
            mail.close()

    print(f"\n完成!共提取 {extracted_count} 个附件")
    return extracted_count
# 使用示例
# extract_attachments_pop3('your_email@gmail.com', 'your_password')
使用Outlook API(Exchange/Office 365)
#!/usr/bin/env python3
"""
使用Microsoft Graph API提取附件(适用于Office 365/Outlook)
需要安装: pip install requests msal
"""
import requests
import msal
import os
from datetime import datetime, timedelta
class OutlookAttachmentExtractor:
    """Download file attachments from a mailbox through Microsoft Graph.

    Authenticates with the OAuth2 client-credentials flow (via msal) and
    then calls the Graph REST endpoints with requests.
    """

    GRAPH_BASE_URL = "https://graph.microsoft.com/v1.0"
    # Seconds to wait for Graph before giving up, so a stalled connection
    # cannot hang the script forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, client_id, client_secret, tenant_id):
        """Store the app registration details; no request is made here."""
        self.client_id = client_id
        self.client_secret = client_secret
        self.tenant_id = tenant_id
        self.access_token = None

    def get_access_token(self):
        """Acquire an app-only access token and store it on the instance.

        Returns:
            True if a token was obtained, False otherwise (the error
            description from the token response is printed).
        """
        authority = f"https://login.microsoftonline.com/{self.tenant_id}"
        app = msal.ConfidentialClientApplication(
            self.client_id,
            authority=authority,
            client_credential=self.client_secret
        )
        result = app.acquire_token_for_client(
            scopes=["https://graph.microsoft.com/.default"])
        if "access_token" in result:
            self.access_token = result['access_token']
            return True
        print(f"获取令牌失败: {result.get('error_description')}")
        return False

    def get_user_messages(self, user_email, days_back=7):
        """List recent messages of user_email that have attachments.

        Args:
            user_email: mailbox to query.
            days_back: only messages received in the last N days.

        Returns:
            A list of message dicts (id, subject, receivedDateTime) from the
            first result page (at most 50); an empty list on any failure.
        """
        from datetime import timezone

        headers = {
            'Authorization': f'Bearer {self.access_token}',
            'Content-Type': 'application/json'
        }
        # Timezone-aware UTC timestamp with second precision.
        cutoff = datetime.now(timezone.utc) - timedelta(days=days_back)
        since_date = cutoff.strftime('%Y-%m-%dT%H:%M:%SZ')
        url = f"{self.GRAPH_BASE_URL}/users/{user_email}/messages"
        params = {
            '$filter': f"receivedDateTime ge {since_date} and hasAttachments eq true",
            '$select': 'id,subject,receivedDateTime',
            '$top': 50
        }
        try:
            response = requests.get(url, headers=headers, params=params,
                                    timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as e:
            print(f"获取邮件失败: {e}")
            return []
        if response.status_code == 200:
            return response.json().get('value', [])
        print(f"获取邮件失败: {response.text}")
        return []

    def download_attachments(self, user_email, message_id, save_dir='outlook_downloads'):
        """Save every file attachment of one message into save_dir.

        Args:
            user_email: mailbox that owns the message.
            message_id: Graph message id.
            save_dir: target directory (created if missing).

        Returns:
            None. Failures are printed, not raised.
        """
        import base64

        headers = {
            'Authorization': f'Bearer {self.access_token}'
        }
        url = (f"{self.GRAPH_BASE_URL}/users/{user_email}"
               f"/messages/{message_id}/attachments")
        try:
            response = requests.get(url, headers=headers,
                                    timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as e:
            print(f"获取附件列表失败: {e}")
            return
        if response.status_code != 200:
            print(f"获取附件列表失败: {response.text}")
            return

        attachments = response.json().get('value', [])
        for attachment in attachments:
            if attachment.get('@odata.type') != '#microsoft.graph.fileAttachment':
                continue
            content = attachment.get('contentBytes')
            if content is None:
                # No inline content in this entry; nothing to write.
                continue
            # The name comes from the server (ultimately the sender), so it
            # must not be trusted as a path.
            filename = self._safe_filename(attachment.get('name') or 'attachment')
            os.makedirs(save_dir, exist_ok=True)
            filepath = os.path.join(save_dir, filename)
            with open(filepath, 'wb') as f:
                f.write(base64.b64decode(content))
            print(f"已保存: {filename}")

    @staticmethod
    def _safe_filename(name):
        """Return name with path separators and reserved characters replaced.

        Names that would resolve to a directory ('', '.', '..') become
        'attachment'.
        """
        cleaned = ''.join(
            '_' if (ch in '<>:"/\\|?*' or ord(ch) < 32) else ch
            for ch in str(name)
        )
        if cleaned in ('', '.', '..'):
            return 'attachment'
        return cleaned
# 使用示例(需要Azure AD注册应用)
# extractor = OutlookAttachmentExtractor('client_id', 'client_secret', 'tenant_id')
# extractor.get_access_token()
# messages = extractor.get_user_messages('user@company.com')
# for msg in messages:
# extractor.download_attachments('user@company.com', msg['id'])
使用Shell脚本(Linux/Mac)
#!/bin/bash
# Extract attachments from locally stored mail files
# (intended to be used together with getmail or fetchmail).

# Configuration
MAIL_DIR="$HOME/Maildir"
ATTACH_DIR="$HOME/attachments"
LOG_FILE="$HOME/attachment_extract.log"
# Attachment extensions to keep (regex alternation, matched case-insensitively)
EXTENSIONS="pdf|docx|xlsx|zip|jpg|png"

# Create the output directory
mkdir -p "$ATTACH_DIR"

# Find mail files and unpack their attachments.
# The -name tests are grouped so that -type f applies to both patterns, and
# NUL-delimited names keep paths with spaces or backslashes intact.
find "$MAIL_DIR" -type f \( -name "*.eml" -o -name "*.mail" \) -print0 |
while IFS= read -r -d '' mailfile; do
    echo "处理: $mailfile" >> "$LOG_FILE"
    # Unpack with munpack (must be installed); options precede the file name
    munpack -f -t -C "$ATTACH_DIR" "$mailfile" 2>/dev/null
    # Alternative: use ripmime instead
    # ripmime -i "$mailfile" -d "$ATTACH_DIR" 2>/dev/null
done

# Keep only files whose extension is in EXTENSIONS
find "$ATTACH_DIR" -type f -print0 |
while IFS= read -r -d '' file; do
    base="${file##*/}"
    ext="${base##*.}"
    # A name without a dot has no extension
    if [ "$ext" = "$base" ]; then
        ext=""
    fi
    if ! printf '%s\n' "$ext" | grep -qiE "^($EXTENSIONS)$"; then
        rm -- "$file"
        echo "删除不需要的文件: $file" >> "$LOG_FILE"
    fi
done

# Rename files to ASCII to avoid garbled names
find "$ATTACH_DIR" -type f -print0 |
while IFS= read -r -d '' file; do
    dir="${file%/*}"
    base="${file##*/}"
    # Transliterate only the file name, never the directory part
    newbase=$(printf '%s' "$base" | iconv -f utf-8 -t ascii//TRANSLIT 2>/dev/null)
    # A slash in the result would change the target directory
    newbase="${newbase//\//_}"
    # Skip when iconv failed (empty result) or nothing changed;
    # -n refuses to overwrite an existing file
    if [ -n "$newbase" ] && [ "$base" != "$newbase" ]; then
        mv -n -- "$file" "$dir/$newbase" 2>/dev/null
    fi
done

echo "完成!附件保存到: $ATTACH_DIR" | tee -a "$LOG_FILE"
安装和使用建议
Python脚本依赖安装
# 安装必要库
pip install python-dotenv  # 用于管理环境变量
# IMAP/POP3 使用标准库,无需额外安装
# 对于 Microsoft Graph API
pip install requests msal
环境变量配置(推荐)
# 创建 .env 文件
# EMAIL=your_email@gmail.com
# PASSWORD=your_app_password
# IMAP_SERVER=imap.gmail.com
# 在脚本中加载
from dotenv import load_dotenv
import os
# Read the .env file into the process environment.
load_dotenv()
# Pull the credentials from the environment; a missing variable yields None.
EMAIL, PASSWORD = (os.environ.get(key) for key in ('EMAIL', 'PASSWORD'))
安全注意事项
- 使用应用专用密码(Gmail等支持)
- 使用环境变量或加密配置,不要硬编码密码
- OAuth2认证更安全(推荐用于企业邮箱)
- 设置合理的权限,只读取必要的邮件
- 处理敏感信息时注意合规性
自动化设置(crontab示例)
# 每天凌晨2点运行
0 2 * * * /usr/bin/python3 /path/to/extract_attachments.py
# 每小时运行一次
0 * * * * /usr/bin/python3 /path/to/extract_attachments.py
选择哪种方案取决于您的邮件服务提供商和使用场景,Gmail、QQ邮箱等推荐使用IMAP方案,企业邮箱推荐使用API方案。