Python案例怎么重启异常进程?

wen python案例 75

本文目录导读:

Python案例怎么重启异常进程?

  1. 使用子进程管理(推荐)
  2. 使用supervisor管理(生产环境推荐)
  3. 使用进程管理器类
  4. 使用Python-daemon实现守护进程
  5. Docker方式(现代推荐)
  6. 最佳实践建议
  7. 生产环境推荐方案

在Python中重启异常进程有多种方法,我来介绍几种常见且实用的方案:

使用子进程管理(推荐)

基础版本

import subprocess
import time
import sys
def run_script():
    """Launch ``your_script.py`` with the current interpreter and wait for it.

    Returns:
        bool: True when the child exits with status 0. False when it exits
        with a non-zero status, or when starting/waiting raises an exception
        (the exception is printed, not propagated).
    """
    command = [sys.executable, 'your_script.py']
    try:
        child = subprocess.Popen(command)
        # wait() blocks until the child terminates and returns its exit status.
        exit_code = child.wait()
    except Exception as exc:
        print(f"进程异常退出: {exc}")
        return False
    else:
        return exit_code == 0
def monitor_and_restart(max_restarts=5, base_delay=2):
    """Run the monitored script and restart it with exponential backoff.

    The script is started via ``run_script()``. A clean exit stops the loop.
    After each failure the wait before the next start doubles
    (``base_delay``, ``2 * base_delay``, ``4 * base_delay``, ...). No wait is
    performed after the final failed attempt because nothing is restarted.

    Args:
        max_restarts: Maximum number of failed runs tolerated before giving up.
        base_delay: Delay in seconds before the first restart.
    """
    restart_count = 0
    while restart_count < max_restarts:
        print(f"启动进程 (第{restart_count + 1}次)")
        if run_script():
            print("进程正常退出")
            return
        restart_count += 1
        if restart_count < max_restarts:
            # Real exponential backoff; the printed value is the actual delay.
            delay = base_delay * 2 ** (restart_count - 1)
            print(f"进程异常,{delay}秒后重启...")
            time.sleep(delay)
    print("达到最大重启次数,停止监控")
# Script entry point: start the restart loop only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    monitor_and_restart()

增强版本(带日志和邮件通知)

import subprocess
import time
import sys
import logging
import smtplib
from datetime import datetime
from pathlib import Path
class ProcessMonitor:
    """Run a Python script as a child process and restart it when it fails.

    Args:
        script_path: Path of the script started with the current interpreter.
        max_restarts: Maximum number of start attempts before giving up.
        restart_delay: Base delay in seconds; the wait before attempt ``n + 1``
            is ``restart_delay * n``.
        timeout: Maximum run time in seconds for a single attempt. A child
            that exceeds it is killed and the attempt counts as a failure.
    """

    def __init__(self, script_path, max_restarts=5, restart_delay=5, timeout=3600):
        self.script_path = script_path
        self.max_restarts = max_restarts
        self.restart_delay = restart_delay
        self.timeout = timeout
        self.restart_count = 0
        self.setup_logging()

    def setup_logging(self):
        """Configure root logging to a dated log file and the console."""
        log_file = Path(f"process_monitor_{datetime.now().strftime('%Y%m%d')}.log")
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[
                # Explicit UTF-8: the log messages are not ASCII and the
                # platform default encoding may be unable to represent them.
                logging.FileHandler(log_file, encoding="utf-8"),
                logging.StreamHandler(),
            ],
        )
        self.logger = logging.getLogger(__name__)

    def send_alert(self, message):
        """Send an alert, logging (never raising) if delivery fails.

        Args:
            message: Text of the alert.
        """
        try:
            self._deliver_alert(message)
        except Exception as e:
            self.logger.error(f"发送告警失败: {e}")

    def _deliver_alert(self, message):
        """Deliver *message* to an alert channel; does nothing by default.

        Override to integrate e-mail, chat webhooks, or similar.
        """

    def run_process(self):
        """Run the script once and report whether it succeeded.

        Returns:
            bool: True if the child exited with status 0. False on a non-zero
            exit status, on timeout, or if starting the child failed.
        """
        try:
            process = subprocess.Popen(
                [sys.executable, self.script_path],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            try:
                # Blocks until the child exits or the timeout elapses, while
                # draining both pipes so the child cannot stall on a full pipe.
                _stdout, stderr = process.communicate(timeout=self.timeout)
            except subprocess.TimeoutExpired:
                self.logger.warning("进程超时,强制终止")
                process.kill()
                # Reap the killed child and close its pipes; without this it
                # would linger as a zombie.
                process.communicate()
                return False
            if process.returncode != 0:
                self.logger.error(f"进程退出码: {process.returncode}")
                if stderr:
                    # errors="replace": child output is not guaranteed UTF-8.
                    self.logger.error(
                        f"错误输出: {stderr.decode(errors='replace')}"
                    )
                return False
            return True
        except Exception as e:
            self.logger.error(f"运行进程异常: {e}")
            return False

    def start_monitoring(self):
        """Start the script and restart it on failure, up to ``max_restarts`` attempts.

        Stops as soon as one run succeeds. When the last allowed attempt
        fails, an error is logged and ``send_alert`` is called.
        """
        self.logger.info(f"开始监控进程: {self.script_path}")
        while self.restart_count < self.max_restarts:
            self.restart_count += 1
            self.logger.info(f"第{self.restart_count}次启动进程")
            if self.run_process():
                self.logger.info("进程正常退出")
                break
            if self.restart_count < self.max_restarts:
                # Linear backoff: the wait grows with every failed attempt.
                delay = self.restart_delay * self.restart_count
                self.logger.info(f"{delay}秒后重启...")
                time.sleep(delay)
            else:
                self.logger.error("达到最大重启次数")
                self.send_alert(f"进程 {self.script_path} 重启失败")
        self.logger.info("监控结束")
# Usage example. Guarded so that importing this module does not start
# monitoring as a side effect; it runs only when executed as a script.
if __name__ == "__main__":
    monitor = ProcessMonitor('your_script.py', max_restarts=3, restart_delay=5)
    monitor.start_monitoring()

使用supervisor管理(生产环境推荐)

supervisor配置示例

; Supervisor program section for the Python application.
; Option semantics below are summarized from the Supervisor documentation.
[program:my_python_app]
command=python /path/to/your_script.py
directory=/path/to/app
; Start with supervisord and restart the program whenever it exits.
autostart=true
autorestart=true
; Number of consecutive failed start attempts allowed before giving up.
startretries=3
; Send stop/kill signals to the whole process group, not only the parent.
stopasgroup=true
killasgroup=true
; Account the program runs as.
user=www-data
; Separate log files for standard output and standard error.
stdout_logfile=/var/log/myapp.log
stderr_logfile=/var/log/myapp_error.log

使用进程管理器类

import os
import signal
import subprocess
import sys
import time

import psutil
class ProcessManager:
    """Start, stop and keep alive a Python script running in its own process group.

    POSIX only: relies on sessions/process groups and ``os.killpg``.

    Args:
        process_name: Human-readable name used in status messages.
        script_path: Path of the script started with the current interpreter.
    """

    def __init__(self, process_name, script_path):
        self.process_name = process_name
        self.script_path = script_path
        self.process = None

    def start(self):
        """Start the script and return the PID of the new child process."""
        self.process = subprocess.Popen(
            [sys.executable, self.script_path],
            # Puts the child in a new session (and process group) so stop()
            # can signal it together with its descendants. Preferred over
            # preexec_fn=os.setsid, which is unsafe in threaded programs.
            start_new_session=True,
        )
        return self.process.pid

    def stop(self, timeout=5):
        """Terminate the child's process group and reap the child.

        Sends SIGTERM to the group and waits up to *timeout* seconds; if the
        child is still alive it is killed with SIGKILL. Does nothing when no
        process was started or it has already exited.

        Args:
            timeout: Seconds to wait for a graceful exit before SIGKILL.
        """
        proc = self.process
        if proc is None or proc.poll() is not None:
            # Nothing to signal; also avoids signalling a recycled PID.
            return
        try:
            pgid = os.getpgid(proc.pid)
            os.killpg(pgid, signal.SIGTERM)
        except ProcessLookupError:
            # The group vanished between poll() and the signal.
            proc.wait()
            return
        try:
            # Waiting reaps the child so it does not remain a zombie and
            # is_running() is accurate as soon as stop() returns.
            proc.wait(timeout=timeout)
        except subprocess.TimeoutExpired:
            try:
                os.killpg(pgid, signal.SIGKILL)
            except ProcessLookupError:
                pass
            proc.wait()

    def is_running(self):
        """Return True if a child was started and has not exited yet."""
        return self.process is not None and self.process.poll() is None

    def restart(self):
        """Stop the child, pause briefly, start it again and return the new PID."""
        self.stop()
        time.sleep(2)
        return self.start()

    def monitor(self, check_interval=10):
        """Poll forever and start the process again whenever it is not running.

        Args:
            check_interval: Seconds between liveness checks.
        """
        while True:
            if not self.is_running():
                print(f"进程 {self.process_name} 已停止,准备重启")
                pid = self.start()
                print(f"进程已重启,新PID: {pid}")
            time.sleep(check_interval)
# Usage example. Guarded so that importing this module does not enter the
# endless monitoring loop; it runs only when executed as a script.
if __name__ == "__main__":
    manager = ProcessManager("myapp", "your_script.py")
    manager.monitor()

使用Python-daemon实现守护进程

import daemon
import lockfile
import time
import sys
def run_forever():
    """Main program loop: runs until the process is terminated.

    Each iteration prints a heartbeat and sleeps. An exception raised inside
    an iteration is printed and followed by a short pause, after which the
    loop continues instead of exiting.
    """
    heartbeat_interval = 5
    error_pause = 3
    while True:
        try:
            # Placeholder for the real business logic.
            print("程序运行中...")
            time.sleep(heartbeat_interval)
        except Exception as exc:
            print(f"异常: {exc}")
            time.sleep(error_pause)
def create_daemon():
    """Run ``run_forever()`` inside a ``daemon.DaemonContext``.

    The context is configured with working directory ``/path/to/app``,
    umask ``0o002`` and a ``lockfile.FileLock`` on ``/var/run/myapp.pid``.
    """
    pid_lock = lockfile.FileLock('/var/run/myapp.pid')
    with daemon.DaemonContext(
        working_directory='/path/to/app',
        umask=0o002,
        pidfile=pid_lock,
    ):
        run_forever()
# Script entry point: daemonize only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    create_daemon()

Docker方式(现代推荐)

Dockerfile

FROM python:3.9
WORKDIR /app
# Install supervisor first so this layer is cached independently of the
# application code; drop the apt package lists to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends supervisor \
    && rm -rf /var/lib/apt/lists/*
COPY . /app
RUN pip install --no-cache-dir -r requirements.txt
# Use supervisor to manage (and restart) the application process.
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
# -n keeps supervisord in the foreground. Without it supervisord daemonizes,
# the container's main process exits, and the container stops immediately.
CMD ["supervisord", "-n", "-c", "/etc/supervisor/conf.d/supervisord.conf"]

最佳实践建议

  1. 谨慎设置重启次数:避免无限重启导致资源耗尽
  2. 实现指数退避:重启间隔逐渐增加
  3. 记录详细日志:便于故障排查
  4. 设置进程超时:防止进程挂起;超时终止后需调用wait()/communicate()回收子进程,避免产生僵尸进程
  5. 监控资源使用:内存泄漏、CPU过高等
  6. 使用信号处理:优雅关闭进程
  7. 考虑使用专门的进程管理工具:如supervisor、systemd等

生产环境推荐方案

  • 小型项目:使用subprocess + 简单监控脚本
  • 中型项目:使用supervisor或systemd
  • 大型项目:使用Kubernetes或Docker Swarm编排

选择哪种方案取决于你的具体需求和环境复杂度。

抱歉,评论功能暂时关闭!