Python案例如何实现风格迁移？

wen python案例 2026-06-16 67

本文目录导读：

Python案例如何实现风格迁移？

使用预训练模型（推荐 - 最简单）
使用PyTorch Hub（更简洁）
使用TensorFlow/Keras实现
快速实现（使用第三方库）
交互式Web应用（Flask）
性能优化技巧

我来详细介绍Python实现风格迁移的几种主流方法。

使用预训练模型（推荐 - 最简单）

安装依赖

pip install torch torchvision pillow matplotlib
pip install opencv-python numpy

使用VGG19实现神经风格迁移

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image
import copy
import matplotlib.pyplot as plt
class StyleTransfer:
    """Optimisation-based neural style transfer driven by VGG19 features.

    The target image starts as a copy of the content image and is updated with
    Adam so that its VGG19 activations match those of the content image
    (content loss) and its Gram matrices match those of the style image
    (style loss).
    """

    # Channel statistics applied by load_image and undone by show_image.
    _MEAN = (0.485, 0.456, 0.406)
    _STD = (0.229, 0.224, 0.225)

    def __init__(self):
        """Load the pretrained VGG19 feature extractor onto the best available device."""
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.cnn = models.vgg19(weights=models.VGG19_Weights.DEFAULT).features.to(self.device).eval()
        # Only the image is optimised: freeze the weights so no gradients are
        # computed or retained for them.
        for param in self.cnn.parameters():
            param.requires_grad_(False)
        # A captured conv output must not be overwritten by a following
        # in-place activation, otherwise the stored feature depends on how far
        # the forward pass continued.
        for module in self.cnn.modules():
            if isinstance(module, nn.ReLU):
                module.inplace = False
        # Layers are named 'conv_<k>' where k is the 1-based index of the
        # k-th Conv2d in the network (see get_features).
        self.content_layers = ['conv_4']
        self.style_layers = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']

    def load_image(self, img_path, size=512):
        """Load an image as a normalised (1, 3, size, size) float tensor on self.device.

        Args:
            img_path: Path of the image file to open.
            size: Edge length the image is resized to (aspect ratio is not kept).
        """
        image = Image.open(img_path).convert('RGB')
        transform = transforms.Compose([
            transforms.Resize((size, size)),
            transforms.ToTensor(),
            transforms.Normalize(mean=list(self._MEAN), std=list(self._STD)),
        ])
        image = transform(image).unsqueeze(0)
        return image.to(self.device, torch.float)

    def show_image(self, tensor, title=None):
        """Display a normalised image tensor with matplotlib.

        Args:
            tensor: Image tensor as produced by load_image / transfer_style.
            title: Optional figure title.
        """
        # detach() so a tensor that still tracks gradients can be plotted.
        image = tensor.detach().cpu().clone()
        image = image.squeeze(0)
        # Invert Normalize: x * std + mean, expressed as another Normalize.
        unnormalize = transforms.Normalize(
            mean=[-m / s for m, s in zip(self._MEAN, self._STD)],
            std=[1 / s for s in self._STD],
        )
        image = unnormalize(image)
        image = torch.clamp(image, 0, 1)
        plt.figure(figsize=(10, 10))
        plt.imshow(image.permute(1, 2, 0).numpy())
        if title:
            plt.title(title)
        plt.axis('off')
        plt.show()

    def get_features(self, image, model, layers):
        """Run image through model and collect the outputs of the named conv layers.

        Args:
            image: Input batch fed to the first child module of model.
            model: Sequential-style module whose children are applied in order.
            layers: Names of the form 'conv_<k>', k being the 1-based index of
                the k-th nn.Conv2d child.

        Returns:
            Dict mapping each requested name that exists in model to the
            output of that conv layer.
        """
        wanted = set(layers)
        features = {}
        x = image
        conv_index = 0
        for layer in model.children():
            x = layer(x)
            if not isinstance(layer, nn.Conv2d):
                continue
            # Count convolutions only, so 'conv_k' really is the k-th conv
            # rather than the k-th module of any kind.
            conv_index += 1
            layer_name = f'conv_{conv_index}'
            if layer_name in wanted:
                features[layer_name] = x
                if len(features) == len(wanted):
                    # Nothing deeper is needed; skip the rest of the network.
                    break
        return features

    def gram_matrix(self, tensor):
        """Return the per-sample Gram matrix of a (b, c, h, w) feature map.

        The result has shape (b, c, c) and is divided by c * h * w.
        """
        b, c, h, w = tensor.size()
        features = tensor.view(b, c, h * w)
        gram = torch.bmm(features, features.transpose(1, 2))
        return gram / (c * h * w)

    def transfer_style(self, content_path, style_path, num_steps=500, style_weight=1e6, content_weight=1):
        """Optimise an image to combine the content of one file with the style of another.

        Args:
            content_path: Path of the content image.
            style_path: Path of the style image.
            num_steps: Number of Adam updates applied to the image.
            style_weight: Multiplier of the style loss.
            content_weight: Multiplier of the content loss.

        Returns:
            The optimised image as a detached, still-normalised tensor.
        """
        content_img = self.load_image(content_path)
        style_img = self.load_image(style_path, size=512)
        # Start from the content image and optimise its pixels directly.
        target_img = content_img.clone().requires_grad_(True)

        # Targets are constants: build them without autograd so each step's
        # backward() never walks (already freed) target graphs.
        with torch.no_grad():
            content_features = self.get_features(content_img, self.cnn, self.content_layers)
            style_features = self.get_features(style_img, self.cnn, self.style_layers)
            style_grams = {
                layer: self.gram_matrix(style_features[layer])
                for layer in self.style_layers
            }

        optimizer = optim.Adam([target_img], lr=0.003)
        all_layers = self.content_layers + self.style_layers

        for step in range(num_steps):
            target_features = self.get_features(target_img, self.cnn, all_layers)

            # Content loss: mean squared difference, averaged over content layers.
            content_loss = torch.zeros((), device=self.device)
            for layer in self.content_layers:
                diff = target_features[layer] - content_features[layer]
                content_loss = content_loss + torch.mean(diff ** 2) / len(self.content_layers)

            # Style loss: mean squared Gram difference, averaged over style layers.
            style_loss = torch.zeros((), device=self.device)
            for layer in self.style_layers:
                target_gram = self.gram_matrix(target_features[layer])
                layer_style_loss = torch.mean((target_gram - style_grams[layer]) ** 2)
                style_loss = style_loss + layer_style_loss / len(self.style_layers)

            total_loss = content_weight * content_loss + style_weight * style_loss

            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

            if step % 50 == 0:
                print(f'Step [{step}/{num_steps}], Content Loss: {content_loss.item():.4f}, '
                      f'Style Loss: {style_loss.item():.4f}')
        return target_img.detach()
# 使用示例
if __name__ == "__main__":
    # Channel statistics that load_image normalised with; needed to undo it
    # before writing the result to disk.
    imagenet_mean = (0.485, 0.456, 0.406)
    imagenet_std = (0.229, 0.224, 0.225)

    # Stylise content.jpg with style.jpg and preview the outcome.
    stylizer = StyleTransfer()
    stylized = stylizer.transfer_style("content.jpg", "style.jpg", num_steps=500)
    stylizer.show_image(stylized, "风格迁移结果")

    # Map the tensor back to [0, 1] pixel values and save it as a JPEG.
    undo_norm = transforms.Normalize(
        mean=[-m / s for m, s in zip(imagenet_mean, imagenet_std)],
        std=[1 / s for s in imagenet_std],
    )
    pixels = torch.clamp(undo_norm(stylized.cpu().squeeze(0)), 0, 1)
    transforms.ToPILImage()(pixels).save("result.jpg")

使用PyTorch Hub（更简洁）

import torch
import torchvision.transforms as transforms
from PIL import Image
import matplotlib.pyplot as plt
# Download the pretrained VGG19 from PyTorch Hub and switch it to eval mode
# (nn.Module.eval() returns the module itself, so the call can be chained).
model = torch.hub.load('pytorch/vision:v0.10.0', 'vgg19', pretrained=True).eval()
def style_transfer_pytorch_hub(content_img, style_img, num_steps=300):
    """Stylise content_img with a ready-made, already-trained style transfer model.

    NOTE(review): this relies on a third-party ``style_transfer`` module that
    exposes ``StyleTransferModel`` with a ``transfer(content, style)`` method,
    and on a local checkpoint file ``pretrained_model.pth``. Neither is
    provided by this file; confirm the package and checkpoint before use.

    Args:
        content_img: Content image, in whatever form ``transfer`` expects.
        style_img: Style image, in whatever form ``transfer`` expects.
        num_steps: Unused; kept so existing callers keep working.

    Returns:
        Whatever ``StyleTransferModel.transfer`` returns.
    """
    from style_transfer import StyleTransferModel

    # Named `net` so the module-level `model` (VGG19) is not shadowed.
    net = StyleTransferModel()
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host;
    # load_state_dict then copies the weights onto the model's own device.
    state = torch.load('pretrained_model.pth', map_location='cpu')
    net.load_state_dict(state)
    # Inference only: disable dropout / batch-norm updates.
    net.eval()
    return net.transfer(content_img, style_img)

使用TensorFlow/Keras实现

import tensorflow as tf
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
class StyleTransferTF:
    """Optimisation-based neural style transfer using Keras' pretrained VGG19.

    Images are handled in the representation produced by
    ``tf.keras.applications.vgg19.preprocess_input``; ``deprocess_img``
    converts such an array back to a displayable uint8 RGB image.
    """

    # Per-channel offsets that deprocess_img adds back (channel order as
    # stored after preprocessing, i.e. before the final channel reversal).
    _BGR_MEANS = (103.939, 116.779, 123.68)

    def __init__(self):
        """Load VGG19 (ImageNet weights, no classifier head) and pick the layers to match."""
        self.vgg = tf.keras.applications.VGG19(
            include_top=False,
            weights='imagenet'
        )
        self.content_layers = ['block4_conv2']
        self.style_layers = [
            'block1_conv1',
            'block2_conv1',
            'block3_conv1',
            'block4_conv1',
            'block5_conv1'
        ]
        # Layer counts are used to split the model outputs and to average the losses.
        self.num_content_layers = len(self.content_layers)
        self.num_style_layers = len(self.style_layers)

    def load_img(self, path_to_img, max_dim=512):
        """Load an image as a (1, H, W, 3) float32 array whose longer side is max_dim.

        Args:
            path_to_img: Path of the image file.
            max_dim: Target length of the longer image side; aspect ratio is kept.
        """
        # Force three channels: RGBA, greyscale or palette files would
        # otherwise yield a channel count VGG19 cannot consume.
        img = Image.open(path_to_img).convert('RGB')
        scale = max_dim / max(img.size)
        new_size = (
            max(1, round(img.size[0] * scale)),
            max(1, round(img.size[1] * scale)),
        )
        img = img.resize(new_size)
        # np.array makes a writable copy that later in-place preprocessing may modify.
        return np.expand_dims(np.array(img, dtype=np.float32), axis=0)

    def preprocess_img(self, img):
        """Apply the VGG19 input preprocessing to an image batch."""
        img = tf.keras.applications.vgg19.preprocess_input(img)
        return img

    def deprocess_img(self, processed_img):
        """Convert a preprocessed image array back to a uint8 RGB image.

        Args:
            processed_img: Array of shape (H, W, 3) or (1, H, W, 3); it is not modified.

        Returns:
            uint8 array of shape (H, W, 3) with values clipped to [0, 255].
        """
        x = processed_img.copy()
        if len(x.shape) == 4:
            x = np.squeeze(x, 0)
        # Add the channel offsets back, then reverse the channel order.
        for channel, mean in enumerate(self._BGR_MEANS):
            x[:, :, channel] += mean
        x = x[:, :, ::-1]
        x = np.clip(x, 0, 255).astype('uint8')
        return x

    def get_model(self):
        """Build a frozen model that returns the style-layer outputs followed by the content-layer outputs."""
        vgg = self.vgg
        vgg.trainable = False
        style_outputs = [vgg.get_layer(name).output for name in self.style_layers]
        content_outputs = [vgg.get_layer(name).output for name in self.content_layers]
        model_outputs = style_outputs + content_outputs
        return tf.keras.Model(vgg.input, model_outputs)

    def gram_matrix(self, input_tensor):
        """Return the (b, c, c) Gram matrix of a (b, h, w, c) tensor, divided by h * w."""
        result = tf.linalg.einsum('bijc,bijd->bcd', input_tensor, input_tensor)
        input_shape = tf.shape(input_tensor)
        num_locations = tf.cast(input_shape[1]*input_shape[2], tf.float32)
        return result / num_locations

    def style_content_loss(self, outputs, style_targets, content_targets,
                          style_weight=1e-2, content_weight=1e4):
        """Combine the weighted style and content losses for one set of model outputs.

        Args:
            outputs: Model outputs, style layers first, then content layers.
            style_targets: Style-layer outputs of the style image (compared
                directly against the Gram matrices of the current outputs).
            content_targets: Content-layer outputs of the content image.
            style_weight: Multiplier of the layer-averaged style loss.
            content_weight: Multiplier of the layer-averaged content loss.
        """
        style_outputs = outputs[:self.num_style_layers]
        content_outputs = outputs[self.num_style_layers:]
        style_loss = tf.add_n([
            tf.reduce_mean((self.gram_matrix(style_output) - style_target)**2)
            for style_output, style_target in zip(style_outputs, style_targets)
        ])
        style_loss *= style_weight / self.num_style_layers
        content_loss = tf.add_n([
            tf.reduce_mean((content_output - content_target)**2)
            for content_output, content_target in zip(content_outputs, content_targets)
        ])
        content_loss *= content_weight / self.num_content_layers
        return style_loss + content_loss

    def transfer_style(self, content_path, style_path, epochs=10, steps_per_epoch=100):
        """Optimise an image to combine the content of one file with the style of another.

        Args:
            content_path: Path of the content image.
            style_path: Path of the style image.
            epochs: Number of outer iterations.
            steps_per_epoch: Adam updates per epoch.

        Returns:
            The stylised image as a uint8 RGB array of shape (H, W, 3).
        """
        content_image = self.preprocess_img(self.load_img(content_path))
        style_image = self.preprocess_img(self.load_img(style_path))

        model = self.get_model()
        # Fixed targets the generated image is pulled towards.
        style_targets = model(style_image)[:self.num_style_layers]
        content_targets = model(content_image)[self.num_style_layers:]

        # The image being optimised starts as the content image.
        generated_image = tf.Variable(content_image, dtype=tf.float32)
        optimizer = tf.optimizers.Adam(learning_rate=5.0)

        # Keep every channel inside the range that maps to [0, 255] once
        # deprocess_img adds the channel offsets back.
        means = tf.constant(self._BGR_MEANS, dtype=tf.float32)
        min_vals = -means
        max_vals = 255.0 - means

        @tf.function
        def train_step(image):
            with tf.GradientTape() as tape:
                outputs = model(image)
                loss = self.style_content_loss(
                    outputs, style_targets, content_targets
                )
            grad = tape.gradient(loss, image)
            optimizer.apply_gradients([(grad, image)])
            image.assign(tf.clip_by_value(image, min_vals, max_vals))
            return loss

        for epoch in range(epochs):
            for step in range(steps_per_epoch):
                loss = train_step(generated_image)
                if step % 10 == 0:
                    print(f"Epoch {epoch+1}, Step {step}: loss = {loss.numpy():.4f}")

        result_img = self.deprocess_img(generated_image.numpy())
        return result_img
# 使用示例
if __name__ == "__main__":
    # Stylise content.jpg with style.jpg; the result is a uint8 RGB array.
    stylizer = StyleTransferTF()
    stylized = stylizer.transfer_style(
        "content.jpg",
        "style.jpg",
        epochs=10,
        steps_per_epoch=100,
    )

    # Preview without axes, then write the image to disk.
    plt.imshow(stylized)
    plt.axis('off')
    plt.show()

    output_image = Image.fromarray(stylized)
    output_image.save("style_transfer_result.jpg")

快速实现（使用第三方库）

安装

pip install neural-style-transfer

使用

from neural_style_transfer import StyleTransfer
# Create the helper object exposed by the third-party package.
st = StyleTransfer()

# Settings for a single run: input images, output file and iteration count.
job = {
    "content_image": "content.jpg",
    "style_image": "style.jpg",
    "output_image": "result.jpg",
    "iterations": 1000,
}

# Run the transfer with those settings.
st.transfer(**job)

交互式Web应用（Flask）

from flask import Flask, request, jsonify, send_file
import base64
from io import BytesIO
import torch
app = Flask(__name__)
@app.route('/style_transfer', methods=['POST'])
def style_transfer():
    """Stylise an uploaded content image with an uploaded style image.

    Expects a multipart POST with the file fields ``content`` and ``style``.

    Returns:
        The stylised image as a JPEG response (``image/jpeg``).
    """
    import os
    import tempfile

    content_file = request.files['content']
    style_file = request.files['style']

    # A private per-request directory: fixed file names would let concurrent
    # requests overwrite each other's uploads, and the files would never be
    # removed.
    with tempfile.TemporaryDirectory() as workdir:
        content_path = os.path.join(workdir, 'content.jpg')
        style_path = os.path.join(workdir, 'style.jpg')
        content_file.save(content_path)
        style_file.save(style_path)

        transfer = StyleTransfer()
        result = transfer.transfer_style(content_path, style_path)

    # Undo the mean/std normalisation applied by StyleTransfer.load_image and
    # clamp to [0, 1]; out-of-range values would wrap around when converted
    # to 8-bit pixels.
    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
    pixels = (result.cpu().squeeze(0) * std + mean).clamp(0, 1)
    result_image = transforms.ToPILImage()(pixels)

    # Encode in memory and stream the JPEG back to the client.
    img_io = BytesIO()
    result_image.save(img_io, 'JPEG', quality=90)
    img_io.seek(0)
    return send_file(img_io, mimetype='image/jpeg')
# Start Flask's built-in server when this file is run directly.
# NOTE(review): debug=True enables Flask's debug mode (interactive debugger and
# auto-reload); keep it to local development and never expose it publicly.
if __name__ == '__main__':
    app.run(debug=True)

性能优化技巧

class OptimizedStyleTransfer:
    """Sketch of three speed/memory techniques for style transfer.

    NOTE(review): this reads as illustrative pseudo-code rather than a runnable
    class. It calls self.resize_image, self.optimize_at_scale, self.model and
    self.compute_loss, none of which are defined in this class, and it uses
    content_features, style_features, dataloader and optimizer, which are not
    defined anywhere visible in this file.
    """
    def __init__(self):
        # Gradient scaler used for mixed-precision training.
        self.scaler = torch.cuda.amp.GradScaler()
    def transfer_with_optimization(self, content_path, style_path):
        """Outline a multi-scale, mixed-precision, gradient-accumulating transfer.

        Args:
            content_path: Passed to self.resize_image together with each scale.
            style_path: Passed to self.resize_image together with each scale.

        Returns:
            The value produced by self.optimize_at_scale for the last scale.
        """
        # 1. Image pyramid (multi-scale processing).
        scales = [0.5, 0.75, 1.0]
        for scale in scales:
            # Resize both images to the current scale.
            content_scaled = self.resize_image(content_path, scale)
            style_scaled = self.resize_image(style_path, scale)
            # Optimise at this scale.
            # NOTE(review): `result` is overwritten on every pass and is not
            # fed into the next scale, so only the last scale's output is kept.
            result = self.optimize_at_scale(content_scaled, style_scaled)
        # 2. Mixed-precision forward pass.
        with torch.cuda.amp.autocast():
            # Forward pass and loss under autocast.
            features = self.model(result)
            # NOTE(review): content_features / style_features are not assigned
            # in this method.
            loss = self.compute_loss(features, content_features, style_features)
        # 3. Gradient accumulation (for large images).
        accumulation_steps = 4
        # NOTE(review): dataloader and optimizer are not assigned in this
        # method, and `batch` is never used.
        for i, batch in enumerate(dataloader):
            # NOTE(review): the same `loss` is divided again on every iteration
            # instead of being recomputed per batch.
            loss = loss / accumulation_steps
            self.scaler.scale(loss).backward()
            # Step the optimiser once every accumulation_steps iterations.
            if (i + 1) % accumulation_steps == 0:
                self.scaler.step(optimizer)
                self.scaler.update()
                optimizer.zero_grad()
        return result

选择建议

快速原型：使用预训练模型（VGG19 + Gram矩阵）
生产环境：使用TensorFlow/Keras实现，易于部署
高质量结果：使用PyTorch实现，可调节性强
实时处理：使用预先训练好的前馈式快速风格迁移网络等轻量级模型（一次前向传播即可生成结果，无需对每张图片迭代优化）
Web应用：使用Flask框架搭建API服务

关键参数调优

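style_weight: 风格权重（上文 PyTorch 实现通常取 1e6-1e10；TensorFlow 实现的损失尺度不同，示例默认值为 1e-2）
content_weight: 内容权重（上文 PyTorch 实现通常取 1-10；TensorFlow 实现示例默认值为 1e4）
num_steps: 迭代次数（500-2000）
learning_rate: 学习率（PyTorch 实现中图像已归一化，通常取 0.001-0.01；TensorFlow 实现在 0-255 像素尺度上优化，示例中使用 5.0）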

这个实现可以根据具体需求进行调整和优化！