图像生成 Demo
这是我使用 AI 模型生成图像的示例代码。
功能特性
- 文本到图像生成
- 图像到图像转换
- 批量生成
- 参数调优
技术栈
- Python 3.8+
- diffusers (Hugging Face)
- torch
- PIL / Pillow
Stable Diffusion 示例
基础图像生成
import torch
from diffusers import StableDiffusionPipeline

# Load the Stable Diffusion v1.5 checkpoint with float16 weights and place the
# pipeline on the CUDA device in a single expression.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
).to("cuda")

# Run text-to-image for one prompt, keep the first returned image and write it
# to disk as a PNG.
prompt = "a beautiful landscape, mountains, sunset, 4k"
result = pipe(prompt)
image = result.images[0]
image.save("output.png")
带参数的生成
# The pipeline call does not accept a `seed` keyword; reproducible sampling is
# requested by passing a seeded torch.Generator through `generator` instead.
generator = torch.Generator(device="cuda").manual_seed(42)

# Generate one image with explicit sampling parameters and keep the first
# result. `pipe` is the text-to-image pipeline created earlier.
image = pipe(
    prompt="a cat sitting on a windowsill, sunlight",
    negative_prompt="blurry, low quality",
    num_inference_steps=50,
    guidance_scale=7.5,
    height=512,
    width=512,
    generator=generator,
).images[0]
批量生成
def generate_batch(prompts, output_dir="outputs"):
    """Generate one image per prompt and save them as numbered PNG files.

    Args:
        prompts: Sequence of text prompts; each one is passed to the
            module-level ``pipe`` on its own.
        output_dir: Directory that receives the images. It is created if it
            does not exist yet.

    Files are named ``image_000.png``, ``image_001.png``, ... following the
    order of ``prompts``, and a progress line is printed after each image.
    """
    import os

    os.makedirs(output_dir, exist_ok=True)

    for done, text in enumerate(prompts, start=1):
        result = pipe(text)
        # File names are zero-based, the progress counter is one-based.
        result.images[0].save(f"{output_dir}/image_{done - 1:03d}.png")
        print(f"生成完成: {done}/{len(prompts)}")
# Example run: three prompts rendered into the default output directory.
prompts = [
    "a cat in a garden",
    "a dog in a park",
    "a bird in the sky",
]
generate_batch(prompts)
Img2Img 示例
import torch
from diffusers import StableDiffusionImg2ImgPipeline
from PIL import Image

# Build the image-to-image pipeline with float16 weights on the CUDA device.
pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
).to("cuda")

# Load the source picture, force RGB and scale it to 512x512.
init_image = Image.open("input.jpg").convert("RGB").resize((512, 512))

# Transform the source picture according to the prompt.
prompt = "anime style"
result = pipe(
    prompt=prompt,
    image=init_image,
    strength=0.75,  # transformation strength (0-1)
    num_inference_steps=50,
)
image = result.images[0]
image.save("output_anime.png")
Inpainting 示例
import numpy as np
import torch
from diffusers import StableDiffusionInpaintPipeline
from PIL import Image

# Build the inpainting pipeline with float16 weights on the CUDA device.
pipe = StableDiffusionInpaintPipeline.from_pretrained(
    "runwayml/stable-diffusion-inpainting",
    torch_dtype=torch.float16,
).to("cuda")

# Read the picture to edit and its mask, both converted to RGB.
image = Image.open("input.jpg").convert("RGB")
mask = Image.open("mask.png").convert("RGB")

# Inpaint using the prompt together with the picture and the mask; the
# name `image` is rebound from the input picture to the generated result.
prompt = "a red car"
result = pipe(
    prompt=prompt,
    image=image,
    mask_image=mask,
    num_inference_steps=50,
)
image = result.images[0]
image.save("output_inpaint.png")
ControlNet 示例
import torch
from diffusers import ControlNetModel, StableDiffusionControlNetPipeline
from diffusers.utils import load_image

# Load the Canny ControlNet weights in float16.
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-canny",
    torch_dtype=torch.float16,
)

# Attach the ControlNet to the Stable Diffusion v1.5 pipeline and place the
# whole pipeline on the CUDA device.
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float16,
).to("cuda")

# Load the control image (an edge map).
control_image = load_image("edge_image.png")

# Generate an image from the prompt and the control image.
result = pipe(
    "a beautiful landscape",
    image=control_image,
    num_inference_steps=50,
)
image = result.images[0]
image.save("output_controlnet.png")
Web 应用示例
Flask 实现
import io

import torch
from diffusers import StableDiffusionPipeline
from flask import Flask, request, send_file
from PIL import Image

app = Flask(__name__)

# Load the model once at import time so every request reuses the same pipeline.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16
)
pipe = pipe.to("cuda")


@app.route("/generate", methods=["POST"])
def generate():
    """Generate one image from the JSON ``prompt`` field and return it as PNG.

    Expects a JSON object body such as ``{"prompt": "..."}``.

    Returns:
        The generated image as an ``image/png`` response, or the JSON error
        ``{"error": "Prompt is required"}`` with status 400 when the body is
        missing, is not a JSON object, or carries no non-empty string prompt.
    """
    # silent=True makes a missing or malformed JSON body come back as None
    # instead of aborting the request, so this handler controls the error.
    data = request.get_json(silent=True)

    # Valid JSON can still be a list, number or null; only an object has
    # a "prompt" key to read.
    prompt = data.get("prompt", "") if isinstance(data, dict) else ""
    if not isinstance(prompt, str) or not prompt:
        return {"error": "Prompt is required"}, 400

    # Generate the image and keep the first result.
    image = pipe(prompt).images[0]

    # Serialize to an in-memory PNG; rewind so send_file reads from the start.
    img_io = io.BytesIO()
    image.save(img_io, "PNG")
    img_io.seek(0)
    return send_file(img_io, mimetype="image/png")


if __name__ == "__main__":
    # Binds to all interfaces on port 5000.
    app.run(host="0.0.0.0", port=5000)
前端界面
<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <!-- Declare the encoding explicitly so the Chinese UI text renders
         correctly even when the server sends no charset header. -->
    <meta charset="UTF-8">
    <title>AI 图像生成</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
        }
        #prompt-input {
            width: 100%;
            padding: 10px;
            margin-bottom: 10px;
        }
        #generate-btn {
            padding: 10px 20px;
            background: #007bff;
            color: white;
            border: none;
            cursor: pointer;
        }
        #result-image {
            margin-top: 20px;
            max-width: 100%;
        }
    </style>
</head>
<body>
    <h1>AI 图像生成</h1>
    <input type="text" id="prompt-input" placeholder="输入描述...">
    <button id="generate-btn" onclick="generateImage()">生成图像</button>
    <div id="loading" style="display: none;">生成中...</div>
    <img id="result-image" style="display: none;">
    <script>
        // Object URL of the image currently displayed, kept so it can be
        // released when a newer image replaces it.
        let currentImageUrl = null;

        /**
         * Send the prompt to POST /generate and display the returned image.
         * Shows an alert when the prompt is empty or the request fails.
         */
        async function generateImage() {
            const prompt = document.getElementById('prompt-input').value;
            if (!prompt) {
                alert('请输入描述');
                return;
            }

            const button = document.getElementById('generate-btn');
            const loading = document.getElementById('loading');
            const resultImage = document.getElementById('result-image');

            // Block repeated clicks while a request is in flight.
            button.disabled = true;
            loading.style.display = 'block';
            resultImage.style.display = 'none';

            try {
                const response = await fetch('/generate', {
                    method: 'POST',
                    headers: {'Content-Type': 'application/json'},
                    body: JSON.stringify({prompt: prompt})
                });

                if (response.ok) {
                    const blob = await response.blob();
                    const url = URL.createObjectURL(blob);
                    resultImage.src = url;
                    resultImage.style.display = 'block';

                    // Release the previous blob URL; otherwise every
                    // generated image stays in memory until the page closes.
                    if (currentImageUrl) {
                        URL.revokeObjectURL(currentImageUrl);
                    }
                    currentImageUrl = url;
                } else {
                    alert('生成失败');
                }
            } catch (error) {
                console.error('Error:', error);
                alert('生成失败');
            } finally {
                loading.style.display = 'none';
                button.disabled = false;
            }
        }
    </script>
</body>
</html>
性能优化
内存优化
# Memory-saving switches, applied to a pipeline object `pipe` that is
# expected to have been created earlier.
# Enable CPU offload.
pipe.enable_model_cpu_offload()
# Enable attention slicing.
pipe.enable_attention_slicing()
# Enable VAE slicing.
pipe.enable_vae_slicing()
速度优化
# Use xFormers memory-efficient attention.
# NOTE(review): presumably needs the separate `xformers` package, which the
# install command in this document does not list — confirm.
pipe.enable_xformers_memory_efficient_attention()
# Use compilation: replace the pipeline's UNet with its torch.compile version.
# NOTE(review): torch.compile presumably requires PyTorch 2.x — confirm.
pipe.unet = torch.compile(pipe.unet)
运行说明
安装依赖
# flask is required by the web service example, which imports it.
pip install diffusers transformers torch pillow accelerate flask
运行示例
python basic_generation.py
运行 Web 服务
python app.py
服务启动后监听 http://localhost:5000,图像生成接口为 POST /generate。注意:示例中的 Flask 应用没有定义首页路由,前端页面需要由同一服务(同源)另行提供后,才能通过该地址访问。
扩展建议
- 添加模型选择: 支持切换不同的模型
- 参数调优界面: 提供参数调整 UI
- 历史记录: 保存生成历史
- 批量处理: 支持批量生成和下载
- 图像编辑: 集成图像编辑功能