Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@
.venv/
/.venv/
*.pyc
__pycache__/
__pycache__/
/generated/
23 changes: 13 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
# python-generate-image

간단한 Python 기반 이미지/영상 생성 실험 저장소입니다.
`diffusers`와 `torch`를 이용해 텍스트→이미지, 이미지→이미지, 포즈 기반 영상 프레임 생성, ffmpeg 후처리(영상 합성/오디오 병합)를 수행합니다.
이제 단일 FastAPI 백엔드 모듈과 프런트엔드가 결합된 **생성 스튜디오** 형태로 동작합니다.

## 기술 문서
## 웹 스튜디오 기능

상세 기술 스택 문서는 `DOC/Chapter01.md` ~ `DOC/Chapter10.md`를 참고하세요.
- Hugging Face 모델 선택
- 이미지/동영상(미리보기 GIF) 생성 모드 선택
- 이미지 Width/Height 입력
- 동영상 사이즈 선택 (square / landscape / portrait)
- 프롬프트 입력 후 결과 생성 및 미리보기

## 웹 대시보드 (FastAPI + Vanilla JS)
## API

- 백엔드: FastAPI
- 프런트엔드: Vanilla JS (정적 파일)
- 주요 API
- `GET /api/health`
- `GET /api/stack`
- `GET /api/files`
- `GET /api/health`
- `GET /api/options`
- `POST /api/generate`
- `GET /api/files`
- `GET /outputs/{filename}`

## 로컬 실행

Expand Down
Empty file added backend/__init__.py
Empty file.
Empty file added backend/app/__init__.py
Empty file.
108 changes: 108 additions & 0 deletions backend/app/generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
from __future__ import annotations

import html
import textwrap
import uuid
from pathlib import Path

from pydantic import BaseModel, Field, field_validator


class GenerateRequest(BaseModel):
    """Validated body of a generation request.

    ``output_type`` must be ``image`` or ``video``; ``width`` and ``height``
    are limited to 256..1536 and default to 768; ``prompt`` is 1..1000
    characters; ``video_size`` defaults to ``square``.
    """

    # Empty protected namespaces so the ``model_id`` field name does not
    # collide with pydantic's reserved ``model_`` prefix.
    model_config = {"protected_namespaces": ()}

    model_id: str = Field(min_length=1)
    output_type: str = Field(pattern="^(image|video)$")
    prompt: str = Field(min_length=1, max_length=1000)
    width: int = Field(default=768, ge=256, le=1536)
    height: int = Field(default=768, ge=256, le=1536)
    video_size: str = Field(default="square")

    @field_validator("video_size")
    @classmethod
    def validate_video_size(cls, value: str) -> str:
        """Return ``value`` unchanged when it names a supported video size.

        Raises:
            ValueError: if ``value`` is not square, landscape or portrait.
        """
        allowed = {"square", "landscape", "portrait"}
        if value in allowed:
            return value
        raise ValueError(f"video_size must be one of {sorted(allowed)}")


class GenerationResult(BaseModel):
    """Response model describing one generated output file."""

    # Empty protected namespaces so the ``model_id`` field name does not
    # collide with pydantic's reserved ``model_`` prefix.
    model_config = {"protected_namespaces": ()}
    # URL path under which the written file is served.
    file_url: str
    # MIME type of the written file.
    media_type: str
    # Canvas dimensions actually used for the output.
    width: int
    height: int
    # Model id and prompt echoed back from the request.
    model_id: str
    prompt: str


class GeneratorService:
    """Write placeholder SVG preview files for generation requests.

    No model is executed here: every request, image or video, produces an SVG
    card showing a label, the model id and the prompt, saved in ``output_dir``.
    """

    # Canvas size (width, height) used for each named video size.
    VIDEO_SIZES = {
        "square": (768, 768),
        "landscape": (1024, 576),
        "portrait": (576, 1024),
    }

    # ``str.translate`` table deleting code points outside the XML 1.0 ``Char``
    # production. ``html.escape`` does not touch them, a single one makes the
    # SVG unparseable, and lone surrogates cannot be encoded as UTF-8 at all.
    _XML_INVALID_CHARS = dict.fromkeys(
        [
            *range(0x00, 0x09),
            0x0B,
            0x0C,
            *range(0x0E, 0x20),
            *range(0xD800, 0xE000),
            0xFFFE,
            0xFFFF,
        ]
    )

    def __init__(self, output_dir: Path):
        """Remember ``output_dir`` and create it (with parents) if missing."""
        self.output_dir = output_dir
        self.output_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def available_models() -> list[str]:
        """Return the model ids offered for selection."""
        return [
            "runwayml/stable-diffusion-v1-5",
            "stabilityai/sdxl-turbo",
            "stabilityai/stable-diffusion-3-medium-diffusers",
        ]

    def generate(self, request: GenerateRequest) -> GenerationResult:
        """Write a preview SVG for ``request`` and describe the result.

        Image requests use the requested width and height. Video requests
        ignore them and use the canvas registered for ``request.video_size``.

        Returns:
            A ``GenerationResult`` whose ``file_url`` is ``/outputs/<name>``
            and whose ``media_type`` is always ``image/svg+xml``.
        """
        width, height = request.width, request.height
        label = "IMAGE"

        if request.output_type == "video":
            width, height = self.VIDEO_SIZES[request.video_size]
            label = f"VIDEO PREVIEW ({request.video_size})"

        output_path = self._create_preview_svg(
            model_id=request.model_id,
            prompt=request.prompt,
            width=width,
            height=height,
            label=label,
        )
        return GenerationResult(
            file_url=f"/outputs/{output_path.name}",
            media_type="image/svg+xml",
            width=width,
            height=height,
            model_id=request.model_id,
            prompt=request.prompt,
        )

    def _create_preview_svg(self, model_id: str, prompt: str, width: int, height: int, label: str) -> Path:
        """Render the preview card to a new SVG file and return its path.

        The card lists ``label``, the model id and the prompt wrapped at 60
        characters, one ``<text>`` element per line. Each line has XML-invalid
        code points removed and is then HTML-escaped, so arbitrary request
        text always yields a well-formed document.
        """
        prompt_lines = textwrap.wrap(f"Prompt: {prompt}", width=60)
        text_lines = [
            label,
            f"Model: {model_id}",
            *prompt_lines,
        ]

        text_nodes = []
        y = 80
        for line in text_lines:
            # Strip forbidden code points first: escaping alone cannot make
            # them legal, because XML 1.0 rejects them even as references.
            safe_line = html.escape(line.translate(self._XML_INVALID_CHARS))
            text_nodes.append(
                f'<text x="36" y="{y}" fill="#e5e7eb" font-size="24" font-family="Arial">{safe_line}</text>'
            )
            y += 36

        svg = f'''<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" viewBox="0 0 {width} {height}">
<rect width="100%" height="100%" fill="#0f172a" />
<rect x="12" y="12" width="{width - 24}" height="{height - 24}" fill="none" stroke="#22d3ee" stroke-width="4" rx="10" />
{"".join(text_nodes)}
</svg>
'''
        file_name = f"asset_{uuid.uuid4().hex[:8]}.svg"
        path = self.output_dir / file_name
        path.write_text(svg, encoding="utf-8")
        return path
49 changes: 28 additions & 21 deletions backend/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,29 @@
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel

from backend.app.generator import GenerateRequest, GenerationResult, GeneratorService

BASE_DIR = Path(__file__).resolve().parents[2]
FRONTEND_DIR = BASE_DIR / "frontend"
OUTPUT_DIR = BASE_DIR / "generated"


class HealthResponse(BaseModel):
    """Response body of the health endpoint."""

    # Status string reported to the caller.
    status: str


class StackItem(BaseModel):
    """One entry of the technology-stack listing."""

    # Display name of the technology.
    name: str
    # Grouping label for the entry.
    category: str
    # One-sentence description of the entry.
    description: str


class FilesResponse(BaseModel):
    """Response body listing Python file names."""

    # File names only, without directory components.
    python_files: list[str]


app = FastAPI(title="python-generate-image dashboard", version="1.0.0")
class GenerationOptions(BaseModel):
    """Choices a client can offer when building a generation request."""

    # Selectable model ids.
    models: list[str]
    # Accepted values for the request's ``output_type``.
    output_types: list[str]
    # Accepted values for the request's ``video_size``.
    video_sizes: list[str]


app = FastAPI(title="python-generate-image studio", version="2.0.0")
service = GeneratorService(output_dir=OUTPUT_DIR)

app.add_middleware(
CORSMiddleware,
Expand All @@ -40,29 +44,32 @@ def health() -> HealthResponse:
return HealthResponse(status="ok")


@app.get("/api/stack", response_model=list[StackItem])
def stack() -> list[StackItem]:
    """Return the fixed technology-stack listing, in display order."""
    # (name, category, description) rows; order is part of the response.
    entries = (
        ("Python 3", "Language", "Core runtime for scripts and API service."),
        ("FastAPI", "Backend", "REST API and static file serving."),
        ("Vanilla JavaScript", "Frontend", "Lightweight browser UI without framework."),
        ("PyTorch", "AI", "Tensor runtime for CPU/GPU inference."),
        ("Diffusers", "AI", "Stable Diffusion and related image generation pipelines."),
        ("OpenCV/Pillow", "Media", "Frame and image processing utilities."),
        ("FFmpeg", "Media", "Video encoding and audio merge pipeline."),
        ("Docker", "DevOps", "Container-based local execution and testing."),
    )
    return [
        StackItem(name=name, category=category, description=description)
        for name, category, description in entries
    ]


@app.get("/api/files", response_model=FilesResponse)
def files() -> FilesResponse:
    """List the ``*.py`` files directly inside ``BASE_DIR``, sorted by name."""
    names = [entry.name for entry in BASE_DIR.glob("*.py")]
    names.sort()
    return FilesResponse(python_files=names)


@app.get("/api/options", response_model=GenerationOptions)
def options() -> GenerationOptions:
    """Report the models, output types and video sizes a client may choose."""
    model_ids = service.available_models()
    size_names = list(service.VIDEO_SIZES)
    return GenerationOptions(
        models=model_ids,
        output_types=["image", "video"],
        video_sizes=size_names,
    )


@app.post("/api/generate", response_model=GenerationResult)
def generate(payload: GenerateRequest) -> GenerationResult:
    """Pass the validated request to the generator service and return its result."""
    result = service.generate(payload)
    return result


if FRONTEND_DIR.exists():
app.mount("/static", StaticFiles(directory=FRONTEND_DIR), name="static")

OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
app.mount("/outputs", StaticFiles(directory=OUTPUT_DIR), name="outputs")


@app.get("/")
def index() -> FileResponse:
Expand Down
109 changes: 75 additions & 34 deletions frontend/app.js
Original file line number Diff line number Diff line change
@@ -1,53 +1,94 @@
async function fetchJson(url) {
const response = await fetch(url);
async function fetchJson(url, options) {
const response = await fetch(url, options);
if (!response.ok) {
throw new Error(`요청 실패: ${url}`);
const text = await response.text();
throw new Error(text || `요청 실패: ${url}`);
}
return response.json();
}

function renderList(elementId, items, formatter) {
const el = document.getElementById(elementId);
el.innerHTML = "";
function toggleModeUI(outputType) {
const imageFields = document.querySelectorAll('.image-only');
const videoFields = document.querySelectorAll('.video-only');
const isImage = outputType === 'image';

if (!items.length) {
const li = document.createElement("li");
li.className = "muted";
li.textContent = "데이터가 없습니다.";
el.appendChild(li);
return;
}
imageFields.forEach((field) => field.classList.toggle('hidden', !isImage));
videoFields.forEach((field) => field.classList.toggle('hidden', isImage));
}

items.forEach((item) => {
const li = document.createElement("li");
li.textContent = formatter(item);
el.appendChild(li);
async function initializeForm() {
const data = await fetchJson('/api/options');
const modelSelect = document.getElementById('model-id');

modelSelect.innerHTML = '';
data.models.forEach((modelId) => {
const option = document.createElement('option');
option.value = modelId;
option.textContent = modelId;
modelSelect.appendChild(option);
});
}

async function loadDashboard() {
const healthEl = document.getElementById("health");
/**
 * Collect the current form values into the JSON body sent to /api/generate.
 * Width and height are converted with Number(); the prompt is trimmed.
 */
function readPayload() {
  // Read the raw value of a form control by element id.
  const valueOf = (id) => document.getElementById(id).value;

  const outputType = valueOf('output-type');
  const modelId = valueOf('model-id');
  const width = Number(valueOf('width'));
  const height = Number(valueOf('height'));
  const videoSize = valueOf('video-size');
  const prompt = valueOf('prompt').trim();

  return {
    model_id: modelId,
    output_type: outputType,
    width,
    height,
    video_size: videoSize,
    prompt,
  };
}

async function generate(event) {
event.preventDefault();

const statusEl = document.getElementById('status');
const resultImage = document.getElementById('result-image');
const button = document.getElementById('submit-button');

const payload = readPayload();

if (!payload.prompt) {
statusEl.textContent = '프롬프트를 입력해 주세요.';
return;
}

button.disabled = true;
statusEl.textContent = '생성 중...';

try {
const [health, stack, files] = await Promise.all([
fetchJson("/api/health"),
fetchJson("/api/stack"),
fetchJson("/api/files"),
]);
const data = await fetchJson('/api/generate', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(payload),
});

healthEl.textContent = `상태: ${health.status}`;
resultImage.src = `${data.file_url}?t=${Date.now()}`;
resultImage.classList.remove('hidden');
statusEl.textContent = `완료: ${data.model_id} / ${data.width}x${data.height}`;
} catch (error) {
statusEl.textContent = `실패: ${error.message}`;
} finally {
button.disabled = false;
}
}

renderList(
"stack-list",
stack,
(item) => `[${item.category}] ${item.name} - ${item.description}`,
);
async function boot() {
const outputTypeEl = document.getElementById('output-type');
outputTypeEl.addEventListener('change', (event) => toggleModeUI(event.target.value));

renderList("files-list", files.python_files, (name) => name);
try {
await initializeForm();
} catch (error) {
healthEl.textContent = "상태 확인 실패";
healthEl.classList.add("muted");
const statusEl = document.getElementById('status');
statusEl.textContent = '초기 데이터 로드 실패';
statusEl.classList.add('muted');
}

toggleModeUI(outputTypeEl.value);
document.getElementById('generate-form').addEventListener('submit', generate);
}

loadDashboard();
boot();
Loading
Loading