| | |
| | import os |
| | import io |
| | import math |
| | from typing import Tuple, Dict, Any |
| | from PIL import Image, ImageOps |
| | import numpy as np |
| |
|
| | import torch |
| | from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler |
| | from transformers import logging as hf_logging |
| | hf_logging.set_verbosity_error() |
| |
|
| | |
| | from controlnet_aux import OpenposeDetector |
| |
|
| | |
| | from rembg import remove |
| |
|
| | |
# Hugging Face Hub identifiers of the base diffusion model and of the
# OpenPose ControlNet that conditions it.
MODEL_ID = "runwayml/stable-diffusion-v1-5"
CONTROLNET_ID = "lllyasviel/sd-controlnet-openpose"

# Run on the GPU whenever CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Module-level caches, filled lazily by load_pipeline() and
# get_openpose_detector() so the heavy models are only loaded once.
_PIPELINE = None
_OP_DETECTOR = None
| |
|
def get_openpose_detector():
    """Return the shared OpenPose detector, loading it on first use.

    The detector is created once from the "lllyasviel/ControlNet"
    checkpoint and cached in the module-level ``_OP_DETECTOR`` so later
    calls return the same instance.
    """
    global _OP_DETECTOR
    if _OP_DETECTOR is not None:
        return _OP_DETECTOR

    _OP_DETECTOR = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
    return _OP_DETECTOR
| |
|
def load_pipeline():
    """Load (once) and return the ControlNet + Stable Diffusion pipeline.

    The pipeline is cached in the module-level ``_PIPELINE``; subsequent
    calls return the cached instance. Weights are loaded in float16 when
    ``DEVICE`` is "cuda" and in float32 otherwise, the scheduler is
    replaced by ``UniPCMultistepScheduler``, and the pipeline is moved to
    ``DEVICE``.

    The image-to-image ControlNet pipeline is used because run_pipeline()
    calls the pipeline with an init ``image``, a separate ``control_image``
    and a ``strength``; the text-to-image ControlNet pipeline interprets
    ``image`` as the conditioning map and has no ``strength`` argument.

    Returns:
        The ready-to-call diffusers pipeline.
    """
    global _PIPELINE
    if _PIPELINE is not None:
        return _PIPELINE

    # Imported locally so this function does not depend on the file-level
    # import list. Older diffusers releases do not ship the img2img
    # ControlNet pipeline; in that case keep the previously used class.
    try:
        from diffusers import StableDiffusionControlNetImg2ImgPipeline as pipeline_cls
    except ImportError:
        pipeline_cls = StableDiffusionControlNetPipeline

    on_gpu = DEVICE == "cuda"
    # Half precision only makes sense on the GPU.
    dtype = torch.float16 if on_gpu else torch.float32

    controlnet = ControlNetModel.from_pretrained(CONTROLNET_ID, torch_dtype=dtype)
    pipe = pipeline_cls.from_pretrained(
        MODEL_ID,
        controlnet=controlnet,
        safety_checker=None,
        torch_dtype=dtype,
    )

    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
    if on_gpu:
        # Trades a little speed for a lower peak VRAM footprint.
        pipe.enable_attention_slicing()
    pipe.to(DEVICE)

    _PIPELINE = pipe
    return _PIPELINE
| |
|
def remove_background(pil_img: Image.Image) -> Image.Image:
    """Strip the background from a garment photo using rembg.

    The image is serialised to PNG bytes (as RGBA), handed to
    ``rembg.remove`` and the returned bytes are decoded again.

    Returns:
        The cut-out garment as an RGBA image.
    """
    with io.BytesIO() as png_buffer:
        pil_img.convert("RGBA").save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()

    cutout_bytes = remove(png_bytes)
    cutout = Image.open(io.BytesIO(cutout_bytes))
    return cutout.convert("RGBA")
| |
|
def _scaled_garment_size(garment_w, garment_h, scale):
    """Return the garment size after scaling, never smaller than 1x1 px."""
    return (max(1, int(garment_w * scale)), max(1, int(garment_h * scale)))


def _shoulder_placement(pose_keypoints, garment_w, garment_h):
    """Derive (new_size, position) from shoulder keypoints, or None if a shoulder is missing."""
    ls = pose_keypoints.get("left_shoulder")
    rs = pose_keypoints.get("right_shoulder")
    if not (ls and rs):
        return None

    shoulder_dist = math.hypot(rs[0] - ls[0], rs[1] - ls[1])
    # Garment is made 1.4x the shoulder span wide, but never scaled below 10 %.
    target_w = int(shoulder_dist * 1.4)
    scale = max(0.1, target_w / garment_w)
    new_size = _scaled_garment_size(garment_w, garment_h, scale)

    center_x = int((ls[0] + rs[0]) / 2)
    # Dividing by 1.8 instead of 2 lands slightly below the shoulder midpoint.
    top_y = int((ls[1] + rs[1]) / 1.8)
    # Clamp to the canvas so the paste never starts at a negative offset.
    pos = (max(0, center_x - new_size[0] // 2), max(0, top_y - new_size[1] // 6))
    return new_size, pos


def _centered_placement(model_w, model_h, garment_w, garment_h):
    """Default placement: half the model width, centred, 28 % from the top."""
    target_w = int(model_w * 0.5)
    new_size = _scaled_garment_size(garment_w, garment_h, target_w / garment_w)
    pos = ((model_w - new_size[0]) // 2, int(model_h * 0.28))
    return new_size, pos


def _paste_garment(model_rgba, garment, new_size, pos):
    """Return a copy of the model with the resized garment alpha-pasted at ``pos``."""
    resized = garment.resize(new_size, resample=Image.LANCZOS)
    canvas = model_rgba.copy()
    # The garment doubles as its own mask so its alpha channel is respected.
    canvas.paste(resized, pos, resized)
    return canvas


def simple_align_garment_to_model(model_img: Image.Image, garment_rgba: Image.Image, pose_keypoints=None) -> Image.Image:
    """Roughly overlay the garment on the model's torso.

    This is only a coarse initialisation; the diffusion step is expected to
    refine it. When both ``"left_shoulder"`` and ``"right_shoulder"`` are
    present in ``pose_keypoints`` the garment is scaled from the shoulder
    distance and positioned around the shoulder midpoint. Otherwise (no
    keypoints, missing shoulders, or keypoints that cannot be used) the
    garment is scaled to half the model width and centred horizontally at
    28 % of the model height.

    Args:
        model_img: Photo of the person.
        garment_rgba: Garment cut-out; it is also used as the paste mask.
        pose_keypoints: Optional mapping of keypoint name to ``(x, y)``.

    Returns:
        An RGBA copy of the model image with the garment pasted on it.
    """
    import logging

    model = model_img.convert("RGBA")
    model_w, model_h = model.size
    garment_w, garment_h = garment_rgba.size

    if pose_keypoints is not None:
        try:
            placement = _shoulder_placement(pose_keypoints, garment_w, garment_h)
            if placement is not None:
                return _paste_garment(model, garment_rgba, *placement)
        except (AttributeError, TypeError, LookupError, ValueError,
                ArithmeticError, MemoryError):
            # Best effort: unusable keypoints must not abort the try-on,
            # so report the problem and use the centred placement instead.
            logging.getLogger(__name__).warning(
                "Shoulder-based garment alignment failed; using centred fallback.",
                exc_info=True,
            )

    fallback = _centered_placement(model_w, model_h, garment_w, garment_h)
    return _paste_garment(model, garment_rgba, *fallback)
| |
|
def extract_pose_and_keypoints(model_img: Image.Image) -> Tuple[Image.Image, Dict[str, Tuple[int,int]]]:
    """Produce the OpenPose conditioning map for ``model_img``.

    Runs the shared ``OpenposeDetector`` on the image and returns the
    rendered pose map as an RGB image with the same size as ``model_img``.

    The second element of the result is reserved for named keypoints such
    as ``{"left_shoulder": (x, y), ...}``. Keypoint extraction is not
    implemented, so the dict is currently always empty.
    TODO: populate it from the detector's keypoint output.

    If the detector fails, the error is logged and a blank white image of
    the input size is returned together with an empty dict.
    NOTE(review): OpenPose maps are normally drawn on a black background;
    confirm that a white placeholder is the intended "no pose" input.

    Returns:
        ``(pose_image, keypoints)``.
    """
    import logging

    detector = get_openpose_detector()
    try:
        pose_image = detector(model_img).convert("RGB")
        # The detector may render the map at its own working resolution
        # (assumption - confirm against controlnet_aux); keep it aligned
        # with the input like the failure branch below does.
        if pose_image.size != model_img.size:
            pose_image = pose_image.resize(model_img.size, Image.BILINEAR)
    except Exception:
        # Best effort: a failed detection must not abort the whole run.
        logging.getLogger(__name__).warning(
            "OpenPose detection failed; using a blank pose map.", exc_info=True
        )
        return Image.new("RGB", model_img.size, (255, 255, 255)), {}

    return pose_image, {}
| |
|
def run_pipeline(model_image: Image.Image, garment_image: Image.Image, prompt_extra: str = "") -> Tuple[Image.Image, Dict[str,Any]]:
    """Generate a try-on image of the garment worn by the person in the photo.

    Steps:
      1. Downscale the model photo so its longest side is at most 768 px.
      2. Remove the garment background and extract the pose map.
      3. Paste the garment on the model to build the init image.
      4. Call the pipeline from load_pipeline() with the init image, the
         pose map as conditioning image, and a freshly drawn random seed.

    Args:
        model_image: Photo of the person.
        garment_image: Photo of the garment.
        prompt_extra: Text appended to the base prompt; when empty a
            default garment description is appended instead.

    Returns:
        ``(result_image, info)`` where ``info`` holds ``model_id``,
        ``controlnet_id``, ``steps``, ``guidance_scale``, ``strength`` and
        the ``seed`` that was used (so a run can be reproduced).

    Raises:
        TypeError: if the pipeline accepts neither the current
            (``image``/``control_image``) nor the legacy
            (``init_image``/``controlnet_conditioning_image``) keywords.
    """
    from contextlib import nullcontext

    max_side = 768
    model_img = model_image.convert("RGB")
    width, height = model_img.size
    longest = max(width, height)
    if longest > max_side:
        scale = max_side / longest
        # Clamp to 1 px so extreme aspect ratios never yield a 0-sized side.
        new_size = (max(1, int(width * scale)), max(1, int(height * scale)))
        model_img = model_img.resize(new_size, Image.LANCZOS)

    garment_rgba = remove_background(garment_image)
    pose_map, keypoints = extract_pose_and_keypoints(model_img)
    init_composite = simple_align_garment_to_model(model_img, garment_rgba, pose_keypoints=keypoints)

    pipe = load_pipeline()

    prompt = ("photo-realistic fashion try-on, ultra detailed, high resolution, realistic lighting. "
              + (prompt_extra or "garment applied on person, preserve texture and zippers, realistic folds."))

    init_image = init_composite.convert("RGB")
    control_image = pose_map.convert("RGB")
    # The init image and the conditioning image must describe the same
    # pixel grid; align them defensively in case the pose map differs.
    if control_image.size != init_image.size:
        control_image = control_image.resize(init_image.size, Image.BILINEAR)

    num_inference_steps = 20
    guidance_scale = 7.5
    strength = 0.75

    # Draw the seed explicitly so it can be reported back to the caller.
    seed = torch.randint(0, 2**31 - 1, (1,)).item()
    generator = torch.Generator(device=DEVICE).manual_seed(seed)

    shared_kwargs = {
        "prompt": prompt,
        "num_inference_steps": num_inference_steps,
        "guidance_scale": guidance_scale,
        "strength": strength,
        "generator": generator,
    }

    def _invoke(**image_kwargs):
        # Mixed precision only on CUDA; on CPU a no-op context is enough.
        ctx = torch.autocast(device_type="cuda") if DEVICE == "cuda" else nullcontext()
        with ctx:
            return pipe(**shared_kwargs, **image_kwargs)

    try:
        out = _invoke(image=init_image, control_image=control_image)
    except TypeError as primary_error:
        # Older/community ControlNet img2img pipelines use different
        # keyword names for the same two images.
        try:
            out = _invoke(init_image=init_image, controlnet_conditioning_image=control_image)
        except TypeError:
            # Neither signature matched: the first failure is the
            # informative one, so surface it instead of the retry's.
            raise primary_error
    result_img = out.images[0]

    info = {
        "model_id": MODEL_ID,
        "controlnet_id": CONTROLNET_ID,
        "steps": num_inference_steps,
        "guidance_scale": guidance_scale,
        "strength": strength,
        "seed": seed,
    }
    return result_img, info