File size: 6,170 Bytes
b03742a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
import torch
from diffusers.pipelines import FluxPipeline
from omini.pipeline.flux_omini import Condition, generate, seed_everything, convert_to_condition
from omini.rotation import RotationConfig, RotationTuner
from PIL import Image
def load_rotation(transformer, path: str, adapter_name: str = "default", strict: bool = False):
"""
Load rotation adapter weights.
Args:
path: Directory containing the saved adapter weights
adapter_name: Name of the adapter to load
strict: Whether to strictly match all keys
"""
from safetensors.torch import load_file
import os
import yaml
device = transformer.device
print(f"device for loading: {device}")
# Try to load safetensors first, then fallback to .pth
safetensors_path = os.path.join(path, f"{adapter_name}.safetensors")
pth_path = os.path.join(path, f"{adapter_name}.pth")
if os.path.exists(safetensors_path):
state_dict = load_file(safetensors_path)
print(f"Loaded rotation adapter from {safetensors_path}")
elif os.path.exists(pth_path):
state_dict = torch.load(pth_path, map_location=device)
print(f"Loaded rotation adapter from {pth_path}")
else:
raise FileNotFoundError(
f"No adapter weights found for '{adapter_name}' in {path}\n"
f"Looking for: {safetensors_path} or {pth_path}"
)
# # Get the device and dtype of the transformer
transformer_device = next(transformer.parameters()).device
transformer_dtype = next(transformer.parameters()).dtype
state_dict_with_adapter = {}
for k, v in state_dict.items():
# Reconstruct the full key with adapter name
new_key = k.replace(".rotation.", f".rotation.{adapter_name}.")
if "_adapter_config" in new_key:
print(f"adapter_config key: {new_key}")
# Move to target device and dtype
# Check if this parameter should keep its original dtype (e.g., indices, masks)
if v.dtype in [torch.long, torch.int, torch.int32, torch.int64, torch.bool]:
# Keep integer/boolean dtypes, only move device
state_dict_with_adapter[new_key] = v.to(device=transformer_device)
else:
# Convert floating point tensors to target dtype and device
state_dict_with_adapter[new_key] = v.to(device=transformer_device, dtype=transformer_dtype)
# Add adapter name back to keys (reverse of what we did in save)
state_dict_with_adapter = {
k.replace(".rotation.", f".rotation.{adapter_name}."): v
for k, v in state_dict.items()
}
# Load into the model
missing, unexpected = transformer.load_state_dict(
state_dict_with_adapter,
strict=strict
)
if missing:
print(f"Missing keys: {missing[:5]}{'...' if len(missing) > 5 else ''}")
if unexpected:
print(f"Unexpected keys: {unexpected[:5]}{'...' if len(unexpected) > 5 else ''}")
# Load config if available
config_path = os.path.join(path, f"{adapter_name}_config.yaml")
if os.path.exists(config_path):
with open(config_path, 'r') as f:
config = yaml.safe_load(f)
print(f"Loaded config: {config}")
total_params = sum(p.numel() for p in state_dict.values())
print(f"Loaded {len(state_dict)} tensors ({total_params:,} parameters)")
return state_dict
# prepare input image and prompt
image = Image.open("assets/coffee.png").convert("RGB")
w, h, min_dim = image.size + (min(image.size),)
image = image.crop(
((w - min_dim) // 2, (h - min_dim) // 2, (w + min_dim) // 2, (h + min_dim) // 2)
).resize((512, 512))
prompt = "In a bright room. A cup of a coffee with some beans on the side. They are placed on a dark wooden table."
canny_image = convert_to_condition("canny", image)
condition = Condition(canny_image, "canny")
seed_everything()
for i in range(40, 60):
pipe = FluxPipeline.from_pretrained(
"black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16
)
# add adapter to the transformer
transformer = pipe.transformer
adapter_name = "default"
transformer._hf_peft_config_loaded = True
rotation_adapter_config = {
"r": 4,
"num_rotations": 4,
"target_modules": "(.*x_embedder|.*(?<!single_)transformer_blocks\\.[0-9]+\\.norm1\\.linear|.*(?<!single_)transformer_blocks\\.[0-9]+\\.attn\\.to_k|.*(?<!single_)transformer_blocks\\.[0-9]+\\.attn\\.to_q|.*(?<!single_)transformer_blocks\\.[0-9]+\\.attn\\.to_v|.*(?<!single_)transformer_blocks\\.[0-9]+\\.attn\\.to_out\\.0|.*(?<!single_)transformer_blocks\\.[0-9]+\\.ff\\.net\\.2|.*single_transformer_blocks\\.[0-9]+\\.norm\\.linear|.*single_transformer_blocks\\.[0-9]+\\.proj_mlp|.*single_transformer_blocks\\.[0-9]+\\.proj_out|.*single_transformer_blocks\\.[0-9]+\\.attn.to_k|.*single_transformer_blocks\\.[0-9]+\\.attn.to_q|.*single_transformer_blocks\\.[0-9]+\\.attn.to_v|.*single_transformer_blocks\\.[0-9]+\\.attn.to_out)",
}
config = RotationConfig(**rotation_adapter_config)
config.T = float(i + 1) / 20
rotation_tuner = RotationTuner(
transformer,
config,
adapter_name=adapter_name,
)
# Convert rotation tuner to bfloat16
transformer = transformer.to(torch.bfloat16)
transformer.set_adapter(adapter_name)
# load adapter weights
load_rotation(
transformer,
path="runs/20251110-191859/ckpt/4000",
adapter_name=adapter_name,
strict=False,
)
pipe = pipe.to("cuda")
result_img = generate(
pipe,
prompt=prompt,
conditions=[condition],
).images[0]
concat_image = Image.new("RGB", (1536, 512))
concat_image.paste(image, (0, 0))
concat_image.paste(condition.condition, (512, 0))
concat_image.paste(result_img, (1024, 0))
# Save images
result_img.save(f"result_{i+1}.png")
concat_image.save(f"result_concat_{i+1}.png")
print(f"Saved result_{i+1}.png and result_concat_{i+1}.png") |