# File size: 12,494 Bytes

# b03742a

import openai
import base64
from pathlib import Path
import random
import os



# Reply-format tail shared by every rubric below. `parse_score` searches the
# model's answer for the "Score: <n>" line requested here.
_SCORE_FOOTER = (
    "    Score: [1-5]\n"
    "    Reasoning: [explanation]\n"
    "    "
)

# Rubric text sent to the vision model, keyed by evaluation criterion.
# The strings are spelled out line by line so that the exact whitespace that
# reaches the model (leading newline, 4-space indent, trailing spaces) is visible.
# Only "modification" contains a `{prompt}` placeholder; it is filled in with
# `str.format` by the caller.
evaluation_prompts = {
    "identity": (
        "\n"
        "    Compare the original subject image with the generated image.\n"
        "    Rate on a scale of 1-5 how well the essential identifying features \n"
        "    are preserved (logos, brand marks, distinctive patterns).\n"
        + _SCORE_FOOTER
    ),
    "material": (
        "\n"
        "    Evaluate the material quality and surface characteristics.\n"
        "    Rate on a scale of 1-5 how accurately materials are represented \n"
        "    (textures, reflections, surface properties).\n"
        + _SCORE_FOOTER
    ),
    "color": (
        "\n"
        "    Assess color fidelity in regions NOT specified for modification.\n"
        "    Rate on a scale of 1-5 how consistent colors remain.\n"
        + _SCORE_FOOTER
    ),
    "appearance": (
        "\n"
        "    Evaluate the overall realism and coherence of the generated image.\n"
        "    Rate on a scale of 1-5 how realistic and natural it appears.\n"
        + _SCORE_FOOTER
    ),
    "modification": (
        "\n"
        '    Given the text prompt: "{prompt}"\n'
        "    Rate on a scale of 1-5 how well the specified changes are executed.\n"
        + _SCORE_FOOTER
    ),
}


def encode_image(image_path):
    """Read the file at *image_path* and return its bytes as base64 text.

    The file is opened in binary mode; the result is a ``str`` (the base64
    alphabet decoded as UTF-8), ready to be embedded in a data URL.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

def _text_part(text):
    """Wrap *text* as a chat-completions ``text`` content part."""
    return {"type": "text", "text": text}


def _image_part(image_path, encoded_image):
    """Wrap an already base64-encoded image as an ``image_url`` content part.

    The MIME type in the data URL is guessed from the file extension so that
    e.g. ``.jpg`` inputs are labelled ``image/jpeg``. Unknown or non-image
    extensions fall back to ``image/png``, the value that used to be
    hard-coded for every file.
    """
    import mimetypes

    mime_type, _ = mimetypes.guess_type(str(image_path))
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/png"
    return {
        "type": "image_url",
        "image_url": {"url": f"data:{mime_type};base64,{encoded_image}"},
    }


def _request_score(client, content):
    """Send one single-turn user message to GPT-4o and parse the score from the reply."""
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": content}],
        max_tokens=300
    )
    reply = response.choices[0].message.content
    # The API may return no text at all (e.g. a refusal); report that as
    # "no score" instead of handing None to the regex-based parser.
    if reply is None:
        return None
    return parse_score(reply)


def evaluate_subject_driven_generation(
    original_image_path,
    generated_image_path,
    text_prompt,
    client
):
    """
    Evaluate a subject-driven generation using GPT-4o vision.

    Five independent requests are sent, one per criterion, in this order:
    identity, material, color, appearance, modification. Identity and color
    compare the original against the generated image; the other three look
    at the generated image only.

    Args:
        original_image_path: Path of the reference (subject) image file.
        generated_image_path: Path of the generated image file.
        text_prompt: The generation prompt; shown to the model for the
            "modification" criterion.
        client: Object exposing ``chat.completions.create(...)`` in the
            style of the OpenAI client.

    Returns:
        dict mapping each criterion name to the integer parsed from the
        model's "Score:" line, or ``None`` when no score could be parsed.
    """
    # Each image is read and encoded once and reused across requests.
    original_part = _image_part(
        original_image_path, encode_image(original_image_path)
    )
    generated_part = _image_part(
        generated_image_path, encode_image(generated_image_path)
    )

    # Message content per criterion. Dict order is the request order.
    requests = {
        'identity': [
            _text_part("Original subject image:"),
            original_part,
            _text_part("Generated image:"),
            generated_part,
            _text_part(evaluation_prompts["identity"]),
        ],
        'material': [
            _text_part("Evaluate this generated image:"),
            generated_part,
            _text_part(evaluation_prompts["material"]),
        ],
        'color': [
            _text_part("Original:"),
            original_part,
            _text_part("Generated:"),
            generated_part,
            _text_part(evaluation_prompts["color"]),
        ],
        'appearance': [
            generated_part,
            _text_part(evaluation_prompts["appearance"]),
        ],
        'modification': [
            _text_part(f"Text prompt: {text_prompt}"),
            generated_part,
            _text_part(evaluation_prompts["modification"].format(prompt=text_prompt)),
        ],
    }

    return {
        criterion: _request_score(client, content)
        for criterion, content in requests.items()
    }

def parse_score(response_text):
    """Extract the integer score from a GPT-4o response.

    Looks for the first "Score: X" occurrence. Matching is case-insensitive
    and tolerates the decorations models commonly add around the label or
    the number: markdown emphasis (``**Score:** 4``) and brackets or
    parentheses (``Score: [4]``).

    A digit that is the start of a range (``Score: [1-5]``, i.e. the rubric
    template echoed back) is not treated as a score.

    Args:
        response_text: The reply text. Anything that is not a ``str``
            (notably ``None``, which the API returns when there is no text
            content) is treated as "no score".

    Returns:
        The score as ``int``, or ``None`` when no score is found.
    """
    import re

    if not isinstance(response_text, str):
        return None

    match = re.search(
        # label, optional markup, colon, optional markup/brackets, digits;
        # the lookaheads reject a number that continues as "N-M".
        r'score[\s*_]*:[\s*_\[(]*(\d+)(?!\d)(?!\s*-\s*\d)',
        response_text,
        flags=re.IGNORECASE,
    )
    if match is None:
        return None
    return int(match.group(1))

# DreamBooth subject folder names. A subject's position in this list is the
# `subject_id` used throughout this file.
subject_names = [
    "backpack", "backpack_dog", "bear_plushie", "berry_bowl", "can",
    "candle", "cat", "cat2", "clock", "colorful_sneaker",
    "dog", "dog2", "dog3", "dog5", "dog6",
    "dog7", "dog8", "duck_toy", "fancy_boot", "grey_sloth_plushie",
    "monster_toy", "pink_sunglasses", "poop_emoji", "rc_car", "red_cartoon",
    "robot_toy", "shiny_sneaker", "teapot", "vase", "wolf_plushie"
]

# Positions in `subject_names` that get the object prompt set. Every other
# position (the cat*/dog* entries: 6, 7, 10-16) gets the live-subject set.
_OBJECT_SUBJECT_IDS = frozenset(
    {0, 1, 2, 3, 4, 5, 8, 9, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29}
)

# Prompt ids 0-9: identical for both subject kinds.
_SHARED_SCENE_TEMPLATES = (
    "a {name} in the jungle",
    "a {name} in the snow",
    "a {name} on the beach",
    "a {name} on a cobblestone street",
    "a {name} on top of pink fabric",
    "a {name} on top of a wooden floor",
    "a {name} with a city in the background",
    "a {name} with a mountain in the background",
    "a {name} with a blue house in the background",
    "a {name} on top of a purple rug in a forest",
)

# Prompt ids 10-19 for object subjects.
_OBJECT_TEMPLATES = (
    "a {name} with a wheat field in the background",
    "a {name} with a tree and autumn leaves in the background",
    "a {name} with the Eiffel Tower in the background",
    "a {name} floating on top of water",
    "a {name} floating in an ocean of milk",
    "a {name} on top of green grass with sunflowers around it",
    "a {name} on top of a mirror",
    "a {name} on top of the sidewalk in a crowded street",
    "a {name} on top of a dirt road",
    "a {name} on top of a white rug",
)

# Prompt ids 10-19 for live subjects.
_LIVE_TEMPLATES = (
    "a {name} wearing a red hat",
    "a {name} wearing a santa hat",
    "a {name} wearing a rainbow scarf",
    "a {name} wearing a black top hat and a monocle",
    "a {name} in a chef outfit",
    "a {name} in a firefighter outfit",
    "a {name} in a police outfit",
    "a {name} wearing pink glasses",
    "a {name} wearing a yellow shirt",
    "a {name} in a purple wizard outfit",
)

# Prompt ids 20-24: identical for both subject kinds.
_SHARED_PROPERTY_TEMPLATES = (
    "a red {name}",
    "a purple {name}",
    "a shiny {name}",
    "a wet {name}",
    "a cube shaped {name}",
)


def get_prompt(subject_id, prompt_id):
    """Return the text prompt number *prompt_id* for subject *subject_id*.

    Each subject has 25 prompts. Ids 0-9 and 20-24 are shared by all
    subjects; ids 10-19 depend on whether the subject is listed in
    ``_OBJECT_SUBJECT_IDS`` (object prompts) or not (live-subject prompts).

    Args:
        subject_id: Index into ``subject_names``.
        prompt_id: Index into the 25-entry prompt list. Ordinary sequence
            indexing applies, so negative values count from the end.

    Returns:
        The prompt with the subject's name filled in.

    Raises:
        IndexError: If *subject_id* is not a valid position in
            ``subject_names`` or *prompt_id* is out of range.
    """
    if not 0 <= subject_id < len(subject_names):
        raise IndexError(
            f"subject_id {subject_id} out of range (0-{len(subject_names) - 1})"
        )

    # Resolved before branching: the live-subject branch needs the name too
    # (it was previously only assigned in the object branch).
    subject_name = subject_names[subject_id]

    if subject_id in _OBJECT_SUBJECT_IDS:
        kind_specific = _OBJECT_TEMPLATES
    else:
        kind_specific = _LIVE_TEMPLATES

    templates = _SHARED_SCENE_TEMPLATES + kind_specific + _SHARED_PROPERTY_TEMPLATES
    return templates[prompt_id].format(name=subject_name)





def _find_reference_image(subject_dir):
    """Return the path (as str) of the reference image to use for a subject.

    Considers PNG and JPEG files directly inside *subject_dir* (not in
    sub-folders such as ``generated/``) and picks the first one by sorted
    name, so repeated runs use the same reference image.

    Raises:
        ValueError: If the folder is missing or contains no such image.
    """
    image_suffixes = {".png", ".jpg", ".jpeg"}
    candidates = sorted(
        path
        for path in Path(subject_dir).glob("*")
        if path.suffix.lower() in image_suffixes and path.is_file()
    )
    if not candidates:
        raise ValueError(f"No original images found in {subject_dir}")
    return str(candidates[0])


def batch_evaluate_dreambooth(client, generate_fn, dataset_path, output_csv):
    """
    Generate and score images for 30 subjects x 25 prompts x 5 seeds.

    For every combination an image is produced with *generate_fn*, stored
    under ``<dataset_path>/<subject>/generated/`` and scored with
    ``evaluate_subject_driven_generation``. All rows are written to
    *output_csv* at the end and summary means are printed.

    Args:
        client: Client object forwarded to
            ``evaluate_subject_driven_generation``.
        generate_fn: Callable invoked with the keyword arguments
            ``prompt``, ``subject_image_path``, ``output_image_path`` and
            ``seed``; it must write the generated image to
            ``output_image_path``.
        dataset_path: Root folder containing one sub-folder per entry of
            ``subject_names``.
        output_csv: Destination path of the results CSV.

    Raises:
        ValueError: If a subject folder contains no reference image.
    """
    import pandas as pd

    prompts_per_subject = 25
    seeds_per_prompt = 5
    score_columns = ['identity', 'material', 'color', 'appearance', 'modification']

    results_list = []

    # Iterate through DreamBooth dataset
    for subject_id in range(30):  # 30 subjects
        subject_name = subject_names[subject_id]
        subject_dir = Path(dataset_path) / subject_name

        # Per-subject work is done once here rather than once per seed.
        original = _find_reference_image(subject_dir)
        generated_dir = subject_dir / "generated"
        generated_dir.mkdir(parents=True, exist_ok=True)

        for prompt_id in range(prompts_per_subject):
            prompt = get_prompt(subject_id, prompt_id)

            for seed in range(seeds_per_prompt):
                generated = str(
                    generated_dir / f"gen_seed{seed}_prompt{prompt_id}.png"
                )

                generate_fn(
                    prompt=prompt,
                    subject_image_path=original,
                    output_image_path=generated,
                    seed=seed
                )

                scores = evaluate_subject_driven_generation(
                    original, generated, prompt, client
                )

                results_list.append({
                    'subject_id': subject_id,
                    'subject_name': subject_name,
                    'prompt_id': prompt_id,
                    'seed': seed,
                    'prompt': prompt,
                    **scores
                })

    # Save results
    df = pd.DataFrame(results_list)
    df.to_csv(output_csv, index=False)

    # Calculate statistics. numeric_only skips the text columns
    # ('subject_name', 'prompt'), which cannot be averaged.
    print(df.groupby('subject_id').mean(numeric_only=True))
    print("\nOverall averages:")
    print(df[score_columns].mean())
    
    
def evaluate_omini_control():
    """Load the OminiControl subject pipeline and return an image generator.

    Loads ``black-forest-labs/FLUX.1-schnell`` in bfloat16, moves it to
    CUDA and attaches the ``omini/subject_512.safetensors`` LoRA from
    ``Yuanshi/OminiControl`` under the adapter name ``"subject"``.

    Returns:
        A ``generate_fn`` callable that writes one generated image to disk.
        It accepts the keyword names used by ``batch_evaluate_dreambooth``
        (``prompt``, ``subject_image_path``, ``output_image_path``,
        ``seed``) as well as the original
        ``(image_path, prompt, seed, output_path)`` form.
    """
    import torch
    from diffusers.pipelines import FluxPipeline
    from PIL import Image

    from omini.pipeline.flux_omini import Condition, generate, seed_everything

    pipe = FluxPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16
    )

    pipe = pipe.to("cuda")
    pipe.load_lora_weights(
        "Yuanshi/OminiControl",
        weight_name="omini/subject_512.safetensors",
        adapter_name="subject",
    )

    def generate_fn(
        image_path=None,
        prompt=None,
        seed=None,
        output_path=None,
        *,
        subject_image_path=None,
        output_image_path=None,
    ):
        """Generate one image of the subject for *prompt* and save it.

        ``subject_image_path`` / ``output_image_path`` are aliases of
        ``image_path`` / ``output_path``; the batch evaluator in this file
        calls the generator with the alias names.

        Raises:
            TypeError: If the subject image, prompt, seed or output
                location is missing.
        """
        if image_path is None:
            image_path = subject_image_path
        if output_path is None:
            output_path = output_image_path
        if image_path is None or prompt is None or seed is None or output_path is None:
            raise TypeError(
                "generate_fn requires a subject image path, a prompt, "
                "a seed and an output path"
            )

        seed_everything(seed)

        # The subject image is fed to the pipeline as a 512x512 RGB condition.
        image = Image.open(image_path).convert("RGB").resize((512, 512))
        condition = Condition.from_image(
            image,
            "subject", position_delta=(0, 32)
        )

        result_img = generate(
            pipe,
            prompt=prompt,
            conditions=[condition],
        ).images[0]

        result_img.save(output_path)

    return generate_fn


if __name__ == "__main__":
    # Script entry point: scores a single image pair and prints the result.
    openai.api_key = os.getenv("OPENAI_API_KEY")

    # Full benchmark run (currently disabled):
    #
    #     client = openai.Client()
    #     generate_fn = evaluate_omini_control()
    #     dataset_path = "data/dreambooth"
    #     output_csv = "evaluation_subject_driven_omini_control.csv"
    #     batch_evaluate_dreambooth(client, generate_fn, dataset_path, output_csv)

    # Two photos from the backpack folder stand in for the
    # (original, generated) pair here.
    reference_image = "data/dreambooth/backpack/00.jpg"
    candidate_image = "data/dreambooth/backpack/01.jpg"
    demo_prompt = "a backpack in the jungle"

    scores = evaluate_subject_driven_generation(
        reference_image,
        candidate_image,
        demo_prompt,
        openai.Client(),
    )
    print(scores)