|
|
import gradio as gr |
|
|
import os |
|
|
import spaces |
|
|
import torch |
|
|
from diffusers import AuraFlowPipeline, Lumina2Pipeline, NewbiePipeline |
|
|
from transformers import AutoModel, AutoTokenizer |
|
|
import random |
|
|
import numpy as np |
|
|
from PIL import Image |
|
|
import copy |
|
|
import warnings |
|
|
import math |
|
|
import time |
|
|
from stablepy import SCHEDULER_CONFIG_MAP, FLUX_SCHEDULE_TYPES, scheduler_names, SCHEDULE_TYPE_OPTIONS, FLUX_SCHEDULE_TYPE_OPTIONS |
|
|
|
|
|
from constants import BASE_PROMPT_NEWBIE, BASE_NEG_PROMPT_NEWBIE, EXAMPLES_NEWBIE, BASE_NEG_PROMPT_PONY7, BASE_PROMPT_NETA |
|
|
from pipeline_newbie_img2img import NewbieImg2ImgPipeline |
|
|
|
|
|
# Subset of the stablepy scheduler registry whose names contain "FlowMatch".
FLOW_MATCH_ONLY_MAP = {
    name: entry
    for name, entry in SCHEDULER_CONFIG_MAP.items()
    if "FlowMatch" in name
}

# Sampler names in registry order; the first entry is treated as the default
# sampler by set_sampler().
FLOW_MATCH_LIST = list(FLOW_MATCH_ONLY_MAP)

# Sampler choices offered for NewBie: every flow-match sampler except the two
# SDE variants listed here.
SAMPLER_NEWBIE = [
    name
    for name in FLOW_MATCH_LIST
    if name not in ("FlowMatch DPM++ SDE", "FlowMatch DPM++ 3M SDE")
]
|
|
|
|
|
# Process-wide settings: set the tokenizers parallelism flag to "false" and
# silence every Python warning.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
warnings.filterwarnings("ignore")

# Maximum number of prompt tokens accepted for NewBie; used by both the
# live token counter and the generation function.
NEWBIE_TOKEN_LIMIT = 1100

# --- NewBie -----------------------------------------------------------------
model_path = "Disty0/NewBie-image-Exp0.1-Diffusers"

# The second text encoder is loaded separately (with remote code enabled) and
# then handed to the pipeline.
text_encoder_2 = AutoModel.from_pretrained(
    model_path,
    subfolder="text_encoder_2",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
)
pipe_newbie = NewbiePipeline.from_pretrained(
    model_path,
    text_encoder_2=text_encoder_2,
    torch_dtype=torch.bfloat16,
)
pipe_newbie.to("cuda")
del text_encoder_2  # drop the module-level name; the pipeline was given the encoder above

# Snapshot of the stock scheduler, used as the base config for other samplers
# and re-installed after every NewBie generation.
newbie_default_scheduler = copy.deepcopy(pipe_newbie.scheduler)

# The img2img pipeline is built from the text-to-image pipeline's components.
pipe_newbie_img2img = NewbieImg2ImgPipeline(**pipe_newbie.components).to("cuda")

# --- Pony v7 ----------------------------------------------------------------
pipe_pony = AuraFlowPipeline.from_pretrained(
    "purplesmartai/pony-v7-base",
    torch_dtype=torch.bfloat16,
)
pipe_pony.to("cuda")

# --- NetaYume ---------------------------------------------------------------
pipe_netayume = Lumina2Pipeline.from_pretrained(
    "duongve/NetaYume-Lumina-Image-2.0-Diffusers-v35-pretrained",
    torch_dtype=torch.bfloat16,
)
pipe_netayume.to("cuda")
|
|
|
|
|
|
|
|
def set_sampler(pipe, sampler_name, schedule_type, default_config):
    """Install a fresh scheduler on ``pipe`` for the requested sampler.

    Args:
        pipe: Pipeline whose ``scheduler`` attribute is replaced.
        sampler_name: Key of ``FLOW_MATCH_ONLY_MAP``. The first entry of
            ``FLOW_MATCH_LIST`` selects the default scheduler.
        schedule_type: Key looked up in ``FLUX_SCHEDULE_TYPES``; a missing or
            empty entry applies no extra configuration.
        default_config: The default scheduler instance. Its ``config`` seeds
            every other sampler. It is never modified by this function.

    Returns:
        The same ``pipe`` object, for chaining.

    Raises:
        KeyError: If ``sampler_name`` is not the default and is not present
            in ``FLOW_MATCH_ONLY_MAP``.
    """
    if sampler_name == FLOW_MATCH_LIST[0]:
        # Work on a private copy. Registering schedule overrides directly on
        # the shared default would make them persist into later requests.
        pipe.scheduler = copy.deepcopy(default_config)
    else:
        scheduler_class, config = FLOW_MATCH_ONLY_MAP[sampler_name]
        pipe.scheduler = scheduler_class.from_config(default_config.config, **config)

    flux_schedule_config = FLUX_SCHEDULE_TYPES.get(schedule_type)
    if flux_schedule_config:
        pipe.scheduler.register_to_config(**flux_schedule_config)

    return pipe
|
|
|
|
|
|
|
|
def get_newbie_token_details(prompt, system_prompt, tokenizer):
    """Count the tokens of a NewBie prompt and derive its padded length.

    Args:
        prompt: User prompt; ``None`` is treated as an empty string and any
            other value is converted with ``str``.
        system_prompt: System prompt, normalised the same way as ``prompt``.
        tokenizer: Callable invoked as ``tokenizer(text,
            add_special_tokens=False)`` that returns a mapping with an
            ``"input_ids"`` sequence.

    Returns:
        A ``(total_tokens, sequence_length)`` tuple. ``total_tokens`` is the
        token count of the system prompt, the `` <Prompt Start> `` separator
        and the prompt, plus 2. ``sequence_length`` is ``total_tokens``
        rounded up to a multiple of 512, never below 512.
    """

    def _token_count(text):
        return len(tokenizer(text, add_special_tokens=False)["input_ids"])

    system_text = "" if system_prompt is None else str(system_prompt)
    user_text = "" if prompt is None else str(prompt)

    pieces = (system_text, " <Prompt Start> ", user_text)
    # The extra 2 presumably accounts for special tokens added by the
    # pipeline - TODO confirm against the pipeline's encoding code.
    total_tokens = sum(_token_count(piece) for piece in pieces) + 2

    # Round up to the next multiple of 512, with 512 as the floor.
    sequence_length = max(512, math.ceil(total_tokens / 512) * 512)

    return total_tokens, sequence_length
|
|
|
|
|
|
|
|
def check_token_count(prompt, system_prompt):
    """Build the HTML update for the NewBie token counter.

    Args:
        prompt: Current text of the NewBie prompt box.
        system_prompt: Currently selected NewBie system prompt.

    Returns:
        A ``gr.update`` for the counter HTML component: a red warning when
        the prompt exceeds ``NEWBIE_TOKEN_LIMIT``, otherwise a grey
        ``used/available`` readout. If counting fails for any reason the
        counter is hidden instead of surfacing an error.
    """
    try:
        # Fixed 2 s pause before counting; presumably a crude debounce used
        # together with trigger_mode="always_last" - confirm intent.
        time.sleep(2)
        total, seq_len = get_newbie_token_details(
            prompt, system_prompt, pipe_newbie.tokenizer_2
        )
    except Exception:
        # Best effort: the counter is purely informational.
        return gr.update(visible=False)

    if total > NEWBIE_TOKEN_LIMIT:
        # generate_image_newbie rejects over-limit prompts with a ValueError,
        # so the warning must not promise truncation.
        return gr.update(
            value=f"<div style='color: #ef4444; border: 1px solid #ef4444; background-color: #fef2f2; padding: 8px; border-radius: 5px; font-weight: bold; width: 100%; text-align: center;'>"
                  f"⚠️ Token limit exceeded! ({total}/{NEWBIE_TOKEN_LIMIT}). <br>"
                  f"Generation will fail until the prompt is shortened.</div>",
            visible=True,
        )

    return gr.update(
        value=f"<div style='color: #6b7280; font-size: 0.9em; text-align: right; width: 100%;'> {total}/{min(seq_len, NEWBIE_TOKEN_LIMIT)}</div>",
        visible=True,
    )
|
|
|
|
|
|
|
|
@spaces.GPU()
def generate_image_newbie(prompt, negative_prompt, system_prompt, height, width, num_inference_steps, guidance_scale, cfg_trunc_ratio, cfg_normalization, seed, sigmas_factor, sampler, schedule_type, image, strength, progress=gr.Progress(track_tqdm=True)):
    """Generate one image with the NewBie pipeline (text-to-image or img2img).

    Args:
        prompt: Positive prompt text.
        negative_prompt: Negative prompt text.
        system_prompt: System prompt forwarded to the pipeline.
        height: Output height; cast to ``int``.
        width: Output width; cast to ``int``.
        num_inference_steps: Number of denoising steps; cast to ``int``.
        guidance_scale: Guidance scale forwarded unchanged.
        cfg_trunc_ratio: CFG truncation ratio forwarded unchanged.
        cfg_normalization: CFG normalization flag forwarded unchanged.
        seed: RNG seed. ``None`` or a negative value picks a random seed.
        sigmas_factor: When not exactly ``1.0``, a linear sigma schedule
            scaled by this factor is passed to the pipeline.
        sampler: Sampler name passed to ``set_sampler``.
        schedule_type: Schedule type passed to ``set_sampler``.
        image: Optional reference image (NumPy array or something
            ``PIL.Image.open`` accepts). When given, the img2img pipeline
            is used.
        strength: img2img strength; only used when ``image`` is given.
        progress: Gradio progress tracker (tracks tqdm bars).

    Returns:
        ``(image, seed)``: the first generated image and the seed used.

    Raises:
        ValueError: If the prompt exceeds ``NEWBIE_TOKEN_LIMIT`` tokens.
    """
    # An emptied seed field arrives as None; treat it like "-1 for random".
    if seed is None or seed < 0:
        seed = random.randint(0, 2**32 - 1)

    generator = torch.Generator("cuda").manual_seed(int(seed))

    total_tokens, seq_len = get_newbie_token_details(prompt, system_prompt, pipe_newbie.tokenizer_2)
    if total_tokens > NEWBIE_TOKEN_LIMIT:
        raise ValueError(f"The prompt is longer than the allowed limit of {NEWBIE_TOKEN_LIMIT} tokens.")
    seq_len = min(seq_len, NEWBIE_TOKEN_LIMIT)

    pipeline_args = {
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "height": int(height),
        "width": int(width),
        "num_inference_steps": int(num_inference_steps),
        "guidance_scale": guidance_scale,
        "system_prompt": system_prompt,
        "cfg_trunc_ratio": cfg_trunc_ratio,
        "cfg_normalization": cfg_normalization,
        "generator": generator,
        "max_sequence_length": int(seq_len),
    }

    if sigmas_factor != 1.0:
        # Linear schedule from 1.0 down to 1/steps, scaled by the factor.
        steps = int(num_inference_steps)
        sigmas = np.linspace(1.0, 1 / steps, steps)
        sigmas = sigmas * sigmas_factor
        pipeline_args["sigmas"] = sigmas

    if image is not None:
        pipe_task_nb = pipe_newbie_img2img
        if isinstance(image, np.ndarray):
            img_pil = Image.fromarray(image)
        else:
            img_pil = Image.open(image)
        # Shrink in place to fit inside the requested size, keeping aspect ratio.
        img_pil.thumbnail((width, height), Image.Resampling.LANCZOS)
        pipeline_args["image"] = img_pil
        pipeline_args["strength"] = strength
    else:
        pipe_task_nb = pipe_newbie

    try:
        set_sampler(pipe_task_nb, sampler, schedule_type, newbie_default_scheduler)
        image = pipe_task_nb(**pipeline_args).images[0]
    finally:
        # Always put the default scheduler back, even if sampling failed, so
        # one bad request cannot leave a custom scheduler installed.
        pipe_task_nb.scheduler = newbie_default_scheduler

    return image, seed
|
|
|
|
|
|
|
|
@spaces.GPU()
def generate_image_pony(prompt, negative_prompt, height, width, num_inference_steps, guidance_scale, sigmas_factor, seed, progress=gr.Progress(track_tqdm=True)):
    """Generate one image with the Pony v7 (AuraFlow) pipeline.

    Args:
        prompt: Positive prompt text.
        negative_prompt: Negative prompt text.
        height: Output height; cast to ``int``.
        width: Output width; cast to ``int``.
        num_inference_steps: Number of denoising steps; cast to ``int``.
        guidance_scale: Guidance scale forwarded unchanged.
        sigmas_factor: When not exactly ``1.0``, a linear sigma schedule
            scaled by this factor is passed to the pipeline as a list.
        seed: RNG seed. ``None`` or a negative value picks a random seed.
        progress: Gradio progress tracker (tracks tqdm bars).

    Returns:
        ``(image, seed)``: the first generated image and the seed used.
    """
    # An emptied seed field arrives as None; treat it like "-1 for random".
    if seed is None or seed < 0:
        seed = random.randint(0, 2**32 - 1)

    generator = torch.Generator("cuda").manual_seed(int(seed))

    pipeline_args = {
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "height": int(height),
        "width": int(width),
        "num_inference_steps": int(num_inference_steps),
        "guidance_scale": guidance_scale,
        "generator": generator,
    }

    if sigmas_factor != 1.0:
        # Linear schedule from 1.0 down to 1/steps, scaled by the factor.
        steps = int(num_inference_steps)
        sigmas = np.linspace(1.0, 1 / steps, steps)
        sigmas = sigmas * sigmas_factor
        pipeline_args["sigmas"] = sigmas.tolist()

    image = pipe_pony(**pipeline_args).images[0]
    return image, seed
|
|
|
|
|
|
|
|
@spaces.GPU()
def generate_image_netayume(prompt, negative_prompt, system_prompt, height, width, guidance_scale, num_inference_steps, cfg_trunc_ratio, cfg_normalization, seed, sigmas_factor, progress=gr.Progress(track_tqdm=True)):
    """Generate one image with the NetaYume (Lumina 2) pipeline.

    Args:
        prompt: Positive prompt text.
        negative_prompt: Negative prompt text; an empty or whitespace-only
            value is passed to the pipeline as ``None``.
        system_prompt: System prompt forwarded to the pipeline.
        height: Output height; cast to ``int``.
        width: Output width; cast to ``int``.
        guidance_scale: Guidance scale forwarded unchanged.
        num_inference_steps: Number of denoising steps; cast to ``int``.
        cfg_trunc_ratio: CFG truncation ratio forwarded unchanged.
        cfg_normalization: CFG normalization flag forwarded unchanged.
        seed: RNG seed. ``None`` or a negative value picks a random seed.
        sigmas_factor: When not exactly ``1.0``, a linear sigma schedule
            scaled by this factor is passed to the pipeline as a list.
        progress: Gradio progress tracker (tracks tqdm bars).

    Returns:
        ``(image, seed)``: the first generated image and the seed used.
    """
    # An emptied seed field arrives as None; treat it like "-1 for random".
    if seed is None or seed < 0:
        seed = random.randint(0, 2**32 - 1)

    generator = torch.Generator("cuda").manual_seed(int(seed))

    pipeline_args = {
        "prompt": prompt,
        "negative_prompt": negative_prompt if negative_prompt and negative_prompt.strip() else None,
        "system_prompt": system_prompt,
        "height": int(height),
        "width": int(width),
        "guidance_scale": guidance_scale,
        "num_inference_steps": int(num_inference_steps),
        "cfg_trunc_ratio": cfg_trunc_ratio,
        "cfg_normalization": cfg_normalization,
        "generator": generator,
    }

    if sigmas_factor != 1.0:
        # Linear schedule from 1.0 down to 1/steps, scaled by the factor.
        steps = int(num_inference_steps)
        sigmas = np.linspace(1.0, 1 / steps, steps)
        sigmas = sigmas * sigmas_factor
        pipeline_args["sigmas"] = sigmas.tolist()

    image = pipe_netayume(**pipeline_args).images[0]

    return image, seed
|
|
|
|
|
|
|
|
# Gradio UI: one tab per model, followed by the event wiring.
with gr.Blocks(theme=gr.themes.Soft(), title="Image Generation Playground") as demo:
    gr.Markdown("# Image Generation Playground")

    with gr.Tabs():
        # ------------------------------------------------------------------
        # NewBie tab
        # ------------------------------------------------------------------
        with gr.Tab(label="NewBie Image"):
            gr.Markdown("## 🐣 NewBie Image Exp0.1")
            gr.Markdown("A 3.5B parameter experimental DiT model built on Next-DiT and Lumina insights")

            with gr.Row(variant="panel"):
                with gr.Column(scale=2):
                    prompt_newbie = gr.Textbox(label="Prompt", value=BASE_PROMPT_NEWBIE, lines=3)

                    # Live token counter; filled in by check_token_count.
                    token_counter_display = gr.HTML(
                        value="<div style='color: #6b7280; font-size: 0.9em; text-align: right;'>Token usage: Calculating...</div>",
                        visible=True,
                    )

                    negative_prompt_newbie = gr.Textbox(
                        label="Negative Prompt",
                        value=BASE_NEG_PROMPT_NEWBIE,
                        lines=2,
                    )

                    system_prompt_newbie = gr.Dropdown(
                        label="System Prompt",
                        choices=[
                            "You are an assistant designed to generate superior images with the superior degree of image-text alignment based on textual prompts or user prompts.",
                            "You are an advanced assistant designed to generate high-quality images from user prompts, utilizing danbooru tags to accurately guide the image creation process.",
                        ],
                        allow_custom_value=True,
                        value="You are an assistant designed to generate superior images with the superior degree of image-text alignment based on textual prompts or user prompts.",
                    )

                    with gr.Row():
                        height_newbie = gr.Slider(label="Height", minimum=512, maximum=2048, step=64, value=1264)
                        width_newbie = gr.Slider(label="Width", minimum=512, maximum=2048, step=64, value=832)
                    with gr.Row():
                        steps_newbie = gr.Slider(label="Inference Steps", minimum=1, maximum=100, step=1, value=30)
                        guidance_scale_newbie = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=20.0, step=0.1, value=6.5)
                    with gr.Row():
                        sigmas_newbie = gr.Slider(
                            label="Sigmas Factor",
                            info="Lower values increase detail and complexity. Higher values simplify and clean the image.",
                            minimum=0.9,
                            maximum=1.1,
                            step=0.001,
                            value=0.99,
                        )
                        seed_newbie = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)

                    with gr.Accordion("More settings", open=False):
                        with gr.Row():
                            sampler_newbie = gr.Dropdown(
                                label="Sampler",
                                choices=SAMPLER_NEWBIE,
                                value="FlowMatch DPM++ 2M SDE",
                            )
                            schedule_type_newbie = gr.Dropdown(
                                label="Schedule Type",
                                choices=FLUX_SCHEDULE_TYPE_OPTIONS,
                                value=FLUX_SCHEDULE_TYPE_OPTIONS[0],
                            )
                        with gr.Row():
                            cfg_norm_newbie = gr.Checkbox(label="CFG Normalization", value=True)
                            cfg_trunc_newbie = gr.Slider(label="CFG Truncation Ratio", minimum=0.0, maximum=1.0, step=0.05, value=1.0)

                    # Optional img2img inputs.
                    with gr.Row():
                        image_newbie = gr.Image(label="Reference image", interactive=True)
                        strength_newbie = gr.Slider(
                            label="Reference Image Adherence",
                            info="Lower values = strong adherence; higher values = weak adherence.",
                            minimum=0.1,
                            maximum=1.0,
                            step=0.01,
                            value=0.65,
                        )

                    generate_btn_newbie = gr.Button("Generate", variant="primary")

                with gr.Column(scale=1):
                    image_output_newbie = gr.Image(label="Generated Image", format="png", interactive=False)
                    used_seed_newbie = gr.Number(label="Used Seed", interactive=False)

            gr.Examples(
                examples=EXAMPLES_NEWBIE,
                inputs=[prompt_newbie],
                label="Example Prompts",
            )

        # ------------------------------------------------------------------
        # Pony v7 tab
        # ------------------------------------------------------------------
        with gr.Tab(label="Pony v7"):
            gr.Markdown("## ✨ Pony v7 AuraFlow")
            gr.Markdown("Generate images from text prompts using the AuraFlow model.")

            with gr.Row(variant="panel"):
                with gr.Column(scale=2):
                    prompt_pony = gr.Textbox(label="Prompt", value="Score_9, ", lines=3)
                    neg_prompt_pony = gr.Textbox(
                        label="Negative Prompt",
                        value=BASE_NEG_PROMPT_PONY7,
                        lines=3,
                    )
                    with gr.Row():
                        height_pony = gr.Slider(label="Height", minimum=512, maximum=1536, step=64, value=1024)
                        width_pony = gr.Slider(label="Width", minimum=512, maximum=1536, step=64, value=1024)
                    with gr.Row():
                        steps_pony = gr.Slider(label="Inference Steps", minimum=1, maximum=100, step=1, value=30)
                        cfg_pony = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=20.0, step=0.1, value=3.5)
                    with gr.Row():
                        sigmas_pony = gr.Slider(label="Sigmas Factor", minimum=0.95, maximum=1.05, step=0.01, value=0.99)
                        seed_pony = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)

                    generate_btn_pony = gr.Button("Generate", variant="primary")

                with gr.Column(scale=1):
                    image_output_pony = gr.Image(label="Generated Image", format="png", interactive=False)
                    used_seed_pony = gr.Number(label="Used Seed", interactive=False)

        # ------------------------------------------------------------------
        # NetaYume tab
        # ------------------------------------------------------------------
        with gr.Tab(label="NetaYume v3.5"):
            gr.Markdown("## 🌌 NetaYume v3.5 Lumina")
            gr.Markdown("Generate images from text prompts using the Lumina 2 model with a focus on anime aesthetics.")

            with gr.Row(variant="panel"):
                with gr.Column(scale=2):
                    prompt_neta = gr.Textbox(label="Prompt", value=BASE_PROMPT_NETA, lines=5)
                    neg_prompt_neta = gr.Textbox(
                        label="Negative Prompt",
                        value="low quality, bad quality, blurry, low resolution, deformed, ugly, bad anatomy",
                        placeholder="Enter concepts to avoid...",
                        lines=2,
                    )
                    system_prompt_neta = gr.Dropdown(
                        label="System Prompt",
                        choices=[
                            "You are an advanced assistant designed to generate high-quality images from user prompts, utilizing danbooru tags to accurately guide the image creation process.",
                            "You are an assistant designed to generate high-quality images based on user prompts and danbooru tags.",
                            "You are an assistant designed to generate superior images with the superior degree of image-text alignment based on textual prompts or user prompts.",
                            "You are an assistant designed to generate high-quality images with the highest degree of image-text alignment based on textual prompts.",
                        ],
                        value="You are an advanced assistant designed to generate high-quality images from user prompts, utilizing danbooru tags to accurately guide the image creation process.",
                    )
                    with gr.Row():
                        height_neta = gr.Slider(label="Height", minimum=512, maximum=2048, step=64, value=1536)
                        width_neta = gr.Slider(label="Width", minimum=512, maximum=2048, step=64, value=1024)
                    with gr.Row():
                        cfg_neta = gr.Slider(label="Guidance Scale (CFG)", minimum=1.0, maximum=10.0, step=0.1, value=4.0)
                        steps_neta = gr.Slider(label="Sampling Steps", minimum=10, maximum=100, step=1, value=50)
                    with gr.Row():
                        cfg_trunc_neta = gr.Slider(label="CFG Truncation Ratio", minimum=0.0, maximum=10.0, step=0.1, value=6.0)
                        sigmas_neta = gr.Slider(label="Sigmas Factor", minimum=0.9, maximum=1.1, step=0.01, value=1.0)
                    with gr.Row():
                        cfg_norm_neta = gr.Checkbox(label="CFG Normalization", value=False)
                        seed_neta = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)

                    generate_btn_neta = gr.Button("Generate", variant="primary")

                with gr.Column(scale=1):
                    image_output_neta = gr.Image(label="Generated Image", format="png", interactive=False)
                    used_seed_neta = gr.Number(label="Used Seed", interactive=False)

    # ----------------------------------------------------------------------
    # Event wiring
    # ----------------------------------------------------------------------
    # The token counter refreshes when either prompt field changes and once
    # on page load; all three listeners share these arguments.
    _token_counter_kwargs = dict(
        fn=check_token_count,
        inputs=[prompt_newbie, system_prompt_newbie],
        outputs=token_counter_display,
        queue=False,
        trigger_mode="always_last",
        api_name=False,
    )
    prompt_newbie.change(show_progress="hidden", **_token_counter_kwargs)
    system_prompt_newbie.change(show_progress="hidden", **_token_counter_kwargs)
    demo.load(**_token_counter_kwargs)

    # Input order must match the positional parameters of generate_image_newbie.
    generate_btn_newbie.click(
        fn=generate_image_newbie,
        inputs=[
            prompt_newbie,
            negative_prompt_newbie,
            system_prompt_newbie,
            height_newbie,
            width_newbie,
            steps_newbie,
            guidance_scale_newbie,
            cfg_trunc_newbie,
            cfg_norm_newbie,
            seed_newbie,
            sigmas_newbie,
            sampler_newbie,
            schedule_type_newbie,
            image_newbie,
            strength_newbie,
        ],
        outputs=[image_output_newbie, used_seed_newbie],
    )

    generate_btn_pony.click(
        fn=generate_image_pony,
        inputs=[
            prompt_pony,
            neg_prompt_pony,
            height_pony,
            width_pony,
            steps_pony,
            cfg_pony,
            sigmas_pony,
            seed_pony,
        ],
        outputs=[image_output_pony, used_seed_pony],
    )

    generate_btn_neta.click(
        fn=generate_image_netayume,
        inputs=[
            prompt_neta,
            neg_prompt_neta,
            system_prompt_neta,
            height_neta,
            width_neta,
            cfg_neta,
            steps_neta,
            cfg_trunc_neta,
            cfg_norm_neta,
            seed_neta,
            sigmas_neta,
        ],
        outputs=[image_output_neta, used_seed_neta],
    )


# Start the app only when run as a script; show_error surfaces exceptions in the UI.
if __name__ == "__main__":
    demo.launch(show_error=True)