Initial commit

Browse files

Files changed (6) hide show

.gitattributes +3 -0
.python-version +1 -0
inference_flux_model.py +82 -0
inference_pixart_custom_redux.py +89 -0
inference_pixart_flux_redux.py +92 -0
pyproject.toml +16 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+flux-image-variations-model/** filter=lfs diff=lfs merge=lfs -text
+pixart-custom-redux/** filter=lfs diff=lfs merge=lfs -text
+pixart-flux-redux/** filter=lfs diff=lfs merge=lfs -text

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.9

inference_flux_model.py ADDED Viewed

	@@ -0,0 +1,82 @@

+import argparse
+import time
+import torch
+from diffusers import FluxTransformer2DModel
+from transformers import CLIPModel
+from pathlib import Path
+from PIL import Image
+from open_flux_pipeline import FluxWithCFGPipeline
+pipe = None
+def generate(prompt, image_prompt=None, guidance_scale=2, num_images=4, resolution=512):
+    # Create blank image prompt backgrounds
+    image_prompt_kwargs = {
+        "image_prompt": Image.new("RGB", (resolution, resolution)),
+        "negative_image_prompt": Image.new("RGB", (resolution, resolution)),
+    }
+    if image_prompt is not None:
+        image_prompt_kwargs["image_prompt"] = image_prompt
+    with torch.no_grad():
+        images = pipe(
+            prompt=prompt,
+            negative_prompt="",
+            height=resolution,
+            width=resolution,
+            max_sequence_length=256,
+            guidance_scale=guidance_scale,
+            num_images_per_prompt=num_images,
+            **image_prompt_kwargs
+        ).images
+        # Concatenate all images horizontally
+        widths, heights = zip(*[img.size for img in images])
+        total_width = sum(widths) + len(images) - 1
+        max_height = max(heights)
+        out = Image.new('RGB', (total_width, max_height))
+        x_offset = 0
+        for img in images:
+            out.paste(img, (x_offset, 0))
+            x_offset += img.width + 1
+        # If an image prompt was provided, stack it above the generated images
+        if image_prompt is not None:
+            out_with_image_prompt = Image.new('RGB', (out.width, out.height + 1 + resolution))
+            resized_prompt = image_prompt.resize((resolution, resolution), Image.Resampling.BILINEAR)
+            out_with_image_prompt.paste(resized_prompt, (0, 0))
+            out_with_image_prompt.paste(out, (0, resolution + 1))
+            out = out_with_image_prompt
+    # Ensure the output directory exists and save the final image
+    Path("image-outputs").mkdir(parents=True, exist_ok=True)
+    output_filename = f"image-outputs/{prompt[:40].replace(' ', '_')}.{int(time.time())}.png"
+    out.save(output_filename)
+    print(f"Saved output to {output_filename}")
+def main():
+    parser = argparse.ArgumentParser(description="Generate images using an image and a text prompt (Flux Image Variations).")
+    parser.add_argument("--prompt", type=str, default="", help='The text prompt for image generation (default "")')
+    parser.add_argument("--image_prompt", type=str, default=None,
+                        help="Path to an optional image to use as a prompt")
+    parser.add_argument("--guidance_scale", type=float, default=2,
+                        help="Guidance scale for image generation (default: 2)")
+    parser.add_argument("--num_images", type=int, default=4,
+                        help="Number of images to generate (default: 4)")
+    parser.add_argument("--resolution", type=int, default=512,
+                        help="Resolution for generated images (default: 512)")
+    args = parser.parse_args()
+    # Load models and pipelines
+    global pipe
+    clip = CLIPModel.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=torch.bfloat16)
+    pipe = FluxWithCFGPipeline.from_pretrained("ostris/OpenFLUX.1", text_encoder=clip, transformer=None, torch_dtype=torch.bfloat16)
+    pipe.transformer = FluxTransformer2DModel.from_pretrained("flux-image-variations-model", torch_dtype=torch.bfloat16)
+    pipe.to("cuda")
+    img_prompt = Image.open(args.image_prompt) if args.image_prompt else None
+    generate(args.prompt, image_prompt=img_prompt, guidance_scale=args.guidance_scale,
+             num_images=args.num_images, resolution=args.resolution)
+if __name__ == "__main__":
+    main()

inference_pixart_custom_redux.py ADDED Viewed

	@@ -0,0 +1,89 @@

+import argparse
+import time
+import torch
+from diffusers import PixArtAlphaPipeline
+from diffusers.pipelines.flux import FluxPriorReduxPipeline
+from diffusers.pipelines.flux.modeling_flux import ReduxImageEncoder
+from transformers import SiglipImageProcessor
+from pathlib import Path
+from PIL import Image
+pipe = None
+redux = None
+redux_embedder = None
+def generate(prompt, image_prompt=None, guidance_scale=2, num_images=4, resolution=512):
+    with torch.no_grad():
+        clip_image_processor = SiglipImageProcessor(size={"height": 384, "width": 384})
+        clip_pixel_values = clip_image_processor.preprocess(
+            image_prompt.convert("RGB"), return_tensors="pt"
+        ).pixel_values.to("cuda", dtype=torch.bfloat16)
+        image_prompt_latents = redux.image_encoder(clip_pixel_values).last_hidden_state
+        image_prompt_embeds = redux_embedder(image_prompt_latents).image_embeds
+        prompt_embeds = image_prompt_embeds[:, :120, :]
+        attention_mask = torch.ones(prompt_embeds.shape[0], prompt_embeds.shape[1]).to("cuda")
+        images = pipe(
+            prompt_embeds=prompt_embeds,
+            prompt_attention_mask=attention_mask,
+            negative_prompt="",
+            height=resolution,
+            width=resolution,
+            guidance_scale=guidance_scale,
+            num_images_per_prompt=num_images,
+        ).images
+        # Concatenate all images horizontally
+        widths, heights = zip(*[img.size for img in images])
+        total_width = sum(widths) + len(images) - 1
+        max_height = max(heights)
+        out = Image.new('RGB', (total_width, max_height))
+        x_offset = 0
+        for img in images:
+            out.paste(img, (x_offset, 0))
+            x_offset += img.width + 1
+        # If an image prompt was provided, stack it above the generated images
+        if image_prompt is not None:
+            out_with_image_prompt = Image.new('RGB', (out.width, out.height + 1 + resolution))
+            resized_prompt = image_prompt.resize((resolution, resolution), Image.Resampling.BILINEAR)
+            out_with_image_prompt.paste(resized_prompt, (0, 0))
+            out_with_image_prompt.paste(out, (0, resolution + 1))
+            out = out_with_image_prompt
+    Path("image-outputs").mkdir(parents=True, exist_ok=True)
+    output_filename = f"image-outputs/{prompt[:40].replace(' ', '_')}.{int(time.time())}.png"
+    out.save(output_filename)
+    print(f"Saved output to {output_filename}")
+def main():
+    parser = argparse.ArgumentParser(
+        description="Generate images using an image and a text prompt (PixArt Custom Redux)."
+    )
+    parser.add_argument("--prompt", type=str, default="",
+                        help='The text prompt for image generation (default: "")')
+    parser.add_argument("--image_prompt", type=str, default=None,
+                        help="Path to an optional image to use as a prompt")
+    parser.add_argument("--guidance_scale", type=float, default=2,
+                        help="Guidance scale for image generation (default: 2)")
+    parser.add_argument("--num_images", type=int, default=4,
+                        help="Number of images to generate (default: 4)")
+    parser.add_argument("--resolution", type=int, default=512,
+                        help="Resolution for generated images (default: 512)")
+    args = parser.parse_args()
+    global pipe, redux, redux_embedder
+    pipe = PixArtAlphaPipeline.from_pretrained("PixArt-alpha/PixArt-XL-2-512x512", torch_dtype=torch.bfloat16)
+    redux_embedder = ReduxImageEncoder.from_pretrained("pixart-custom-redux", torch_dtype=torch.bfloat16)
+    redux = FluxPriorReduxPipeline.from_pretrained("FLUX.1-Redux-dev", image_embedder=redux_embedder, torch_dtype=torch.bfloat16)
+    pipe.to("cuda")
+    redux.to("cuda")
+    img_prompt = Image.open(args.image_prompt) if args.image_prompt else None
+    generate(args.prompt, image_prompt=img_prompt, guidance_scale=args.guidance_scale,
+             num_images=args.num_images, resolution=args.resolution)
+if __name__ == "__main__":
+    main()

inference_pixart_flux_redux.py ADDED Viewed

	@@ -0,0 +1,92 @@

+import argparse
+import time
+import torch
+from diffusers import PixArtAlphaPipeline, PixArtTransformer2DModel
+from diffusers.pipelines.flux import FluxPriorReduxPipeline
+from transformers import SiglipImageProcessor
+from pathlib import Path
+from PIL import Image
+pipe = None
+redux = None
+redux_embedder = None
+def generate(prompt, image_prompt=None, guidance_scale=2, num_images=4, resolution=512):
+    with torch.no_grad():
+        clip_image_processor = SiglipImageProcessor(size={"height": 384, "width": 384})
+        clip_pixel_values = clip_image_processor.preprocess(
+            image_prompt.convert("RGB"), return_tensors="pt"
+        ).pixel_values.to("cuda", dtype=torch.bfloat16)
+        image_prompt_latents = redux.image_encoder(clip_pixel_values).last_hidden_state
+        image_prompt_embeds = redux_embedder(image_prompt_latents).image_embeds
+        prompt_embeds = image_prompt_embeds[:, :120, :] * 0.04
+        attention_mask = torch.ones(prompt_embeds.shape[0], prompt_embeds.shape[1]).to("cuda")
+        images = pipe(
+            prompt_embeds=prompt_embeds,
+            prompt_attention_mask=attention_mask,
+            negative_prompt="",
+            height=resolution,
+            width=resolution,
+            guidance_scale=guidance_scale,
+            num_images_per_prompt=num_images,
+        ).images
+        # Concatenate all images horizontally
+        widths, heights = zip(*[img.size for img in images])
+        total_width = sum(widths) + len(images) - 1
+        max_height = max(heights)
+        out = Image.new('RGB', (total_width, max_height))
+        x_offset = 0
+        for img in images:
+            out.paste(img, (x_offset, 0))
+            x_offset += img.width + 1
+        # If an image prompt was provided, stack it above the generated images
+        if image_prompt is not None:
+            out_with_image_prompt = Image.new('RGB', (out.width, out.height + 1 + resolution))
+            resized_prompt = image_prompt.resize((resolution, resolution), Image.Resampling.BILINEAR)
+            out_with_image_prompt.paste(resized_prompt, (0, 0))
+            out_with_image_prompt.paste(out, (0, resolution + 1))
+            out = out_with_image_prompt
+    Path("image-outputs").mkdir(parents=True, exist_ok=True)
+    output_filename = f"image-outputs/{prompt[:40].replace(' ', '_')}.{int(time.time())}.png"
+    out.save(output_filename)
+    print(f"Saved output to {output_filename}")
+def main():
+    parser = argparse.ArgumentParser(
+        description="Generate images using an image and a text prompt (PixArt Flux Redux)."
+    )
+    parser.add_argument("--prompt", type=str, default="",
+                        help='The text prompt for image generation (default: "")')
+    parser.add_argument("--image_prompt", type=str, default=None,
+                        help="Path to an optional image to use as a prompt")
+    parser.add_argument("--guidance_scale", type=float, default=2,
+                        help="Guidance scale for image generation (default: 2)")
+    parser.add_argument("--num_images", type=int, default=4,
+                        help="Number of images to generate (default: 4)")
+    parser.add_argument("--resolution", type=int, default=512,
+                        help="Resolution for generated images (default: 512)")
+    args = parser.parse_args()
+    global pipe, redux, redux_embedder
+    pipe = PixArtAlphaPipeline.from_pretrained(
+        "PixArt-alpha/PixArt-XL-2-512x512", transformer=None, torch_dtype=torch.bfloat16
+    )
+    transformer = PixArtTransformer2DModel.from_pretrained("pixart-flux-redux", torch_dtype=torch.bfloat16)
+    pipe.transformer = transformer
+    redux = FluxPriorReduxPipeline.from_pretrained("FLUX.1-Redux-dev", torch_dtype=torch.bfloat16)
+    redux_embedder = redux.image_embedder
+    redux.to("cuda")
+    pipe.to("cuda")
+    img_prompt = Image.open(args.image_prompt) if args.image_prompt else None
+    generate(args.prompt, image_prompt=img_prompt, guidance_scale=args.guidance_scale,
+             num_images=args.num_images, resolution=args.resolution)
+if __name__ == "__main__":
+    main()

pyproject.toml ADDED Viewed

	@@ -0,0 +1,16 @@

+[project]
+name = "image-variations-experiment"
+description = "Experimental Flux/PixArt finetunes for image variations"
+version = "0.1.0"
+readme = "README.md"
+requires-python = ">=3.9"
+dependencies = [
+    "accelerate>=1.4.0",
+    "diffusers>=0.32.2",
+    "pillow>=11.1.0",
+    "protobuf>=5.29.3",
+    "sentencepiece>=0.2.0",
+    "torch==2.5.1",
+    "torchvision==0.20.1",
+    "transformers==4.46.1",
+]