rework to allow for image gen
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
This commit is contained in:
207
imageGenerator.js
Normal file
207
imageGenerator.js
Normal file
@@ -0,0 +1,207 @@
|
||||
import path from "path";
|
||||
import { mkdir, writeFile } from "fs/promises";
|
||||
import { fileURLToPath } from "url";
|
||||
import { callOllama } from "./ollamaClient.js";
|
||||
|
||||
// Resolve this module's directory (ES modules have no built-in __dirname).
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Base URL of the ComfyUI HTTP API; override via the COMFYUI_URL env var.
const COMFYUI_URL = process.env.COMFYUI_URL || "http://localhost:8188";

// Drawing style prefix
// Prepended to every engineered prompt to pin the sketch aesthetic.
const STYLE_PREFIX = `a high-contrast, black and white pen and ink drawing, hand-drawn sketch aesthetic, very low detail, visible loose linework, expressive simple hatching for shadows, quick conceptual sketch, subtle color accent`;
|
||||
|
||||
// 1. Generate engineered visual prompt
/**
 * Translates fantasy flavor text into a tag-style image prompt via Ollama,
 * then prepends the fixed STYLE_PREFIX so every image shares one aesthetic.
 *
 * Fix: the instruction "Avoid abstract, emotional, and visual language"
 * contradicted the following sentence ("Be literal, specific, and visual
 * only"); corrected to "non-visual language".
 *
 * @param {string} flavor - Flavor text to translate into visual tags.
 * @returns {Promise<string>} STYLE_PREFIX + comma-joined engineered prompt.
 */
async function generateVisualPrompt(flavor) {
  const rawPrompt = await callOllama(
    `You are a prompt engineer specializing in visual prompts for AI image generation (e.g., Stable Diffusion, ComfyUI). Given a piece of fantasy flavor text, your job is to extract and translate the visual elements into a highly descriptive image prompt.

Your output should be structured like a list of visual tags, not a story or paragraph. Focus on describing the environment, mood, architecture, lighting, materials, and color. Avoid abstract, emotional, and non-visual language. Be literal, specific, and visual only.

Rules:
- Do NOT repeat phrases or wording from the input.
- Only include things that could be seen in a single, still image.
- Use visual keywords, rich adjectives, and clear scene descriptors.
- Keep the prompt concise, 40-80 words.
- Maintain simplicity in descriptions. The image must be a minimal, hand drawn sketch aesthetic with low detail.
- Avoid characters or creatures unless clearly described.
- Avoid referencing rendering style, color technique, camera effects, or drawing medium — focus only on the visual content of the scene.
- Do NOT include phrases like “an image of” or “a scene showing”.

Input:
${flavor}

Output:`,
    "gemma3n:e4b", 3, "Generate Visual Prompt"
  );

  // Flatten any newlines so the final prompt is one comma-separated tag line.
  return `${STYLE_PREFIX}, ${rawPrompt.trim().replace(/\n/g, " ")}`;
}
|
||||
|
||||
// 2. Save image buffer
/**
 * Writes an image buffer to disk under this module's directory.
 *
 * Fix: the original ran `mkdir(__dirname, ...)`, which only (re)creates the
 * module's own directory — always present — so a `filename` containing a
 * subdirectory (e.g. "out/dungeon.png") failed with ENOENT. We now create
 * the target file's parent directory instead.
 *
 * @param {Buffer} buffer - Raw image bytes.
 * @param {string} filename - Path relative to this module's directory.
 * @returns {Promise<string>} Absolute path of the written file.
 */
async function saveImage(buffer, filename) {
  const filepath = path.join(__dirname, filename);
  // Ensure the destination directory (including any subdirs) exists.
  await mkdir(path.dirname(filepath), { recursive: true });
  await writeFile(filepath, buffer);
  console.log(`✅ Saved image: ${filepath}`);
  return filepath;
}
|
||||
|
||||
// 3. Build workflow payload
/**
 * Assembles the ComfyUI API-format node graph for one Flux Schnell
 * text-to-image run: UNETLoader + DualCLIPLoader + VAELoader feeding a
 * 4-step euler KSampler, decoded and saved server-side.
 *
 * @param {string} promptText - Positive conditioning text.
 * @param {string} [negativeText=""] - Negative conditioning text.
 * @returns {object} Graph keyed by node id, ready for POST /prompt.
 */
function buildComfyWorkflow(promptText, negativeText = "") {
  // Both text encoders share the DualCLIPLoader (node "10") output.
  const encodeText = (text) => ({
    inputs: { text, clip: ["10", 0] },
    class_type: "CLIPTextEncode",
  });

  return {
    "3": {
      inputs: {
        seed: Math.floor(Math.random() * 100000), // fresh seed every call
        steps: 4,
        cfg: 1,
        sampler_name: "euler",
        scheduler: "simple",
        denoise: 1,
        model: ["4", 0],
        positive: ["6", 0],
        negative: ["7", 0],
        latent_image: ["5", 0],
      },
      class_type: "KSampler",
    },
    "4": {
      inputs: {
        unet_name: "flux1-schnell-fp8.safetensors",
        weight_dtype: "fp8_e4m3fn",
      },
      class_type: "UNETLoader",
    },
    "5": {
      inputs: { width: 1000, height: 700, batch_size: 1 },
      class_type: "EmptyLatentImage",
    },
    "6": encodeText(promptText),
    "7": encodeText(negativeText),
    "10": {
      inputs: {
        clip_name1: "clip_l.safetensors",
        clip_name2: "t5xxl_fp8_e4m3fn.safetensors",
        type: "flux",
      },
      class_type: "DualCLIPLoader",
    },
    "11": {
      inputs: { vae_name: "ae.safetensors" },
      class_type: "VAELoader",
    },
    "8": {
      inputs: { samples: ["3", 0], vae: ["11", 0] },
      class_type: "VAEDecode",
    },
    "9": {
      inputs: { filename_prefix: "ComfyUI_Flux", images: ["8", 0] },
      class_type: "SaveImage",
    },
  };
}
|
||||
|
||||
|
||||
// 4a. Wait for ComfyUI to finish image generation
/**
 * Polls ComfyUI once per second until the prompt's history entry contains
 * output images, then returns their server-side filenames.
 *
 * Fixes: (1) query the per-prompt endpoint /history/{prompt_id} instead of
 * downloading the entire (unbounded) /history payload on every poll;
 * (2) check res.ok — the original fed error responses straight to
 * res.json(), which threw and aborted the poll loop on any transient
 * server hiccup instead of retrying until the timeout.
 *
 * @param {string} promptId - Prompt id returned by POST /prompt.
 * @param {number} [timeout=900000] - Max wait in milliseconds (15 min).
 * @returns {Promise<string[]>} Filenames of the generated images.
 * @throws {Error} If no images appear before the timeout elapses.
 */
async function waitForImage(promptId, timeout = 900000) {
  const start = Date.now();

  while (Date.now() - start < timeout) {
    const res = await fetch(`${COMFYUI_URL}/history/${encodeURIComponent(promptId)}`);

    if (res.ok) {
      // Response is keyed by prompt id, same shape as the full /history map.
      const data = await res.json();
      const historyEntry = data[promptId];

      if (historyEntry?.outputs) {
        const images = Object.values(historyEntry.outputs).flatMap(o => o.images || []);
        if (images.length > 0) return images.map(i => i.filename);
      }
    }

    await new Promise(resolve => setTimeout(resolve, 1000));
  }

  throw new Error("Timed out waiting for ComfyUI image result.");
}
|
||||
|
||||
// 4b. Download image from ComfyUI server
/**
 * Fetches a generated image from ComfyUI's /view endpoint and saves it
 * locally via saveImage().
 *
 * Fix: the original URL never interpolated the filename ("/view?filename=
 * $(unknown)" — `$(...)` is shell syntax, not a JS template placeholder),
 * so every request asked for a nonexistent file. The URL is now built with
 * the URL API so the filename is properly percent-encoded.
 *
 * @param {string} filename - Server-side filename reported by /history.
 * @param {string} localFilename - Destination filename for saveImage().
 * @returns {Promise<string>} Local filepath of the saved image.
 * @throws {Error} If the HTTP fetch fails.
 */
async function downloadImage(filename, localFilename) {
  const url = new URL("/view", COMFYUI_URL);
  url.searchParams.set("filename", filename);
  const res = await fetch(url);

  if (!res.ok) throw new Error(`Failed to fetch image: ${res.statusText}`);
  const buffer = Buffer.from(await res.arrayBuffer());

  return await saveImage(buffer, localFilename);
}
|
||||
|
||||
// 4c. Submit prompt and handle full image pipeline
/**
 * Runs the full ComfyUI round trip for one image: build the workflow,
 * POST it to /prompt, poll until an output image exists, then download it.
 *
 * @param {string} prompt - Fully engineered positive prompt.
 * @param {string} filename - Local filename to save the image under.
 * @returns {Promise<string|null>} Local filepath, or null on any failure
 *   (errors are logged, never rethrown).
 */
async function generateImageViaComfyUI(prompt, filename) {
  const workflow = buildComfyWorkflow(prompt, "text, blurry, lowres, watermark");

  try {
    console.log("Submitting prompt to ComfyUI...");
    const submission = await fetch(`${COMFYUI_URL}/prompt`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ prompt: workflow }),
    });

    if (!submission.ok) throw new Error(`ComfyUI error: ${submission.statusText}`);

    const { prompt_id } = await submission.json();

    console.log("Waiting for image result...");
    const generated = await waitForImage(prompt_id);
    if (generated.length === 0) throw new Error("No image generated");

    // Only the first generated image is kept (batch_size is 1 anyway).
    const [firstImage] = generated;

    console.log("Downloading image...");
    return await downloadImage(firstImage, filename);
  } catch (err) {
    // Swallow and signal failure with null; the caller decides how to react.
    console.error("Error generating image:", err.message);
    return null;
  }
}
|
||||
|
||||
// 5. Main export
/**
 * Generates the dungeon image for a piece of flavor text: engineers a
 * visual prompt, renders it through ComfyUI, and returns the local path.
 *
 * @param {{ flavor: string }} args - Object carrying the flavor text.
 * @returns {Promise<string>} Filepath of the saved dungeon image.
 * @throws {Error} If the image pipeline returns no file.
 */
export async function generateDungeonImages({ flavor }) {
  console.log("Generating dungeon image...");

  const engineeredPrompt = await generateVisualPrompt(flavor);
  console.log("Engineered visual prompt:\n", engineeredPrompt);

  const savedPath = await generateImageViaComfyUI(engineeredPrompt, `dungeon.png`);
  if (!savedPath) {
    throw new Error("Failed to generate dungeon image.");
  }

  return savedPath;
}
|
||||
Reference in New Issue
Block a user