import path from "path";
import { mkdir, writeFile } from "fs/promises";
import { fileURLToPath } from "url";
import { callOllama } from "./ollamaClient.js";
import { compressPng } from "./compressPng.js";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Base URL of the ComfyUI server. Trailing slashes are stripped so that
// `${COMFYUI_URL}/prompt` never turns into "...//prompt" when the environment
// variable is written as "http://host:8188/".
const COMFYUI_URL = (process.env.COMFYUI_URL || "http://localhost:8188").replace(/\/+$/, "");

// Drawing style prefix
const STYLE_PREFIX = `a high-contrast, black and white pen and ink drawing, hand-drawn sketch aesthetic, very low detail, extremely minimal, visible loose linework, expressive simple hatching for shadows, quick conceptual sketch, subtle color accent`;

/**
 * Builds the instruction text sent to the language model for one flavor text.
 *
 * @param {string} flavor - Fantasy flavor text to translate into visual tags.
 * @returns {string} The complete LLM request, ending with an "Output:" cue.
 */
function buildVisualPromptRequest(flavor) {
  return [
    "You are a prompt engineer specializing in visual prompts for AI image generation (e.g., Stable Diffusion, ComfyUI). Given a piece of fantasy flavor text, your job is to extract and translate the visual elements into a highly descriptive image prompt. Your output should be structured like a list of visual tags, not a story or paragraph. Focus on describing the environment, mood, architecture, lighting, materials, and color. Avoid abstract, emotional, and non-visual language. Be literal, specific, and visual only.",
    "Rules:",
    "- Do NOT repeat phrases or wording from the input.",
    "- Only include things that could be seen in a single, still image.",
    "- Use visual keywords, rich adjectives, and clear scene descriptors.",
    "- Keep the prompt concise, 40-80 words.",
    "- Maintain simplicity in descriptions. The image must be a minimal, hand drawn sketch aesthetic with low detail.",
    "- Avoid characters or creatures unless clearly described.",
    "- Avoid referencing rendering style, color technique, camera effects, or drawing medium — focus only on the visual content of the scene.",
    "- Do NOT include phrases like “an image of” or “a scene showing”.",
    "",
    `Input: ${flavor}`,
    "Output:",
  ].join("\n");
}

// 1. Generate engineered visual prompt
/**
 * Asks the language model to turn flavor text into a list of visual tags and
 * prepends the fixed drawing-style prefix.
 *
 * @param {string} flavor - Fantasy flavor text describing the scene.
 * @returns {Promise<string>} Single-line image prompt: STYLE_PREFIX, then the
 *   model's output.
 */
async function generateVisualPrompt(flavor) {
  // NOTE(review): the third and fourth arguments are presumably a retry count
  // and a log label — confirm against ollamaClient.js.
  const rawPrompt = await callOllama(
    buildVisualPromptRequest(flavor),
    "gemma3n:e4b",
    3,
    "Generate Visual Prompt"
  );

  // Collapse every whitespace run (including "\r\n" and tabs) so the final
  // prompt is one clean line.
  const sceneTags = rawPrompt.trim().replace(/\s+/g, " ");
  return `${STYLE_PREFIX}, ${sceneTags}`;
}

// 2. Save image buffer
/**
 * Writes an image buffer to disk next to this module.
 *
 * @param {Buffer} buffer - Raw image bytes.
 * @param {string} filename - File name (may include subdirectories) relative
 *   to this module's directory.
 * @returns {Promise<string>} Absolute path of the written file.
 */
async function saveImage(buffer, filename) {
  const filepath = path.join(__dirname, filename);
  // Create the directory that will actually hold the file; creating __dirname
  // itself is a no-op and breaks as soon as `filename` contains a subfolder.
  await mkdir(path.dirname(filepath), { recursive: true });
  await writeFile(filepath, buffer);
  console.log(`✅ Saved image: ${filepath}`);
  return filepath;
}

// 3. Build workflow payload
/**
 * Builds the ComfyUI workflow graph (API format) for one image.
 *
 * Node wiring: the KSampler ("3") takes its model from the UNETLoader ("4"),
 * its positive/negative conditioning from the two CLIPTextEncode nodes
 * ("6"/"7"), and its latent from EmptyLatentImage ("5"); the result is decoded
 * by VAEDecode ("8") and written by SaveImage ("9").
 *
 * @param {string} promptText - Positive prompt text.
 * @param {string} [negativeText=""] - Negative prompt text.
 * @returns {object} Workflow object to send as the `prompt` field of a
 *   ComfyUI `/prompt` request.
 */
function buildComfyWorkflow(promptText, negativeText = "") {
  return {
    "3": {
      "inputs": {
        // A fresh random seed per call so repeated runs give different images.
        "seed": Math.floor(Math.random() * 100000),
        "steps": 4,
        "cfg": 1,
        "sampler_name": "euler",
        "scheduler": "simple",
        "denoise": 1,
        "model": ["4", 0],
        "positive": ["6", 0],
        "negative": ["7", 0],
        "latent_image": ["5", 0]
      },
      "class_type": "KSampler"
    },
    "4": {
      "inputs": {
        "unet_name": "flux1-schnell-fp8.safetensors",
        "weight_dtype": "fp8_e4m3fn"
      },
      "class_type": "UNETLoader"
    },
    "5": {
      "inputs": { "width": 640, "height": 448, "batch_size": 1 },
      "class_type": "EmptyLatentImage"
    },
    "6": {
      "inputs": { "text": promptText, "clip": ["10", 0] },
      "class_type": "CLIPTextEncode"
    },
    "7": {
      "inputs": { "text": negativeText, "clip": ["10", 0] },
      "class_type": "CLIPTextEncode"
    },
    "10": {
      "inputs": {
        "clip_name1": "clip_l.safetensors",
        "clip_name2": "t5xxl_fp8_e4m3fn.safetensors",
        "type": "flux"
      },
      "class_type": "DualCLIPLoader"
    },
    "11": {
      "inputs": { "vae_name": "ae.safetensors" },
      "class_type": "VAELoader"
    },
    "8": {
      "inputs": { "samples": ["3", 0], "vae": ["11", 0] },
      "class_type": "VAEDecode"
    },
    "9": {
      "inputs": { "filename_prefix": "ComfyUI_Flux", "images": ["8", 0] },
      "class_type": "SaveImage"
    }
  };
}
// 4a. Wait for ComfyUI to finish image generation
/**
 * Polls the ComfyUI history for a submitted prompt until it reports images.
 *
 * @param {string} promptId - The `prompt_id` returned by the `/prompt` call.
 * @param {number} [timeout=900000] - Maximum time to wait, in milliseconds
 *   (the default is 15 minutes).
 * @param {number} [pollInterval=1000] - Delay between polls, in milliseconds.
 * @returns {Promise<string[]>} File names of every image in the prompt's outputs.
 * @throws {Error} If the history request fails, the execution reports an
 *   error status, or the timeout elapses.
 */
async function waitForImage(promptId, timeout = 900000, pollInterval = 1000) {
  const start = Date.now();
  // Ask only for this prompt's entry instead of downloading the whole history
  // on every poll; the response is still keyed by prompt id.
  const historyUrl = `${COMFYUI_URL}/history/${encodeURIComponent(promptId)}`;

  while (Date.now() - start < timeout) {
    const res = await fetch(historyUrl);
    if (!res.ok) {
      throw new Error(`Failed to fetch ComfyUI history: ${res.status} ${res.statusText}`);
    }

    const data = await res.json();
    const historyEntry = data[promptId];

    // Fail fast when the server recorded a failed execution rather than
    // polling until the timeout expires.
    if (historyEntry?.status?.status_str === "error") {
      throw new Error(`ComfyUI reported an execution error for prompt ${promptId}.`);
    }

    if (historyEntry?.outputs) {
      const images = Object.values(historyEntry.outputs).flatMap((output) => output.images || []);
      if (images.length > 0) return images.map((image) => image.filename);
    }

    await new Promise((resolve) => setTimeout(resolve, pollInterval));
  }

  throw new Error("Timed out waiting for ComfyUI image result.");
}

// 4b. Download image from ComfyUI server
/**
 * Downloads a generated image from ComfyUI and stores it locally.
 *
 * @param {string} filename - Image file name as reported by ComfyUI.
 * @param {string} localFilename - Name to save the file under locally.
 * @returns {Promise<string>} Path returned by `saveImage`.
 * @throws {Error} If the server responds with a non-OK status.
 */
async function downloadImage(filename, localFilename) {
  // Encode the name so spaces, "&", "#", etc. cannot corrupt the query string.
  const url = `${COMFYUI_URL}/view?filename=${encodeURIComponent(filename)}`;
  const res = await fetch(url);
  if (!res.ok) throw new Error(`Failed to fetch image: ${res.statusText}`);

  const buffer = Buffer.from(await res.arrayBuffer());
  return await saveImage(buffer, localFilename);
}

// Negative prompt passed to the workflow's second text-encode node.
const NEGATIVE_PROMPT = "photorealism, hyper-realistic, high detail, 3D render, CGI, ray tracing, glossy, polished, smooth shading, realistic lighting, digital painting, anime, cartoon, pixelated, noisy, cluttered, blurry, sharp focus, symmetrical, perfect perspective, detailed textures, high-resolution textures, high contrast lighting, lens flare, bokeh, camera artifacts, text, logo, signature, watermark, overexposed, underexposed, glowing edges";

// 4c. Submit prompt and handle full image pipeline
/**
 * Submits the workflow to ComfyUI, waits for the result, downloads the first
 * image, and compresses it.
 *
 * Failures are logged and reported as `null` rather than thrown.
 *
 * @param {string} prompt - Positive image prompt.
 * @param {string} filename - Local file name for the downloaded image.
 * @returns {Promise<string|null>} Path of the saved image, or `null` on failure.
 */
async function generateImageViaComfyUI(prompt, filename) {
  const workflow = buildComfyWorkflow(prompt, NEGATIVE_PROMPT);

  try {
    console.log("Submitting prompt to ComfyUI...");
    const res = await fetch(`${COMFYUI_URL}/prompt`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ prompt: workflow })
    });

    if (!res.ok) {
      // Include the response body: it is where the server explains why the
      // workflow was rejected.
      const detail = await res.text().catch(() => "");
      throw new Error(`ComfyUI error: ${res.statusText}${detail ? ` - ${detail}` : ""}`);
    }

    const { prompt_id } = await res.json();
    if (!prompt_id) throw new Error("ComfyUI response did not include a prompt_id");

    console.log("Waiting for image result...");
    const filenames = await waitForImage(prompt_id);
    if (filenames.length === 0) throw new Error("No image generated");

    const comfyFilename = filenames[0];
    console.log("Downloading image...");
    const filepath = await downloadImage(comfyFilename, filename);

    console.log("Compressing PNG...");
    await compressPng(filepath);
    return filepath;
  } catch (err) {
    // `err` may not be an Error instance; fall back to the raw value.
    console.error("Error generating image:", err?.message ?? err);
    return null;
  }
}

// 5. Main export
/**
 * Generates the dungeon illustration for a piece of flavor text.
 *
 * @param {object} params
 * @param {string} params.flavor - Fantasy flavor text describing the dungeon.
 * @returns {Promise<string>} Path of the generated `dungeon.png`.
 * @throws {Error} If the image could not be generated.
 */
export async function generateDungeonImages({ flavor }) {
  console.log("Generating dungeon image...");
  const finalPrompt = await generateVisualPrompt(flavor);
  console.log("Engineered visual prompt:\n", finalPrompt);

  const filename = `dungeon.png`;
  const filepath = await generateImageViaComfyUI(finalPrompt, filename);
  if (!filepath) {
    throw new Error("Failed to generate dungeon image.");
  }
  return filepath;
}