import sharp from "sharp";
import path from "path";
import { mkdir, writeFile } from "fs/promises";
import { fileURLToPath } from "url";
import { callOllama, OLLAMA_MODEL } from "./ollamaClient.js";

const COMFYUI_ENABLED = process.env.COMFYUI_ENABLED !== "false";
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const COMFYUI_URL = process.env.COMFYUI_URL || "http://localhost:8188";

// Drawing style prefix
const STYLE_PREFIX = `clean line art, minimalist sketch, concept art, black and white line drawing, lots of white space, sparse shading, simple black hatching, very low detail`;

const ACCENT_COLORS = ["red", "blue", "yellow", "green", "purple", "orange"];

// Pick one accent color so each image gets a single splash of color in an otherwise monochrome sketch.
function selectRandomAccentColor() {
  return ACCENT_COLORS[Math.floor(Math.random() * ACCENT_COLORS.length)];
}

// Resize the base render to the target dimensions, lightly blur + sharpen to smooth
// upscaling artifacts, and write a palette-compressed PNG.
async function upscaleImage(inputPath, outputPath, width, height) {
  try {
    await sharp(inputPath)
      .resize(width, height, { kernel: "lanczos3" })
      .blur(0.3)
      .sharpen({ sigma: 1, flat: 1, jagged: 2 })
      .png({ compressionLevel: 9, adaptiveFiltering: true, palette: true })
      .toFile(outputPath);
    console.log(`Upscaled + compressed PNG saved: ${outputPath}`);
    return outputPath;
  } catch (err) {
    console.error("Error during upscaling:", err.message);
    return null;
  }
}

// 1. Generate engineered visual prompt
async function generateVisualPrompt(flavor) {
  const rawPrompt = await callOllama(
    `You are a prompt engineer specializing in visual prompts for AI image generation. Your goal is to translate fantasy flavor text into a sparse, minimalist scene description. Your output must be a simple list of visual tags describing only the most essential elements of the scene. Focus on the core subject and mood.

Rules:
- Describe a sparse scene with a single focal point or landscape.
- Use only 3-5 key descriptive phrases or tags.
- The entire output should be very short, 20-50 words maximum.
- Do NOT repeat wording from the input.
- Describe only the visual elements of the image. Focus on colors, shapes, textures, and spatial relationships.
- Exclude any references to style, medium, camera effects, sounds, hypothetical scenarios, or physical sensations.
- Avoid describing fine details; focus on large forms and the overall impression.
- Do NOT include phrases like “an image of” or “a scene showing”.
- Do NOT include the word "Obsidian" or "obsidian" at all.

Input: ${flavor}

Output:`,
    OLLAMA_MODEL,
    3,
    "Generate Visual Prompt"
  );

  // Prepend the fixed style prefix and accent color, and collapse the model output to a single line.
  const accentColor = selectRandomAccentColor();
  return `${STYLE_PREFIX}, on white paper, monochrome with a single accent of ${accentColor}, ${rawPrompt.trim().replace(/\n/g, " ")}`;
}

// 2. Save image buffer
async function saveImage(buffer, filename) {
  const filepath = path.join(__dirname, filename);
  await mkdir(__dirname, { recursive: true });
  await writeFile(filepath, buffer);
  console.log(`Saved image: ${filepath}`);
  return filepath;
}
// 3. Build workflow payload
// Node graph: UNETLoader (4) and DualCLIPLoader (10) feed the KSampler (3) via the two
// CLIPTextEncode nodes (6 = positive, 7 = negative); VAEDecode (8) and SaveImage (9) write the result.
function buildComfyWorkflow(promptText, negativeText = "") {
  return {
    "3": {
      "inputs": {
        "seed": Math.floor(Math.random() * 100000),
        "steps": 4,
        "cfg": 1,
        "sampler_name": "euler",
        "scheduler": "simple",
        "denoise": 1,
        "model": ["4", 0],
        "positive": ["6", 0],
        "negative": ["7", 0],
        "latent_image": ["5", 0]
      },
      "class_type": "KSampler"
    },
    "4": {
      "inputs": {
        "unet_name": "flux1-schnell-fp8.safetensors",
        "weight_dtype": "fp8_e4m3fn"
      },
      "class_type": "UNETLoader"
    },
    "5": {
      "inputs": { "width": 728, "height": 512, "batch_size": 1 },
      "class_type": "EmptyLatentImage"
    },
    "6": {
      "inputs": { "text": promptText, "clip": ["10", 0] },
      "class_type": "CLIPTextEncode"
    },
    "7": {
      "inputs": { "text": negativeText, "clip": ["10", 0] },
      "class_type": "CLIPTextEncode"
    },
    "10": {
      "inputs": {
        "clip_name1": "clip_l.safetensors",
        "clip_name2": "t5xxl_fp8_e4m3fn.safetensors",
        "type": "flux"
      },
      "class_type": "DualCLIPLoader"
    },
    "11": {
      "inputs": { "vae_name": "ae.safetensors" },
      "class_type": "VAELoader"
    },
    "8": {
      "inputs": { "samples": ["3", 0], "vae": ["11", 0] },
      "class_type": "VAEDecode"
    },
    "9": {
      "inputs": { "filename_prefix": "ComfyUI_Flux", "images": ["8", 0] },
      "class_type": "SaveImage"
    }
  };
}

// 4a. Wait for ComfyUI to finish image generation
// Polls /history once per second until the entry for this prompt_id has outputs
// containing at least one image, then returns the ComfyUI-side filenames.
async function waitForImage(promptId, timeout = 900000) {
  const start = Date.now();
  while (Date.now() - start < timeout) {
    const res = await fetch(`${COMFYUI_URL}/history`);
    const data = await res.json();
    const historyEntry = data[promptId];
    if (historyEntry?.outputs) {
      const images = Object.values(historyEntry.outputs).flatMap(o => o.images || []);
      if (images.length > 0) return images.map(i => i.filename);
    }
    await new Promise(resolve => setTimeout(resolve, 1000));
  }
  throw new Error("Timed out waiting for ComfyUI image result.");
}

// 4b. Download image from ComfyUI server
async function downloadImage(filename, localFilename) {
  const url = `${COMFYUI_URL}/view?filename=${filename}`;
  const res = await fetch(url);
  if (!res.ok) throw new Error(`Failed to fetch image: ${res.statusText}`);
  const buffer = Buffer.from(await res.arrayBuffer());
  return await saveImage(buffer, localFilename);
}

// 4c. Submit prompt and handle full image pipeline
async function generateImageViaComfyUI(prompt, filename) {
  const negativePrompt = `heavy shading, deep blacks, dark, gritty, shadow-filled, chiaroscuro, scratchy lines, photorealism, hyper-realistic, high detail, 3D render, CGI, polished, smooth shading, detailed textures, noisy, cluttered, blurry, text, logo, signature, watermark, artist name, branding, ugly, deformed, unnatural patterns, perfect curves, repetitive textures`;
  const workflow = buildComfyWorkflow(prompt, negativePrompt);

  try {
    console.log("Submitting prompt to ComfyUI...");
    const res = await fetch(`${COMFYUI_URL}/prompt`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ prompt: workflow })
    });
    if (!res.ok) {
      throw new Error(`ComfyUI error: ${res.statusText}`);
    }

    const { prompt_id } = await res.json();

    console.log("Waiting for image result...");
    const filenames = await waitForImage(prompt_id);
    if (filenames.length === 0) throw new Error("No image generated");
    const comfyFilename = filenames[0];

    console.log("Downloading image...");
    const filepath = await downloadImage(comfyFilename, filename);
    return filepath;
  } catch (err) {
    console.error("Error generating image:", err.message);
    return null;
  }
}
// 5. Main export
export async function generateDungeonImages({ flavor }) {
  console.log("Generating dungeon image...");

  if (!COMFYUI_ENABLED) {
    console.log("ComfyUI image generation disabled via .env; using existing upscaled image.");
    return path.join(__dirname, "dungeon_upscaled.png");
  }

  const finalPrompt = await generateVisualPrompt(flavor);
  console.log("Engineered visual prompt:\n", finalPrompt);

  const baseFilename = `dungeon.png`;
  const upscaledFilename = `dungeon_upscaled.png`;

  const filepath = await generateImageViaComfyUI(finalPrompt, baseFilename);
  if (!filepath) {
    throw new Error("Failed to generate dungeon image.");
  }

  // Upscale 2x (half of A4 at 300dpi)
  // Resolve the output next to this module so it matches the fallback path returned
  // when ComfyUI is disabled.
  const upscaledPath = await upscaleImage(filepath, path.join(__dirname, upscaledFilename), 1456, 1024);
  if (!upscaledPath) {
    throw new Error("Failed to upscale dungeon image.");
  }

  return upscaledPath;
}
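// Usage sketch (illustrative only): the import path and flavor text below are assumptions,
// not part of this module. generateDungeonImages takes an object with a `flavor` string and
// resolves to the filesystem path of the upscaled PNG.
//
//   import { generateDungeonImages } from "./dungeonImage.js"; // hypothetical filename
//
//   const imagePath = await generateDungeonImages({
//     flavor: "A collapsed shrine half-buried in pale sand, a single crooked pillar still standing.",
//   });
//   console.log("Dungeon illustration saved to:", imagePath);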