// scrollsmith/imageGenerator.js

import sharp from 'sharp';
import path from "path";
import { mkdir, writeFile } from "fs/promises";
import { fileURLToPath } from "url";
import { callOllama } from "./ollamaClient.js";

// ES modules have no built-in __dirname, so derive it from this module's URL.
const moduleFilePath = fileURLToPath(import.meta.url);
const __dirname = path.dirname(moduleFilePath);

// Base URL of the ComfyUI server; a non-empty COMFYUI_URL environment
// variable overrides the local default.
const DEFAULT_COMFYUI_URL = "http://localhost:8188";
const COMFYUI_URL = process.env.COMFYUI_URL || DEFAULT_COMFYUI_URL;

// Drawing style prefix: comma-separated style tags placed in front of every
// generated visual prompt.
const STYLE_PREFIX = [
  "clean line art",
  "minimalist sketch",
  "concept art sketch",
  "black and white line drawing",
  "lots of white space",
  "sparse shading",
  "very minimal shading",
  "simple black hatching",
  "very low detail",
  "single accent color"
].join(", ");

/**
 * Resize an image with sharp and write the result as a compressed,
 * palette-based PNG.
 *
 * @param {string} inputPath - Source image handed to sharp().
 * @param {string} outputPath - Destination handed to sharp's toFile().
 * @param {number} width - Target width passed to resize().
 * @param {number} height - Target height passed to resize().
 * @returns {Promise<string|null>} outputPath on success; null if any step
 *   throws (the error message is logged, not rethrown).
 */
async function upscaleImage(inputPath, outputPath, width, height) {
  const resizeOptions = { kernel: 'lanczos3' };
  const pngOptions = {
    compressionLevel: 9,
    adaptiveFiltering: true,
    palette: true
  };

  try {
    const resized = sharp(inputPath).resize(width, height, resizeOptions);
    // Slight blur followed by sharpen() — presumably to soften resampling
    // artifacts before re-crisping the line work.
    const retouched = resized.blur(0.3).sharpen();
    const encoded = retouched.png(pngOptions);
    await encoded.toFile(outputPath);

    console.log(`Upscaled + compressed PNG saved: ${outputPath}`);
    return outputPath;
  } catch (err) {
    console.error("Error during upscaling:", err.message);
    return null;
  }
}

// 1. Generate engineered visual prompt
/**
 * Turn fantasy flavor text into an image-generation prompt.
 *
 * The flavor text is embedded in a fixed instruction block and sent through
 * callOllama; the reply is trimmed, flattened onto one line and prefixed
 * with STYLE_PREFIX.
 *
 * @param {string} flavor - Flavor text interpolated into the instructions.
 * @returns {Promise<string>} `${STYLE_PREFIX}, <single-line model reply>`.
 */
async function generateVisualPrompt(flavor) {
  const instructions = `You are a prompt engineer specializing in visual prompts for AI image generation. Your goal is to translate fantasy flavor text into a sparse, minimalist scene description.

Your output must be a simple list of visual tags describing only the most essential elements of the scene. Focus on the core subject and mood.

Rules:
- Describe a sparse scene with a single focal point or area.
- Use only 3-5 key descriptive phrases or tags.
- The entire output should be very short, 20-50 words maximum.
- Do NOT repeat wording from the input.
- Focus only on visual content, not style, medium, or camera effects.
- Avoid describing fine details; focus on large forms and the overall impression.
- Do NOT include phrases like “an image of” or “a scene showing”.

Input:
${flavor}

Output:`;

  // The trailing arguments are forwarded to callOllama unchanged; their exact
  // meaning is defined by that helper.
  const reply = await callOllama(instructions, "gemma3n:e4b", 3, "Generate Visual Prompt");

  // Strip surrounding whitespace, then turn each remaining newline into a space.
  const singleLine = reply.trim().split("\n").join(" ");
  return `${STYLE_PREFIX}, ${singleLine}`;
}

// 2. Save image buffer
/**
 * Write an image buffer to disk, relative to this module's directory.
 *
 * @param {Buffer|Uint8Array|string} buffer - Data handed to writeFile().
 * @param {string} filename - Path joined onto __dirname; may contain
 *   subdirectories, which are created on demand.
 * @returns {Promise<string>} The full path the data was written to.
 * @throws Propagates any error raised by mkdir() or writeFile().
 */
async function saveImage(buffer, filename) {
  const filepath = path.join(__dirname, filename);
  // Create the target file's own parent directory. Creating __dirname itself
  // would leave any subfolder named in `filename` missing, and writeFile()
  // would then fail with ENOENT.
  await mkdir(path.dirname(filepath), { recursive: true });
  await writeFile(filepath, buffer);
  console.log(`Saved image: ${filepath}`);
  return filepath;
}

// 3. Build workflow payload
/**
 * Build the ComfyUI workflow graph that is submitted as the `prompt` payload.
 *
 * The graph is a plain object keyed by node id. Each node carries an `inputs`
 * map and a `class_type`. Array-valued inputs such as ["4", 0] refer to
 * another node in this same object — presumably [source node id, output
 * slot]; confirm against the ComfyUI API format.
 *
 * @param {string} promptText - Text for the positive CLIPTextEncode node ("6").
 * @param {string} [negativeText=""] - Text for the negative CLIPTextEncode node ("7").
 * @returns {object} Workflow object with nodes "3" through "11"; the KSampler
 *   seed is a fresh random integer in [0, 100000) on every call.
 */
function buildComfyWorkflow(promptText, negativeText = "") {
  const linkTo = (nodeId) => [nodeId, 0];
  const makeNode = (classType, inputs) => ({ inputs, class_type: classType });
  const textEncoder = (text) =>
    makeNode("CLIPTextEncode", { text, clip: linkTo("10") });

  const sampler = makeNode("KSampler", {
    seed: Math.floor(Math.random() * 100000),
    steps: 4,
    cfg: 1,
    sampler_name: "euler",
    scheduler: "simple",
    denoise: 1,
    model: linkTo("4"),
    positive: linkTo("6"),
    negative: linkTo("7"),
    latent_image: linkTo("5")
  });

  const unetLoader = makeNode("UNETLoader", {
    unet_name: "flux1-schnell-fp8.safetensors",
    weight_dtype: "fp8_e4m3fn"
  });

  const emptyLatent = makeNode("EmptyLatentImage", {
    width: 728,
    height: 512,
    batch_size: 1
  });

  const clipLoader = makeNode("DualCLIPLoader", {
    clip_name1: "clip_l.safetensors",
    clip_name2: "t5xxl_fp8_e4m3fn.safetensors",
    type: "flux"
  });

  const vaeLoader = makeNode("VAELoader", { vae_name: "ae.safetensors" });

  const vaeDecode = makeNode("VAEDecode", {
    samples: linkTo("3"),
    vae: linkTo("11")
  });

  const saveNode = makeNode("SaveImage", {
    filename_prefix: "ComfyUI_Flux",
    images: linkTo("8")
  });

  return {
    "3": sampler,
    "4": unetLoader,
    "5": emptyLatent,
    "6": textEncoder(promptText),
    "7": textEncoder(negativeText),
    "8": vaeDecode,
    "9": saveNode,
    "10": clipLoader,
    "11": vaeLoader
  };
}


// 4a. Wait for ComfyUI to finish image generation
/**
 * Poll ComfyUI's history until the given prompt has produced image outputs.
 *
 * Only the history entry for `promptId` is requested (`/history/{prompt_id}`),
 * not the server's full history; the response is still keyed by prompt id.
 *
 * @param {string} promptId - Id returned by ComfyUI when the prompt was queued.
 * @param {number} [timeout=900000] - Maximum time to keep polling, in ms.
 * @param {number} [pollIntervalMs=1000] - Delay between polls, in ms.
 * @returns {Promise<string[]>} Filenames of every image listed in the entry's outputs.
 * @throws {Error} If a history request returns a non-OK status, if ComfyUI
 *   marks the run as failed without any images, or if the timeout elapses.
 */
async function waitForImage(promptId, timeout = 900000, pollIntervalMs = 1000) {
  const start = Date.now();
  const historyUrl = `${COMFYUI_URL}/history/${encodeURIComponent(promptId)}`;

  while (Date.now() - start < timeout) {
    const res = await fetch(historyUrl);
    if (!res.ok) {
      throw new Error(`ComfyUI history request failed: ${res.status} ${res.statusText}`);
    }

    const data = await res.json();
    const historyEntry = data?.[promptId];

    if (historyEntry?.outputs) {
      const images = Object.values(historyEntry.outputs).flatMap(o => o.images || []);
      if (images.length > 0) return images.map(i => i.filename);
    }

    // ComfyUI stores failed runs in history with status.status_str === "error"
    // (per its server implementation — confirm against the deployed version).
    // Fail fast instead of polling until the timeout for a run that is over.
    if (historyEntry?.status?.status_str === "error") {
      throw new Error(`ComfyUI reported a failed execution for prompt ${promptId}.`);
    }

    await new Promise(resolve => setTimeout(resolve, pollIntervalMs));
  }

  throw new Error("Timed out waiting for ComfyUI image result.");
}

// 4b. Download image from ComfyUI server
/**
 * Download a generated image from the ComfyUI server and save it locally.
 *
 * @param {string} filename - Image filename as reported by ComfyUI.
 * @param {string} localFilename - Name handed to saveImage() for the local copy.
 * @returns {Promise<string>} Whatever saveImage() resolves to (the saved path).
 * @throws {Error} If the server answers with a non-OK status; errors from
 *   fetch() and saveImage() propagate unchanged.
 */
async function downloadImage(filename, localFilename) {
  // Encode the filename so spaces, '&', '=', '#', etc. cannot corrupt the
  // query string.
  const url = `${COMFYUI_URL}/view?filename=${encodeURIComponent(filename)}`;
  const res = await fetch(url);

  if (!res.ok) {
    throw new Error(`Failed to fetch image: ${res.status} ${res.statusText}`);
  }
  const buffer = Buffer.from(await res.arrayBuffer());

  return await saveImage(buffer, localFilename);
}

// 4c. Submit prompt and handle full image pipeline
/**
 * Run the full ComfyUI pipeline for one prompt: queue the workflow, wait for
 * the result, and download the first generated image.
 *
 * @param {string} prompt - Positive prompt text placed into the workflow.
 * @param {string} filename - Local filename passed to downloadImage().
 * @returns {Promise<string|null>} The path returned by downloadImage(), or
 *   null if submission, waiting or download fails (the error message is
 *   logged, not rethrown).
 */
async function generateImageViaComfyUI(prompt, filename) {
  const negativePrompt = `heavy shading, deep blacks, cross-hatching, dark, gritty, shadow-filled, chiaroscuro, scratchy lines, photorealism, hyper-realistic, high detail, 3D render, CGI, polished, smooth shading, detailed textures, noisy, cluttered, blurry, text, logo, signature, watermark, artist name, branding, ugly, deformed, unnatural patterns, perfect curves, repetitive textures`;
  const workflow = buildComfyWorkflow(prompt, negativePrompt);

  try {
    console.log("Submitting prompt to ComfyUI...");
    const res = await fetch(`${COMFYUI_URL}/prompt`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ prompt: workflow })
    });

    if (!res.ok) {
      throw new Error(`ComfyUI error: ${res.status} ${res.statusText}`);
    }

    const { prompt_id } = (await res.json()) ?? {};
    // Without an id there is nothing to poll for; bail out now rather than
    // letting waitForImage() spin until its timeout.
    if (!prompt_id) {
      throw new Error("ComfyUI response did not include a prompt_id");
    }

    console.log("Waiting for image result...");
    const filenames = await waitForImage(prompt_id);
    if (filenames.length === 0) throw new Error("No image generated");

    // Only the first reported image is downloaded.
    const [comfyFilename] = filenames;

    console.log("Downloading image...");
    return await downloadImage(comfyFilename, filename);
  } catch (err) {
    console.error("Error generating image:", err.message);
    return null;
  }
}

// 5. Main export
/**
 * Generate a dungeon illustration from flavor text and return the path of
 * its upscaled version.
 *
 * Steps: build the visual prompt, render `dungeon.png` through ComfyUI, then
 * upscale it to `dungeon_upscaled.png`.
 *
 * @param {{ flavor: string }} options - `flavor` is the text forwarded to
 *   generateVisualPrompt().
 * @returns {Promise<string>} The value returned by upscaleImage().
 * @throws {Error} If image generation or upscaling yields a falsy result.
 */
export async function generateDungeonImages({ flavor }) {
  console.log("Generating dungeon image...");

  const visualPrompt = await generateVisualPrompt(flavor);
  console.log("Engineered visual prompt:\n", visualPrompt);

  const renderedPath = await generateImageViaComfyUI(visualPrompt, `dungeon.png`);
  if (!renderedPath) {
    throw new Error("Failed to generate dungeon image.");
  }

  // 1456x1024 is twice the 728x512 size requested in buildComfyWorkflow.
  // NOTE(review): the output name is a bare relative path, so the upscaled
  // file presumably lands in the process working directory rather than next
  // to the rendered image — confirm this is intended.
  const targetWidth = 1456;
  const targetHeight = 1024;
  const finalPath = await upscaleImage(
    renderedPath,
    `dungeon_upscaled.png`,
    targetWidth,
    targetHeight
  );
  if (!finalPath) {
    throw new Error("Failed to upscale dungeon image.");
  }

  return finalPath;
}