Compare commits

...

2 Commits

Author SHA1 Message Date
Madison Grubb
27dfed05ac improve image gen prompting. increase cfg from 1->3 to make prompt follow more aggressively
Some checks failed
ci/woodpecker/push/ci Pipeline failed
ci/woodpecker/cron/ci Pipeline was successful
2025-09-04 23:09:57 -04:00
Madison Grubb
714d0351ea fix png compression 2025-09-04 23:02:28 -04:00
5 changed files with 16 additions and 63 deletions

View File

@@ -22,12 +22,13 @@ Scrollsmith is a Node.js tool for generating Dungeons & Dragons one-page dungeon
- Node.js 22+
- Ollama server running and accessible
- Nextcloud (optional) for PDF uploads
- Gitea Releases (optional) for PDF uploads
- `.env` file with:
```env
OLLAMA_API_URL=http://localhost:3000/api/chat/completions
OLLAMA_API_KEY=your_api_key_here
COMFYUI_URL=http://192.168.1.124:8188
```
---

View File

@@ -1,27 +0,0 @@
import fs from 'fs/promises';
import UPNG from 'upng-js';
/**
 * Count the distinct colors in a flat pixel array, reading four consecutive
 * entries (one per channel) as one pixel.
 *
 * Counting stops as soon as more than 256 distinct colors have been seen, so
 * the result is exact up to 256 and is 257 for anything above that.
 *
 * @param {ArrayLike<number>} data - Flat channel values, four per pixel.
 * @returns {number} Number of distinct colors found, capped at 257.
 */
const countUniqueColors = (data) => {
  const PALETTE_LIMIT = 256;
  const seen = new Set();
  let offset = 0;

  while (offset < data.length) {
    const r = data[offset];
    const g = data[offset + 1];
    const b = data[offset + 2];
    const a = data[offset + 3];
    seen.add(`${r},${g},${b},${a}`);

    // No need to keep scanning once the palette limit is exceeded.
    if (seen.size > PALETTE_LIMIT) {
      break;
    }
    offset += 4;
  }

  return seen.size;
};
/**
 * Re-encode a PNG file in place with UPNG and return its path.
 *
 * The decoded image is converted to 8-bit RGBA with `UPNG.toRGBA8` before it
 * is re-encoded. `UPNG.decode` exposes pixel data in the file's own color type
 * and bit depth (RGB, grayscale, palette, 16-bit, ...), so feeding `img.data`
 * straight into `UPNG.encode`, which expects RGBA8 frames, corrupts any image
 * that is not already stored as RGBA8.
 *
 * Only the first frame is kept; an animated PNG is written back as a still.
 *
 * @param {string} filePath - Path of the PNG file to rewrite.
 * @returns {Promise<string>} The same `filePath`, once the file is rewritten.
 * @throws If the file cannot be read, decoded, or written.
 */
export async function compressPng (filePath) {
  const fileBuffer = await fs.readFile(filePath);

  // A Node Buffer may be a view into a larger pooled ArrayBuffer, so copy out
  // exactly the bytes that belong to this file before handing them to UPNG.
  const arrayBuffer = fileBuffer.buffer.slice(
    fileBuffer.byteOffset,
    fileBuffer.byteOffset + fileBuffer.byteLength
  );

  const img = UPNG.decode(arrayBuffer);

  // Normalize to RGBA8 (width * height * 4 bytes) regardless of source format.
  const [firstFrame] = UPNG.toRGBA8(img);
  const rgba = new Uint8Array(firstFrame);

  // Request a 256-color palette only when the image already fits in one;
  // otherwise pass 0, which UPNG treats as "all colors" (lossless).
  const cnum = countUniqueColors(rgba) <= 256 ? 256 : 0;

  // The optional fifth argument of UPNG.encode is the per-frame delay list for
  // animations, not a bit depth, so it is omitted for a single frame.
  const optimizedArrayBuffer = UPNG.encode([firstFrame], img.width, img.height, cnum);

  await fs.writeFile(filePath, Buffer.from(optimizedArrayBuffer));
  return filePath;
}

View File

@@ -2,29 +2,27 @@ import path from "path";
import { mkdir, writeFile } from "fs/promises";
import { fileURLToPath } from "url";
import { callOllama } from "./ollamaClient.js";
import { compressPng } from "./compressPng.js";
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const COMFYUI_URL = process.env.COMFYUI_URL || "http://localhost:8188";
// Drawing style prefix
const STYLE_PREFIX = `a high-contrast, black and white pen and ink drawing, hand-drawn sketch aesthetic, very low detail, extremely minimal, visible loose linework, expressive simple hatching for shadows, quick conceptual sketch, subtle color accent`;
const STYLE_PREFIX = `clean line art, minimalist sketch, concept art sketch, black and white line drawing, lots of white space, sparse shading, simple hatching, very low detail, subtle color accent`;
// 1. Generate engineered visual prompt
async function generateVisualPrompt(flavor) {
const rawPrompt = await callOllama(
`You are a prompt engineer specializing in visual prompts for AI image generation (e.g., Stable Diffusion, ComfyUI). Given a piece of fantasy flavor text, your job is to extract and translate the visual elements into a highly descriptive image prompt.
`You are a prompt engineer specializing in visual prompts for AI image generation. Your goal is to translate fantasy flavor text into a sparse, minimalist scene description.
Your output should be structured like a list of visual tags, not a story or paragraph. Focus on describing the environment, mood, architecture, lighting, materials, and color. Avoid abstract, emotional, and visual language. Be literal, specific, and visual only.
Your output must be a simple list of visual tags describing only the most essential elements of the scene. Focus on the core subject and mood.
Rules:
- Do NOT repeat phrases or wording from the input.
- Only include things that could be seen in a single, still image.
- Use visual keywords, rich adjectives, and clear scene descriptors.
- Keep the prompt concise, 40-80 words.
- Maintain simplicity in descriptions. The image must be a minimal, hand drawn sketch aesthetic with low detail.
- Avoid characters or creatures unless clearly described.
- Avoid referencing rendering style, color technique, camera effects, or drawing medium — focus only on the visual content of the scene.
- Describe a sparse scene with a single focal point or area.
- Use only 3-5 key descriptive phrases or tags.
- The entire output should be very short, 20-50 words maximum.
- Do NOT repeat wording from the input.
- Focus only on visual content, not style, medium, or camera effects.
- Avoid describing fine details; focus on large forms and the overall impression.
- Do NOT include phrases like “an image of” or “a scene showing”.
Input:
@@ -53,7 +51,7 @@ function buildComfyWorkflow(promptText, negativeText = "") {
"inputs": {
"seed": Math.floor(Math.random() * 100000),
"steps": 4,
"cfg": 1,
"cfg": 3,
"sampler_name": "euler",
"scheduler": "simple",
"denoise": 1,
@@ -158,7 +156,8 @@ async function downloadImage(filename, localFilename) {
// 4c. Submit prompt and handle full image pipeline
async function generateImageViaComfyUI(prompt, filename) {
const workflow = buildComfyWorkflow(prompt, "photorealism, hyper-realistic, high detail, 3D render, CGI, ray tracing, glossy, polished, smooth shading, realistic lighting, digital painting, anime, cartoon, pixelated, noisy, cluttered, blurry, sharp focus, symmetrical, perfect perspective, detailed textures, high-resolution textures, high contrast lighting, lens flare, bokeh, camera artifacts, text, logo, signature, watermark, overexposed, underexposed, glowing edges");
const negativePrompt = `heavy shading, deep blacks, cross-hatching, dark, gritty, shadow-filled, chiaroscuro, scratchy lines, photorealism, hyper-realistic, high detail, 3D render, CGI, polished, smooth shading, detailed textures, noisy, cluttered, blurry, text, logo, signature, watermark`;
const workflow = buildComfyWorkflow(prompt, negativePrompt);
try {
console.log("Submitting prompt to ComfyUI...");
@@ -182,9 +181,6 @@ async function generateImageViaComfyUI(prompt, filename) {
console.log("Downloading image...");
const filepath = await downloadImage(comfyFilename, filename);
console.log("Compressing PNG...");
await compressPng(filepath);
return filepath;
} catch (err) {

18
package-lock.json generated
View File

@@ -10,8 +10,7 @@
"license": "SEE LICENSE IN README.md",
"dependencies": {
"dotenv": "^17.2.1",
"puppeteer": "^24.17.1",
"upng-js": "^2.1.0"
"puppeteer": "^24.17.1"
},
"devDependencies": {
"@eslint/js": "^9.34.0",
@@ -1476,12 +1475,6 @@
"node": ">= 14"
}
},
"node_modules/pako": {
"version": "1.0.11",
"resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz",
"integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==",
"license": "(MIT AND Zlib)"
},
"node_modules/parent-module": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz",
@@ -1878,15 +1871,6 @@
"license": "MIT",
"optional": true
},
"node_modules/upng-js": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/upng-js/-/upng-js-2.1.0.tgz",
"integrity": "sha512-d3xzZzpMP64YkjP5pr8gNyvBt7dLk/uGI67EctzDuVp4lCZyVMo0aJO6l/VDlgbInJYDY6cnClLoBp29eKWI6g==",
"license": "MIT",
"dependencies": {
"pako": "^1.0.5"
}
},
"node_modules/uri-js": {
"version": "4.4.1",
"resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz",

View File

@@ -13,8 +13,7 @@
"description": "",
"dependencies": {
"dotenv": "^17.2.1",
"puppeteer": "^24.17.1",
"upng-js": "^2.1.0"
"puppeteer": "^24.17.1"
},
"devDependencies": {
"@eslint/js": "^9.34.0",