Skip to content

Commit f3dec56

Browse files
committed
feat: auto-fix undefined variable refs in compose-generated YAML
LLMs frequently generate YAML in which a step's task references another step's ID instead of that step's output variable name. This commit adds three repair strategies:
1. Step ID → that step's output variable (the most common case).
2. Fuzzy substring match to the closest output variable.
3. Dependency-aware: pick an unused upstream output (avoids collisions).
This eliminates the most frequent `compose --run` failure mode.
1 parent 838aa9d commit f3dec56

1 file changed

Lines changed: 154 additions & 1 deletion

File tree

src/cli/compose.ts

Lines changed: 154 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
* 支持中文(agency-agents-zh)和英文(agency-agents)角色库。
66
*/
77
import { listAgents } from '../agents/loader.js';
8-
import { existsSync, writeFileSync, mkdirSync } from 'node:fs';
8+
import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'node:fs';
99
import { resolve, relative } from 'node:path';
1010
import { createConnector } from '../connectors/factory.js';
1111
import type { LLMConfig } from '../types.js';
@@ -435,6 +435,19 @@ export async function composeWorkflow(options: {
435435
if (retryYaml && retryYaml.includes('steps:')) {
436436
writeFileSync(savedPath, retryYaml + '\n', 'utf-8');
437437
const second = await validateGenerated(savedPath);
438+
// 重试后仍有变量引用错误 → 自动修复
439+
const retryVarErrors = second.errors.filter(e => e.includes('未定义的变量'));
440+
if (retryVarErrors.length > 0) {
441+
const fixResult = await autoFixVariableRefs(savedPath);
442+
if (fixResult.fixed > 0) {
443+
console.log(` 自动修复了 ${fixResult.fixed} 个变量引用:`);
444+
for (const f of fixResult.details) console.log(` {{${f.from}}} → {{${f.to}}}`);
445+
const afterFix = await validateGenerated(savedPath);
446+
warnings.push(...afterFix.errors);
447+
const fixedYaml = readFileSync(savedPath, 'utf-8').trim();
448+
return { yaml: fixedYaml, savedPath, relativePath, warnings };
449+
}
450+
}
438451
warnings.push(...second.errors);
439452
return { yaml: retryYaml, savedPath, relativePath, warnings };
440453
}
@@ -443,6 +456,146 @@ export async function composeWorkflow(options: {
443456
}
444457
}
445458

459+
// 自动修复未定义的变量引用(LLM 常见错误:变量名与 output 名不一致)
460+
const varErrors = first.errors.filter(e => e.includes('未定义的变量'));
461+
if (varErrors.length > 0) {
462+
const fixResult = await autoFixVariableRefs(savedPath);
463+
if (fixResult.fixed > 0) {
464+
console.log(` 自动修复了 ${fixResult.fixed} 个变量引用:`);
465+
for (const f of fixResult.details) {
466+
console.log(` {{${f.from}}} → {{${f.to}}}`);
467+
}
468+
// 重新校验
469+
const afterFix = await validateGenerated(savedPath);
470+
warnings.push(...afterFix.errors);
471+
const fixedYaml = readFileSync(savedPath, 'utf-8').trim();
472+
return { yaml: fixedYaml, savedPath, relativePath, warnings };
473+
}
474+
}
475+
446476
warnings.push(...first.errors);
447477
return { yaml, savedPath, relativePath, warnings };
448478
}
479+
480+
/**
 * Repairs undefined `{{variable}}` references in a compose-generated workflow YAML file.
 *
 * LLMs commonly reference a step id (or a near-miss name) where the step's
 * `output` variable name is required. For every undefined reference found in a
 * step's `task`, the following strategies are tried in order:
 *   1. The name is a step id whose step declares an output → use that output.
 *   2. Fuzzy match against all declared output names (see `findBestMatch`).
 *   3. Use the output of a `depends_on` step of a step that references the bad
 *      name, preferring an output not already chosen by an earlier repair.
 *
 * Replacements are applied textually to the whole file content, and the file is
 * rewritten only when at least one replacement was made.
 *
 * @param yamlPath Path of the workflow YAML file to inspect and rewrite in place.
 * @returns The number of repaired variable names and the `from → to` pairs.
 *          Returns `{ fixed: 0, details: [] }` when the workflow cannot be parsed.
 */
async function autoFixVariableRefs(yamlPath: string): Promise<{ fixed: number; details: { from: string; to: string }[] }> {
  const { parseWorkflow } = await import('../core/parser.js');
  const content = readFileSync(yamlPath, 'utf-8');
  let workflow;
  try {
    workflow = parseWorkflow(yamlPath);
  } catch {
    // Best effort: an unparsable file cannot be repaired here; the caller
    // still reports the validation errors it already collected.
    return { fixed: 0, details: [] };
  }

  const inputNames = new Set((workflow.inputs || []).map((i: any) => i.name));
  const outputNames = new Set<string>();
  const stepIdToOutput = new Map<string, string>();

  for (const step of workflow.steps) {
    if (step.output) {
      outputNames.add(step.output);
      stepIdToOutput.set(step.id, step.output);
    }
  }

  const allDefined = new Set([...inputNames, ...outputNames, '_loop_iteration']);
  const replacements: { from: string; to: string }[] = [];
  let fixedContent = content;

  // Extracts the variable names referenced as {{name}} in a task string.
  const refsOf = (task: string | null | undefined): string[] =>
    (task?.match(/\{\{(\w+)\}\}/g) ?? []).map(ref => ref.slice(2, -2));

  // Extract each step's references once; they are reused by strategy 3.
  const stepRefs = new Map<unknown, string[]>();
  for (const step of workflow.steps) {
    stepRefs.set(step, refsOf(step.task));
  }

  // Collect every referenced name that is neither an input nor an output.
  const undefinedVars = new Set<string>();
  for (const refs of stepRefs.values()) {
    for (const varName of refs) {
      if (!allDefined.has(varName)) {
        undefinedVars.add(varName);
      }
    }
  }

  // Literal (non-regex) replacement: split/join never interprets `$&`, `$1`
  // or `$$` inside the new name, unlike a string replacement passed to replace().
  const applyFix = (from: string, to: string): void => {
    fixedContent = fixedContent.split(`{{${from}}}`).join(`{{${to}}}`);
    replacements.push({ from, to });
  };

  for (const badVar of undefinedVars) {
    // Strategy 1: badVar is a step id and that step has an output → use the output.
    const ownOutput = stepIdToOutput.get(badVar);
    if (ownOutput !== undefined) {
      applyFix(badVar, ownOutput);
      continue;
    }

    // Strategy 2: fuzzy match against the declared output variable names.
    const best = findBestMatch(badVar, [...outputNames]);
    if (best) {
      applyFix(badVar, best);
      continue;
    }

    // Strategy 3: follow depends_on to an upstream step that has an output,
    // preferring an output that no earlier repair has already been mapped to.
    const alreadyUsed = new Set(replacements.map(r => r.to));
    for (const step of workflow.steps) {
      if (!step.depends_on?.length) continue;
      if (!(stepRefs.get(step) ?? []).includes(badVar)) continue;

      const upstreamOutputs: string[] = [];
      for (const depId of step.depends_on) {
        const out = stepIdToOutput.get(depId);
        if (out !== undefined) upstreamOutputs.push(out);
      }

      const goodVar = upstreamOutputs.find(out => !alreadyUsed.has(out)) ?? upstreamOutputs[0];
      if (goodVar !== undefined) {
        applyFix(badVar, goodVar);
        // Stop only after a fix was applied; a referencing step whose
        // dependencies have no outputs must not block later candidates.
        break;
      }
    }
  }

  if (replacements.length > 0) {
    writeFileSync(yamlPath, fixedContent, 'utf-8');
  }
  return { fixed: replacements.length, details: replacements };
}
563+
564+
/**
 * Fuzzy-matches `target` against `candidates`, case-insensitively.
 *
 * Two passes are made:
 *   1. Containment: candidates that contain the target, or are contained in it.
 *      Among these, the one sharing the most characters wins; ties go to the
 *      candidate whose length is closest to the target's (so an exact match
 *      always wins), then to the earliest candidate.
 *   2. Fallback: the candidate with the longest common substring, provided that
 *      substring is at least 3 characters long; ties go to the earliest candidate.
 *
 * @param target     The undefined name to find a replacement for.
 * @param candidates The defined names to choose from.
 * @returns The best candidate (in its original casing), or `null` when the
 *          target is empty, there are no candidates, or nothing is close enough.
 */
function findBestMatch(target: string, candidates: string[]): string | null {
  const t = target.toLowerCase();
  // An empty target is a substring of everything, so it can match nothing meaningfully.
  if (t.length === 0 || candidates.length === 0) return null;

  // Pass 1: containment, keeping the strongest match rather than the first one.
  let contained: string | null = null;
  let containedOverlap = 0;
  let containedGap = Number.POSITIVE_INFINITY;
  for (const c of candidates) {
    const cl = c.toLowerCase();
    // Skip empty candidates: '' is trivially contained in every target.
    if (cl.length === 0) continue;
    if (!t.includes(cl) && !cl.includes(t)) continue;

    // With containment, the shared text is exactly the shorter string.
    const overlap = Math.min(t.length, cl.length);
    const gap = Math.abs(t.length - cl.length);
    if (overlap > containedOverlap || (overlap === containedOverlap && gap < containedGap)) {
      contained = c;
      containedOverlap = overlap;
      containedGap = gap;
    }
  }
  if (contained !== null) return contained;

  // Pass 2: score by longest common substring; require at least 3 characters
  // so that incidental one- or two-letter overlaps are not treated as matches.
  let best: string | null = null;
  let bestScore = 0;
  for (const c of candidates) {
    const score = longestCommonSubstring(t, c.toLowerCase());
    if (score > bestScore && score >= 3) {
      bestScore = score;
      best = c;
    }
  }
  return best;
}
590+
591+
/**
 * Returns the length of the longest contiguous run of characters that appears
 * in both `a` and `b`. Comparison is exact (case-sensitive) and works on
 * UTF-16 code units.
 *
 * @param a First string.
 * @param b Second string.
 * @returns The length of the longest common substring; 0 when there is none.
 */
function longestCommonSubstring(a: string, b: string): number {
  let longest = 0;
  // previousRow[j + 1] holds the length of the common run that ends at
  // a[i - 1] and b[j]; a matching pair extends the run on its diagonal by one.
  let previousRow = new Array<number>(b.length + 1).fill(0);

  for (let i = 0; i < a.length; i++) {
    const currentRow = new Array<number>(b.length + 1).fill(0);
    for (let j = 0; j < b.length; j++) {
      if (a[i] !== b[j]) continue;
      const run = (previousRow[j] ?? 0) + 1;
      currentRow[j + 1] = run;
      if (run > longest) longest = run;
    }
    previousRow = currentRow;
  }

  return longest;
}

0 commit comments

Comments
 (0)