feat: enhance cleanLlmJson function to improve JSON extraction and formatting

This commit is contained in:
Victor Noguera
2026-04-07 23:50:23 -04:00
parent 1dd657b386
commit a5a2e9182c

View File

@@ -177,9 +177,24 @@ function cleanLlmJson(text: string): string {
const fenceMatch = text.match(/```(?:json)?\s*\n?([\s\S]*?)```/); const fenceMatch = text.match(/```(?:json)?\s*\n?([\s\S]*?)```/);
let cleaned = fenceMatch ? fenceMatch[1]!.trim() : text.trim(); let cleaned = fenceMatch ? fenceMatch[1]!.trim() : text.trim();
// Strip any non-JSON wrapper text by keeping only the span from the first "[" or "{" to the last "]" or "}"
const firstArray = cleaned.indexOf("[");
const firstObject = cleaned.indexOf("{");
const startCandidates = [firstArray, firstObject].filter((i) => i >= 0);
const start = startCandidates.length > 0 ? Math.min(...startCandidates) : -1;
const endArray = cleaned.lastIndexOf("]");
const endObject = cleaned.lastIndexOf("}");
const end = Math.max(endArray, endObject);
if (start >= 0 && end > start) {
cleaned = cleaned.slice(start, end + 1);
}
// Fix trailing comma-quote before closing brace: ,"} → "} // Fix trailing comma-quote before closing brace: ,"} → "}
cleaned = cleaned.replace(/,"\s*}/g, '"}'); cleaned = cleaned.replace(/,"\s*}/g, '"}');
// Fix malformed comma-quote before a closing bracket/brace: ,"} or ,"]
cleaned = cleaned.replace(/,\s*"\s*([}\]])/g, "$1");
// Fix trailing commas before ] or } // Fix trailing commas before ] or }
cleaned = cleaned.replace(/,\s*([}\]])/g, "$1"); cleaned = cleaned.replace(/,\s*([}\]])/g, "$1");