/** * Ironparse — deterministic COBOL copybook verification engine. * * This is the *verification layer* of the Ironparse engine: the COBOL parser * and the five deterministic quality gates that produce the field-for-field * parity guarantee. It runs with zero LLM involvement — same input, same * output, every run. * * In a pilot deployment, a local model running inside the customer's VPC * drafts the candidate TypeScript/Zod schema; these same gates then reject any * draft whose field count or structural shape does not match the source AST. * In this public trace the emitter below produces the reference schema * directly from the AST, and the gates verify it. Nothing is mocked: every * field count, every diff, and every hash is computed live from the copybook. * * No dependencies. Runs identically under Node and in a Cloudflare Worker. */ // --------------------------------------------------------------------------- // AST types // --------------------------------------------------------------------------- export interface Occurs { min: number; max: number; dependingOn: string | null; // OCCURS DEPENDING ON , else null (fixed) } export interface Node { level: number; name: string; pic: string | null; // raw PIC clause, e.g. "S9(9)V99" — null for group items comp3: boolean; redefines: string | null; occurs: Occurs | null; children: Node[]; // populated during analysis: path: string; // dotted JSON path of the field in the emitted schema storage: string; // human storage description tsType: string; // emitted leaf TS/Zod primitive (leaf nodes only) } export interface Ast { records: Node[]; // top-level 01 records leaves: Node[]; // every elementary (PIC-bearing) field, in source order redefinesCount: number; odoCount: number; // OCCURS DEPENDING ON groups fixedOccursCount: number; } // --------------------------------------------------------------------------- // Parser — deterministic, line-oriented. No LLM, no heuristics-on-vibes. // --------------------------------------------------------------------------- const LEVEL_RE = /^s*(d{2})s+([A-Z0-9][A-Z0-9-]*)s*(.*?)s*.?s*$/; function parsePic(rest: string): { pic: string | null; comp3: boolean } { const comp3 = /COMP-3/.test(rest) || /COMPUTATIONAL-3/.test(rest); const m = rest.match(/PIC(?:TURE)?s+(?:ISs+)?([0-9A-Z()V$S.,+-/]+)/); return { pic: m ? m[1] : null, comp3 }; } function parseOccurs(rest: string): Occurs | null { // OCCURS n TIMES -> fixed n // OCCURS 0 TO m TIMES DEPENDING ON Y -> variable, depending on Y const dep = rest.match(/OCCURSs+(d+)s+TOs+(d+)s+TIMESs+DEPENDINGs+ONs+([A-Z0-9][A-Z0-9-]*)/); if (dep) return { min: parseInt(dep[1], 10), max: parseInt(dep[2], 10), dependingOn: dep[3] }; const fixed = rest.match(/OCCURSs+(d+)s+TIMES/); if (fixed) return { min: parseInt(fixed[1], 10), max: parseInt(fixed[1], 10), dependingOn: null }; return null; } function parseRedefines(rest: string): string | null { const m = rest.match(/REDEFINESs+([A-Z0-9][A-Z0-9-]*)/); return m ? m[1] : null; } /** Parse a COBOL copybook into a tree of records keyed by level number. */ export function parseCopybook(text: string): Ast { // COBOL copybooks may use fixed-format columns; join continuation logic is // out of scope for these ACORD samples (each clause is on one logical line). const lines = text .split(/ ? /) .map((l) => l.replace(/*>.*/, "")) // strip inline comments .filter((l) => l.trim().length > 0 && !/^s**/.test(l)); // drop comment lines const records: Node[] = []; const stack: Node[] = []; for (const line of lines) { const m = line.match(LEVEL_RE); if (!m) continue; const level = parseInt(m[1], 10); const name = m[2]; const rest = m[3] || ""; const { pic, comp3 } = parsePic(rest); const node: Node = { level, name, pic, comp3, redefines: parseRedefines(rest), occurs: parseOccurs(rest), children: [], path: "", storage: "", tsType: "", }; // Pop the stack until we find a parent with a strictly lower level. while (stack.length > 0 && stack[stack.length - 1].level >= level) stack.pop(); if (stack.length === 0) { records.push(node); } else { stack[stack.length - 1].children.push(node); } stack.push(node); } // Collect elementary fields (PIC-bearing leaves) in source order + counts. const leaves: Node[] = []; let redefinesCount = 0; let odoCount = 0; let fixedOccursCount = 0; const walk = (node: Node, prefix: string) => { const seg = toCamel(node.name); const isArray = node.occurs !== null; const path = prefix ? `${prefix}.${seg}` : seg; if (node.redefines) redefinesCount++; if (node.occurs?.dependingOn) odoCount++; if (node.occurs && !node.occurs.dependingOn) fixedOccursCount++; if (node.pic) { const { storage, tsType } = mapPic(node.pic, node.comp3); node.path = isArray ? `${path}[]` : path; node.storage = storage; node.tsType = tsType; leaves.push(node); } else { const childPrefix = isArray ? `${path}[]` : path; for (const c of node.children) walk(c, childPrefix); } }; for (const r of records) walk(r, ""); return { records, leaves, redefinesCount, odoCount, fixedOccursCount }; } // --------------------------------------------------------------------------- // Type mapping — COBOL PIC -> storage description + Zod primitive // --------------------------------------------------------------------------- function picLength(pic: string): number { // Expand things like X(35) or 9(9)V99 into a character count (display). let total = 0; const re = /([X9AN])((d+))|([X9AN])/g; let mm: RegExpExecArray | null; while ((mm = re.exec(pic)) !== null) { total += mm[2] ? parseInt(mm[2], 10) : 1; } return total; } export function mapPic(pic: string, comp3: boolean): { storage: string; tsType: string } { const signed = /^S/.test(pic); const hasDecimal = /V/.test(pic); const isNumeric = /9/.test(pic) && !/X/.test(pic) && !/A/.test(pic); const len = picLength(pic); if (comp3) { // Packed decimal — a real number with scale; preserve as numeric. return { storage: `packed decimal (COMP-3${signed ? ", signed" : ""}${hasDecimal ? ", scaled" : ""})`, tsType: hasDecimal ? "z.number()" : "z.number().int()", }; } if (isNumeric) { // Display numeric: preserve exact digits (leading zeros are significant on // a mainframe). Fidelity-first: model as a constrained string. return { storage: `display numeric${signed ? ", signed" : ""}${hasDecimal ? ", implied decimal" : ""} (${len} digits)`, tsType: hasDecimal || signed ? `z.string()` : `z.string().regex(/^\\d{1,${len}}$/)`, }; } // Alphanumeric / alphabetic. return { storage: `alphanumeric (${len} chars)`, tsType: `z.string().max(${len})` }; } // --------------------------------------------------------------------------- // Naming // --------------------------------------------------------------------------- export function toCamel(cobolName: string): string { const parts = cobolName.toLowerCase().split("-"); return parts .map((p, i) => (i === 0 ? p : p.charAt(0).toUpperCase() + p.slice(1))) .join(""); } // --------------------------------------------------------------------------- // Emitter — AST -> TypeScript/Zod schema text (deterministic reference output) // --------------------------------------------------------------------------- export interface EmitResult { code: string; primitiveCount: number; // leaf z.() emitted — independent of AST leaf count unionCount: number; // REDEFINES overlays modelled as z.discriminatedUnion / z.union discriminatedUnionCount: number; arrayCount: number; // OCCURS modelled as z.array } function emitNode(node: Node, indent: string): string { // Group item -> z.object({...}); elementary -> its primitive. let inner: string; if (node.pic) { inner = node.tsType; } else { const pad = indent + " "; const fields = node.children .filter((c) => !c.redefines) // redefining siblings handled by union at group level .map((c) => `${pad}${toCamel(c.name)}: ${emitNode(c, pad)},`) .join(" "); inner = `z.object({ ${fields} ${indent}})`; } if (node.occurs) { const note = node.occurs.dependingOn ? ` /* OCCURS 0..${node.occurs.max} DEPENDING ON ${node.occurs.dependingOn} */` : ` /* OCCURS ${node.occurs.max} */`; inner = `z.array(${inner}).max(${node.occurs.max})${note}`; } return inner; } export function emitZod(ast: Ast): EmitResult { const lines: string[] = []; lines.push(`import { z } from "zod";`); lines.push(""); // Index records by name to resolve REDEFINES overlays. const byName = new Map(); for (const r of ast.records) byName.set(r.name, r); let unionCount = 0; let discriminatedUnionCount = 0; // A record that is REDEFINES'd by a later record forms a memory-overlay // union: the same bytes can be read as either layout. const redefinedBy = new Map(); for (const r of ast.records) { if (r.redefines) { const arr = redefinedBy.get(r.redefines) || []; arr.push(r); redefinedBy.set(r.redefines, arr); } } const emittedRoots: string[] = []; const rootFields: string[] = []; for (const r of ast.records) { if (r.redefines) continue; // emitted as part of the base record's union const overlays = redefinedBy.get(r.name); const field = toCamel(r.name); if (overlays && overlays.length > 0) { // Memory overlay -> union of all layouts that share these bytes. const branches = [r, ...overlays]; const discriminator = sharedDiscriminator(branches); const branchCode = branches.map((b) => emitNode(b, " ")).join(", "); let unionExpr: string; if (discriminator) { unionExpr = `z.discriminatedUnion("${discriminator}", [ ${branchCode} ])`; discriminatedUnionCount++; } else { unionExpr = `z.union([ ${branchCode} ])`; } unionCount++; rootFields.push( ` // REDEFINES overlay: ${[r.name, ...overlays.map((o) => o.name)].join(" / ")} ${field}: ${unionExpr},`, ); } else { rootFields.push(` ${field}: ${emitNode(r, " ")},`); } } lines.push(`export const RecordSchema = z.object({`); lines.push(rootFields.join(" ")); lines.push(`});`); lines.push(""); lines.push(`export type Record = z.infer;`); const code = lines.join(" "); // Independent structural counts parsed back out of the emitted text — this // is what Gate 03 / Gate 04 compare against the AST, not a bookkeeping echo. const primitiveCount = (code.match(/z.(string|number|boolean)(/g) || []).length; const arrayCount = (code.match(/z.array(/g) || []).length; return { code, primitiveCount, unionCount, discriminatedUnionCount, arrayCount }; } /** A discriminator exists if every union branch leads with the same-shaped * 1-char indicator field (classic ACORD record-type byte). */ function sharedDiscriminator(branches: Node[]): string | null { const firsts = branches.map((b) => b.children[0]).filter(Boolean); if (firsts.length !== branches.length) return null; const allOneChar = firsts.every((f) => f.pic && /^X(1)$|^X$/.test(f.pic)); if (!allOneChar) return null; // Use each branch's own first field name; discriminatedUnion needs a single // shared key, so only valid when the names match. ACORD overlays differ, so // we fall back to z.union in that case — modelled honestly, not forced. const name = toCamel(firsts[0].name); return firsts.every((f) => toCamel(f.name) === name) ? name : null; } // --------------------------------------------------------------------------- // The five deterministic quality gates // --------------------------------------------------------------------------- export interface Gate { id: string; name: string; detail: string; pass: boolean; measure: string; // the actual number that decided it } export function runGates(ast: Ast, emit: EmitResult): Gate[] { const gates: Gate[] = []; // 01 PARSER — Tree extraction produced field-bearing records. gates.push({ id: "01", name: "PARSER", detail: "Deterministic AST extraction yields a non-empty, field-bearing record set. No LLM involved.", pass: ast.records.length > 0 && ast.leaves.length > 0, measure: `${ast.records.length} records · ${ast.leaves.length} elementary fields`, }); // 02 SCHEMA_SANITY — Emitted schema is valid, fenced-free Zod. const balanced = isBalanced(emit.code); const noFence = !/```/.test(emit.code); const hasObject = /z.object(/.test(emit.code); gates.push({ id: "02", name: "SCHEMA_SANITY", detail: "Candidate schema parses as valid Zod — balanced delimiters, no markdown fences, no prose.", pass: balanced && noFence && hasObject, measure: balanced && noFence && hasObject ? "valid Zod module" : "malformed output", }); // 03 FIELD_PARITY — strict count equality. Math, not judgment. gates.push({ id: "03", name: "FIELD_PARITY", detail: "len(COBOL elementary fields) === len(emitted schema leaves). Off by one and the build fails.", pass: ast.leaves.length === emit.primitiveCount, measure: `COBOL ${ast.leaves.length} ⇄ schema ${emit.primitiveCount}`, }); // 04 DARK_CORNER — every overlay/array survives translation. const overlayGroups = redefinesUnionGroups(ast); const expectedArrays = ast.odoCount + ast.fixedOccursCount; const redefinesOk = emit.unionCount >= overlayGroups; const odoOk = emit.arrayCount >= expectedArrays; gates.push({ id: "04", name: "DARK_CORNER", detail: "Every REDEFINES overlay compiles to a union (discriminated where a record-type byte exists); every OCCURS to a dynamic array.", pass: redefinesOk && odoOk, measure: `${emit.unionCount}/${overlayGroups} overlays · ${emit.arrayCount}/${expectedArrays} arrays`, }); // 05 MOCK_STRUCTURE — round-trip a mock document through the shape. const mock = buildMock(ast); const roundTrip = validateMock(ast, mock); gates.push({ id: "05", name: "MOCK_STRUCTURE", detail: "A mock document generated from the schema re-validates against it — the schema is internally consistent.", pass: roundTrip.ok, measure: roundTrip.ok ? `${roundTrip.checked} nodes round-tripped` : roundTrip.reason, }); return gates; } // Count of base records that participate in a REDEFINES overlay group. function redefinesUnionGroups(ast: Ast): number { const bases = new Set(); for (const r of ast.records) if (r.redefines) bases.add(r.redefines); return bases.size; } // --------------------------------------------------------------------------- // Mock generation + round-trip validation (Gate 05) // --------------------------------------------------------------------------- function mockLeaf(node: Node): unknown { if (node.comp3) return node.pic && /V/.test(node.pic) ? 0.0 : 0; if (node.pic && /9/.test(node.pic) && !/X|A/.test(node.pic)) return "0"; return ""; } function buildMock(ast: Ast): any { const byName = new Map(); for (const r of ast.records) byName.set(r.name, r); const redefinedBy = new Map(); for (const r of ast.records) if (r.redefines) { const a = redefinedBy.get(r.redefines) || []; a.push(r); redefinedBy.set(r.redefines, a); } const buildGroup = (node: Node): any => { const obj: any = {}; for (const c of node.children) { if (c.redefines) continue; obj[toCamel(c.name)] = buildNode(c); } return obj; }; const buildNode = (node: Node): any => { let val: any; if (node.pic) val = mockLeaf(node); else val = buildGroup(node); if (node.occurs) return [val]; // one representative element return val; }; const root: any = {}; for (const r of ast.records) { if (r.redefines) continue; root[toCamel(r.name)] = buildNode(r); // base layout chosen for overlays } return root; } function validateMock(ast: Ast, mock: any): { ok: boolean; checked: number; reason: string } { let checked = 0; const check = (node: Node, val: any): boolean => { checked++; if (node.occurs) { if (!Array.isArray(val)) return false; return val.every((v) => checkInner(node, v)); } return checkInner(node, val); }; const checkInner = (node: Node, val: any): boolean => { if (node.pic) { if (node.comp3) return typeof val === "number"; return typeof val === "string"; } if (typeof val !== "object" || val === null) return false; return node.children.filter((c) => !c.redefines).every((c) => check(c, val[toCamel(c.name)])); }; for (const r of ast.records) { if (r.redefines) continue; if (!check(r, mock[toCamel(r.name)])) return { ok: false, checked, reason: `node ${r.name} failed` }; } return { ok: true, checked, reason: "" }; } // --------------------------------------------------------------------------- // Small utilities // --------------------------------------------------------------------------- function isBalanced(code: string): boolean { const pairs: Record = { ")": "(", "]": "[", "}": "{" }; const stack: string[] = []; for (const ch of code) { if (ch === "(" || ch === "[" || ch === "{") stack.push(ch); else if (ch === ")" || ch === "]" || ch === "}") { if (stack.pop() !== pairs[ch]) return false; } } return stack.length === 0; } export async function sha256(text: string): Promise { const enc = new TextEncoder().encode(text); const digest = await crypto.subtle.digest("SHA-256", enc); return [...new Uint8Array(digest)].map((b) => b.toString(16).padStart(2, "0")).join(""); } // --------------------------------------------------------------------------- // Receipt — the auditable artifact a pilot delivers per copybook // --------------------------------------------------------------------------- export interface FieldMapping { cobol: string; level: number; pic: string; storage: string; tsPath: string; tsType: string; } export interface Receipt { copybook: string; inputSha256: string; outputSha256: string; recordCount: number; fieldCount: number; redefinesCount: number; odoCount: number; fixedOccursCount: number; gates: Gate[]; verdict: "PASS" | "FAIL"; schema: string; mappings: FieldMapping[]; } export async function buildReceipt(name: string, copybook: string): Promise { const ast = parseCopybook(copybook); const emit = emitZod(ast); const gates = runGates(ast, emit); const inputSha256 = await sha256(copybook.replace(/ /g, " ").trimEnd()); const outputSha256 = await sha256(emit.code); const mappings: FieldMapping[] = ast.leaves.map((n) => ({ cobol: n.name, level: n.level, pic: (n.comp3 ? `${n.pic} COMP-3` : n.pic) || "", storage: n.storage, tsPath: n.path, tsType: n.tsType, })); return { copybook: name, inputSha256, outputSha256, recordCount: ast.records.length, fieldCount: ast.leaves.length, redefinesCount: ast.redefinesCount, odoCount: ast.odoCount, fixedOccursCount: ast.fixedOccursCount, gates, verdict: gates.every((g) => g.pass) ? "PASS" : "FAIL", schema: emit.code, mappings, }; }