// The verification engine · open source
// Read the code that produces every receipt.
/**
* Ironparse — deterministic COBOL copybook verification engine.
*
* This is the *verification layer* of the Ironparse engine: the COBOL parser
* and the five deterministic quality gates that produce the field-for-field
* parity guarantee. It runs with zero LLM involvement — same input, same
* output, every run.
*
* In a pilot deployment, a local model running inside the customer's VPC
* drafts the candidate TypeScript/Zod schema; these same gates then reject any
* draft whose field count or structural shape does not match the source AST.
* In this public trace the emitter below produces the reference schema
* directly from the AST, and the gates verify it. Nothing is mocked: every
* field count, every diff, and every hash is computed live from the copybook.
*
* No dependencies. Runs identically under Node and in a Cloudflare Worker.
*/
// ---------------------------------------------------------------------------
// AST types
// ---------------------------------------------------------------------------
/** Repetition bounds captured from a COBOL OCCURS clause. */
export interface Occurs {
  /** Lower bound of the repetition count (equal to `max` for a fixed OCCURS). */
  min: number;
  /** Upper bound of the repetition count. */
  max: number;
  /** Controlling field of OCCURS DEPENDING ON <field>; null when the count is fixed. */
  dependingOn: string | null;
}
/** One data-description entry of the copybook, with its nested entries. */
export interface Node {
  /** COBOL level number of the entry (e.g. 1, 5, 10). */
  level: number;
  /** Data name exactly as written in the copybook. */
  name: string;
  /** Raw PIC clause, e.g. "S9(9)V99"; null for group items. */
  pic: string | null;
  /** True when the entry is declared COMP-3 / COMPUTATIONAL-3. */
  comp3: boolean;
  /** Name of the item this entry REDEFINES, or null. */
  redefines: string | null;
  /** OCCURS clause of the entry, or null when it does not repeat. */
  occurs: Occurs | null;
  /** Entries nested under this one, in source order. */
  children: Node[];

  // --- populated during analysis ---

  /** Dotted JSON path of the field in the emitted schema. */
  path: string;
  /** Human-readable storage description. */
  storage: string;
  /** Emitted leaf TS/Zod primitive (leaf nodes only). */
  tsType: string;
}
/** Result of parsing one copybook: the record trees plus summary counts. */
export interface Ast {
  /** Top-level records (entries that have no parent on the level stack). */
  records: Node[];
  /** Every elementary (PIC-bearing) field, in source order. */
  leaves: Node[];
  /** Number of visited entries that carry a REDEFINES clause. */
  redefinesCount: number;
  /** Number of OCCURS DEPENDING ON entries. */
  odoCount: number;
  /** Number of fixed-count OCCURS entries. */
  fixedOccursCount: number;
}
// ---------------------------------------------------------------------------
// Parser — deterministic, line-oriented. No LLM, no heuristics-on-vibes.
// ---------------------------------------------------------------------------
/**
 * Matches one data-description entry on a single line.
 *
 * Capture groups: 1 = two-digit level number, 2 = data name, 3 = the remaining
 * clause text with surrounding whitespace and the terminating period removed
 * (empty for a bare group header such as `01 POLICY-RECORD.`).
 */
const LEVEL_RE = /^\s*(\d{2})\s+([A-Z0-9][A-Z0-9-]*)\s*(.*?)\s*\.?\s*$/;
/**
 * Extract the picture string and the packed-decimal flag from an entry's
 * clause text.
 *
 * @param rest Clause text following the data name.
 * @returns `pic` is the raw picture string after `PIC` / `PICTURE [IS]`, or
 *   null when the entry has no PIC clause; `comp3` is true when the text
 *   contains COMP-3 or COMPUTATIONAL-3.
 */
function parsePic(rest: string): { pic: string | null; comp3: boolean } {
  const comp3 = /COMP-3/.test(rest) || /COMPUTATIONAL-3/.test(rest);
  // Whitespace must be matched with \s; a bare `s` would demand a literal "s".
  const m = rest.match(/PIC(?:TURE)?\s+(?:IS\s+)?([0-9A-Z()V$S.,+\-\/]+)/);
  return { pic: m ? m[1] : null, comp3 };
}
/**
 * Extract the OCCURS clause from an entry's clause text.
 *
 * Recognised forms:
 *   OCCURS n TIMES                        -> fixed n (min === max)
 *   OCCURS a TO b TIMES DEPENDING ON Y    -> variable, depending on Y
 *
 * @param rest Clause text following the data name.
 * @returns The parsed bounds, or null when neither form is present.
 */
function parseOccurs(rest: string): Occurs | null {
  // The variable form is tried first so its leading "OCCURS a" is not mistaken
  // for anything else.
  const dep = rest.match(/OCCURS\s+(\d+)\s+TO\s+(\d+)\s+TIMES\s+DEPENDING\s+ON\s+([A-Z0-9][A-Z0-9-]*)/);
  if (dep) return { min: parseInt(dep[1], 10), max: parseInt(dep[2], 10), dependingOn: dep[3] };
  const fixed = rest.match(/OCCURS\s+(\d+)\s+TIMES/);
  if (fixed) return { min: parseInt(fixed[1], 10), max: parseInt(fixed[1], 10), dependingOn: null };
  return null;
}
/**
 * Extract the target of a REDEFINES clause from an entry's clause text.
 *
 * @param rest Clause text following the data name.
 * @returns The redefined data name, or null when there is no REDEFINES clause.
 */
function parseRedefines(rest: string): string | null {
  const m = rest.match(/REDEFINES\s+([A-Z0-9][A-Z0-9-]*)/);
  return m ? m[1] : null;
}
/**
 * Parse a COBOL copybook into a tree of records keyed by level number.
 *
 * Processing steps:
 *  1. Split the text into lines (LF or CRLF), strip `*>` inline comments, and
 *     drop blank lines and lines whose first non-blank character is `*`.
 *  2. Match each remaining line against LEVEL_RE; lines that do not match are
 *     skipped. Entries are nested by level number using a stack.
 *  3. Walk the tree to collect PIC-bearing fields in source order, filling in
 *     `path`, `storage` and `tsType` on each, and to count REDEFINES and
 *     OCCURS entries. Entries nested under a PIC-bearing entry are not walked.
 *
 * @param text Full copybook source.
 * @returns The record trees, the flat leaf list, and the summary counts.
 */
export function parseCopybook(text: string): Ast {
  // COBOL copybooks may use fixed-format columns; join continuation logic is
  // out of scope for these ACORD samples (each clause is on one logical line).
  const lines = text
    .split(/\r?\n/)
    .map((l) => l.replace(/\*>.*/, "")) // strip inline comments
    .filter((l) => l.trim().length > 0 && !/^\s*\*/.test(l)); // drop comment lines

  const records: Node[] = [];
  const stack: Node[] = [];
  for (const line of lines) {
    const m = line.match(LEVEL_RE);
    if (!m) continue;
    const level = parseInt(m[1], 10);
    const name = m[2];
    const rest = m[3] || "";
    const { pic, comp3 } = parsePic(rest);
    const node: Node = {
      level,
      name,
      pic,
      comp3,
      redefines: parseRedefines(rest),
      occurs: parseOccurs(rest),
      children: [],
      path: "",
      storage: "",
      tsType: "",
    };
    // Pop the stack until we find a parent with a strictly lower level.
    while (stack.length > 0 && stack[stack.length - 1].level >= level) stack.pop();
    if (stack.length === 0) {
      records.push(node);
    } else {
      stack[stack.length - 1].children.push(node);
    }
    stack.push(node);
  }

  // Collect elementary fields (PIC-bearing leaves) in source order + counts.
  const leaves: Node[] = [];
  let redefinesCount = 0;
  let odoCount = 0;
  let fixedOccursCount = 0;
  const walk = (node: Node, prefix: string) => {
    const seg = toCamel(node.name);
    const isArray = node.occurs !== null;
    const path = prefix ? `${prefix}.${seg}` : seg;
    if (node.redefines) redefinesCount++;
    if (node.occurs?.dependingOn) odoCount++;
    if (node.occurs && !node.occurs.dependingOn) fixedOccursCount++;
    if (node.pic) {
      const { storage, tsType } = mapPic(node.pic, node.comp3);
      // A repeating item is marked with a trailing "[]" on its path segment.
      node.path = isArray ? `${path}[]` : path;
      node.storage = storage;
      node.tsType = tsType;
      leaves.push(node);
    } else {
      const childPrefix = isArray ? `${path}[]` : path;
      for (const c of node.children) walk(c, childPrefix);
    }
  };
  for (const r of records) walk(r, "");

  return { records, leaves, redefinesCount, odoCount, fixedOccursCount };
}
// ---------------------------------------------------------------------------
// Type mapping — COBOL PIC -> storage description + Zod primitive
// ---------------------------------------------------------------------------
/**
 * Count the character positions described by a picture string.
 *
 * Each `X`, `9`, `A` or `N` symbol counts once, or `n` times when written with
 * a repeat factor such as `X(35)`. Other symbols (`S`, `V`, ...) contribute
 * nothing.
 *
 * @param pic Raw picture string, e.g. "S9(9)V99".
 * @returns Total number of counted positions (0 for an empty string).
 */
function picLength(pic: string): number {
  let total = 0;
  // Group 2 holds the repeat factor; the parentheses are literal and must be
  // escaped, otherwise "X(35)" is counted as a single position.
  const re = /([X9AN])\((\d+)\)|([X9AN])/g;
  for (const mm of pic.matchAll(re)) {
    total += mm[2] ? parseInt(mm[2], 10) : 1;
  }
  return total;
}
/**
 * Map a COBOL picture to a storage description and the Zod primitive emitted
 * for it.
 *
 * - COMP-3 items become numbers (integers unless the picture has an implied
 *   decimal point `V`).
 * - Display numerics (a `9` and no `X`/`A`) stay strings so leading zeros
 *   survive; unsigned whole numbers additionally get a digit-count regex.
 * - Everything else is a length-capped string.
 *
 * @param pic Raw picture string.
 * @param comp3 Whether the item is packed decimal.
 * @returns `storage` (human-readable) and `tsType` (Zod expression text).
 */
export function mapPic(pic: string, comp3: boolean): { storage: string; tsType: string } {
  const signed = pic.startsWith("S");
  const scaled = pic.includes("V");
  const len = picLength(pic);
  const signNote = signed ? ", signed" : "";

  if (comp3) {
    // Packed decimal: a real number with scale, kept numeric.
    const scaleNote = scaled ? ", scaled" : "";
    return {
      storage: `packed decimal (COMP-3${signNote}${scaleNote})`,
      tsType: scaled ? "z.number()" : "z.number().int()",
    };
  }

  const digitsOnly = pic.includes("9") && !pic.includes("X") && !pic.includes("A");
  if (!digitsOnly) {
    // Alphanumeric / alphabetic.
    return { storage: `alphanumeric (${len} chars)`, tsType: `z.string().max(${len})` };
  }

  // Display numeric: exact digits are preserved by modelling it as a string.
  const decimalNote = scaled ? ", implied decimal" : "";
  const unconstrained = scaled || signed;
  return {
    storage: `display numeric${signNote}${decimalNote} (${len} digits)`,
    tsType: unconstrained ? `z.string()` : `z.string().regex(/^\\d{1,${len}}$/)`,
  };
}
// ---------------------------------------------------------------------------
// Naming
// ---------------------------------------------------------------------------
/**
 * Convert a hyphenated COBOL data name to camelCase.
 *
 * The name is lower-cased and split on `-`; the first segment is kept as is
 * and every later segment has its first character upper-cased.
 *
 * @param cobolName Data name such as "POLICY-NUMBER".
 * @returns The camelCase form, e.g. "policyNumber".
 */
export function toCamel(cobolName: string): string {
  const segments = cobolName.toLowerCase().split("-");
  return segments.reduce(
    (joined, segment, index) =>
      index === 0 ? segment : joined + segment.charAt(0).toUpperCase() + segment.slice(1),
    "",
  );
}
// ---------------------------------------------------------------------------
// Emitter — AST -> TypeScript/Zod schema text (deterministic reference output)
// ---------------------------------------------------------------------------
/** Emitted schema text together with structural counts used by the gates. */
export interface EmitResult {
  /** Full text of the emitted TypeScript/Zod module. */
  code: string;
  /** Leaf z.<primitive>() emitted — independent of AST leaf count. */
  primitiveCount: number;
  /** REDEFINES overlays modelled as z.discriminatedUnion / z.union. */
  unionCount: number;
  /** Subset of `unionCount` emitted as z.discriminatedUnion. */
  discriminatedUnionCount: number;
  /** OCCURS modelled as z.array. */
  arrayCount: number;
}
/**
 * Emit the Zod expression for one AST node.
 *
 * Elementary items emit their precomputed `tsType`; group items emit a
 * `z.object({...})` with one line per child, skipping children that carry a
 * REDEFINES clause. A node with an OCCURS clause is wrapped in
 * `z.array(...).max(n)` followed by an explanatory comment.
 *
 * @param node Node to emit.
 * @param indent Indentation of the line the expression starts on; children are
 *   indented one step further.
 * @returns The expression text (no trailing comma or newline).
 */
function emitNode(node: Node, indent: string): string {
  // Group item -> z.object({...}); elementary -> its primitive.
  let inner: string;
  if (node.pic) {
    inner = node.tsType;
  } else {
    const pad = indent + " ";
    const fields = node.children
      .filter((c) => !c.redefines) // redefining siblings handled by union at group level
      .map((c) => `${pad}${toCamel(c.name)}: ${emitNode(c, pad)},`)
      .join("\n");
    inner = `z.object({\n${fields}\n${indent}})`;
  }
  if (node.occurs) {
    const note = node.occurs.dependingOn
      ? ` /* OCCURS 0..${node.occurs.max} DEPENDING ON ${node.occurs.dependingOn} */`
      : ` /* OCCURS ${node.occurs.max} */`;
    inner = `z.array(${inner}).max(${node.occurs.max})${note}`;
  }
  return inner;
}
/**
 * Emit the reference TypeScript/Zod module for a parsed copybook.
 *
 * Every top-level record becomes a field of `RecordSchema`. A record that is
 * REDEFINES'd by other top-level records is emitted as a union of all those
 * layouts (`z.discriminatedUnion` when `sharedDiscriminator` finds a common
 * key, otherwise `z.union`). Redefining records are only emitted as branches of
 * their base record's union.
 *
 * NOTE(review): a record whose REDEFINES target is not itself a top-level
 * record is never emitted, while its fields still appear in `ast.leaves` —
 * confirm that this mismatch is meant to surface through the field-parity gate.
 *
 * @param ast Parsed copybook.
 * @returns The module text plus counts; `primitiveCount` and `arrayCount` are
 *   re-derived from the emitted text rather than tracked during emission.
 */
export function emitZod(ast: Ast): EmitResult {
  const lines: string[] = [];
  lines.push(`import { z } from "zod";`);
  lines.push("");

  let unionCount = 0;
  let discriminatedUnionCount = 0;

  // A record that is REDEFINES'd by a later record forms a memory-overlay
  // union: the same bytes can be read as either layout.
  const redefinedBy = new Map<string, Node[]>();
  for (const r of ast.records) {
    if (r.redefines) {
      const arr = redefinedBy.get(r.redefines) ?? [];
      arr.push(r);
      redefinedBy.set(r.redefines, arr);
    }
  }

  const rootFields: string[] = [];
  for (const r of ast.records) {
    if (r.redefines) continue; // emitted as part of the base record's union
    const overlays = redefinedBy.get(r.name);
    const field = toCamel(r.name);
    if (overlays && overlays.length > 0) {
      // Memory overlay -> union of all layouts that share these bytes.
      const branches = [r, ...overlays];
      const discriminator = sharedDiscriminator(branches);
      const branchCode = branches.map((b) => emitNode(b, " ")).join(",\n");
      let unionExpr: string;
      if (discriminator) {
        unionExpr = `z.discriminatedUnion("${discriminator}", [\n${branchCode}\n])`;
        discriminatedUnionCount++;
      } else {
        unionExpr = `z.union([\n${branchCode}\n])`;
      }
      unionCount++;
      rootFields.push(
        ` // REDEFINES overlay: ${[r.name, ...overlays.map((o) => o.name)].join(" / ")}\n${field}: ${unionExpr},`,
      );
    } else {
      rootFields.push(` ${field}: ${emitNode(r, " ")},`);
    }
  }

  lines.push(`export const RecordSchema = z.object({`);
  lines.push(rootFields.join("\n"));
  lines.push(`});`);
  lines.push("");
  lines.push(`export type Record = z.infer<typeof RecordSchema>;`);
  const code = lines.join("\n");

  // Independent structural counts parsed back out of the emitted text — this
  // is what Gate 03 / Gate 04 compare against the AST, not a bookkeeping echo.
  // The dot and the opening parenthesis are literal and must be escaped.
  const primitiveCount = (code.match(/z\.(string|number|boolean)\(/g) || []).length;
  const arrayCount = (code.match(/z\.array\(/g) || []).length;

  return { code, primitiveCount, unionCount, discriminatedUnionCount, arrayCount };
}
/**
 * Find a discriminator key shared by every branch of an overlay union.
 *
 * A discriminator exists if every union branch leads with the same-shaped
 * 1-char indicator field (classic ACORD record-type byte): the first child of
 * each branch must have picture `X` or `X(1)`, and all those first children
 * must map to the same camelCase name.
 *
 * @param branches Layouts that share the same bytes (base record + overlays).
 * @returns The shared camelCase key, or null when there is none (including
 *   when `branches` is empty or any branch has no children).
 */
function sharedDiscriminator(branches: Node[]): string | null {
  // Nothing to discriminate between; also protects the firsts[0] access below.
  if (branches.length === 0) return null;
  const firsts = branches.map((b) => b.children[0]).filter(Boolean);
  if (firsts.length !== branches.length) return null;
  // Parentheses in "X(1)" are literal and must be escaped.
  const allOneChar = firsts.every((f) => f.pic && /^X\(1\)$|^X$/.test(f.pic));
  if (!allOneChar) return null;
  // Use each branch's own first field name; discriminatedUnion needs a single
  // shared key, so only valid when the names match. ACORD overlays differ, so
  // we fall back to z.union in that case — modelled honestly, not forced.
  const name = toCamel(firsts[0].name);
  return firsts.every((f) => toCamel(f.name) === name) ? name : null;
}
// ---------------------------------------------------------------------------
// The five deterministic quality gates
// ---------------------------------------------------------------------------
/** Outcome of one deterministic quality gate. */
export interface Gate {
  /** Two-digit gate identifier, e.g. "03". */
  id: string;
  /** Short upper-case gate name, e.g. "FIELD_PARITY". */
  name: string;
  /** Human-readable description of what the gate checks. */
  detail: string;
  /** Whether the gate passed. */
  pass: boolean;
  /** The actual number that decided it. */
  measure: string;
}
/**
 * Run the five deterministic quality gates against an AST and its emitted
 * schema.
 *
 *  01 PARSER          at least one record and one elementary field were parsed
 *  02 SCHEMA_SANITY   emitted text has balanced delimiters, no markdown fence,
 *                     and contains at least one `z.object(`
 *  03 FIELD_PARITY    AST leaf count equals the emitted primitive count
 *  04 DARK_CORNER     emitted unions/arrays are at least as many as the AST's
 *                     overlay groups / OCCURS entries
 *  05 MOCK_STRUCTURE  a mock built from the AST re-validates against it
 *
 * @param ast Parsed copybook.
 * @param emit Result of emitting the schema for `ast`.
 * @returns The five gate results in order 01..05.
 */
export function runGates(ast: Ast, emit: EmitResult): Gate[] {
  const gates: Gate[] = [];

  // 01 PARSER — Tree extraction produced field-bearing records.
  gates.push({
    id: "01",
    name: "PARSER",
    detail: "Deterministic AST extraction yields a non-empty, field-bearing record set. No LLM involved.",
    pass: ast.records.length > 0 && ast.leaves.length > 0,
    measure: `${ast.records.length} records · ${ast.leaves.length} elementary fields`,
  });

  // 02 SCHEMA_SANITY — Emitted schema is valid, fenced-free Zod.
  const balanced = isBalanced(emit.code);
  const noFence = !/```/.test(emit.code);
  // The dot and the opening parenthesis are literal and must be escaped.
  const hasObject = /z\.object\(/.test(emit.code);
  const sane = balanced && noFence && hasObject;
  gates.push({
    id: "02",
    name: "SCHEMA_SANITY",
    detail: "Candidate schema parses as valid Zod — balanced delimiters, no markdown fences, no prose.",
    pass: sane,
    measure: sane ? "valid Zod module" : "malformed output",
  });

  // 03 FIELD_PARITY — strict count equality. Math, not judgment.
  gates.push({
    id: "03",
    name: "FIELD_PARITY",
    detail: "len(COBOL elementary fields) === len(emitted schema leaves). Off by one and the build fails.",
    pass: ast.leaves.length === emit.primitiveCount,
    measure: `COBOL ${ast.leaves.length} ⇄ schema ${emit.primitiveCount}`,
  });

  // 04 DARK_CORNER — every overlay/array survives translation.
  const overlayGroups = redefinesUnionGroups(ast);
  const expectedArrays = ast.odoCount + ast.fixedOccursCount;
  const redefinesOk = emit.unionCount >= overlayGroups;
  const odoOk = emit.arrayCount >= expectedArrays;
  gates.push({
    id: "04",
    name: "DARK_CORNER",
    detail:
      "Every REDEFINES overlay compiles to a union (discriminated where a record-type byte exists); every OCCURS to a dynamic array.",
    pass: redefinesOk && odoOk,
    measure: `${emit.unionCount}/${overlayGroups} overlays · ${emit.arrayCount}/${expectedArrays} arrays`,
  });

  // 05 MOCK_STRUCTURE — round-trip a mock document through the shape.
  const mock = buildMock(ast);
  const roundTrip = validateMock(ast, mock);
  gates.push({
    id: "05",
    name: "MOCK_STRUCTURE",
    detail: "A mock document generated from the schema re-validates against it — the schema is internally consistent.",
    pass: roundTrip.ok,
    measure: roundTrip.ok ? `${roundTrip.checked} nodes round-tripped` : roundTrip.reason,
  });

  return gates;
}
/**
 * Count of base records that participate in a REDEFINES overlay group.
 *
 * Only top-level records are inspected; several records redefining the same
 * base count once.
 */
function redefinesUnionGroups(ast: Ast): number {
  const targets = ast.records
    .map((record) => record.redefines)
    .filter((target) => Boolean(target));
  return new Set(targets).size;
}
// ---------------------------------------------------------------------------
// Mock generation + round-trip validation (Gate 05)
// ---------------------------------------------------------------------------
/**
 * Produce a placeholder value for an elementary field.
 *
 * COMP-3 fields yield the number 0, display numerics (a `9` and no `X`/`A` in
 * the picture) yield the string "0", and everything else yields "".
 */
function mockLeaf(node: Node): unknown {
  const pic = node.pic;
  if (node.comp3) {
    const scaled = Boolean(pic) && /V/.test(pic as string);
    return scaled ? 0.0 : 0;
  }
  if (!pic) return "";
  if (!/9/.test(pic)) return "";
  return /X|A/.test(pic) ? "" : "0";
}
/**
 * Build a mock document shaped like the AST.
 *
 * Each non-redefining top-level record becomes a property keyed by its
 * camelCase name; group items become objects, elementary items get a
 * placeholder from `mockLeaf`, and any node with an OCCURS clause is wrapped
 * in a one-element array. Entries carrying a REDEFINES clause are skipped, so
 * overlays are represented by their base layout.
 *
 * @param ast Parsed copybook.
 * @returns The mock document (an empty object when there are no records).
 */
function buildMock(ast: Ast): any {
  const buildGroup = (node: Node): any => {
    const obj: any = {};
    for (const c of node.children) {
      if (c.redefines) continue;
      obj[toCamel(c.name)] = buildNode(c);
    }
    return obj;
  };
  const buildNode = (node: Node): any => {
    const val = node.pic ? mockLeaf(node) : buildGroup(node);
    if (node.occurs) return [val]; // one representative element
    return val;
  };

  const root: any = {};
  for (const r of ast.records) {
    if (r.redefines) continue;
    root[toCamel(r.name)] = buildNode(r); // base layout chosen for overlays
  }
  return root;
}
/**
 * Check that a mock document has the shape the AST describes.
 *
 * Every non-redefining top-level record is looked up by its camelCase name and
 * verified recursively: OCCURS nodes must be arrays whose elements match,
 * COMP-3 leaves must be numbers, other leaves strings, and group items
 * non-null objects whose non-redefining children match in turn.
 *
 * @param ast Parsed copybook.
 * @param mock Document to verify.
 * @returns `ok` with the number of nodes visited; on failure `reason` names
 *   the first top-level record that did not match.
 */
function validateMock(ast: Ast, mock: any): { ok: boolean; checked: number; reason: string } {
  let checked = 0;

  // Verifies a single (non-array) value against a node.
  function matchesShape(node: Node, val: any): boolean {
    if (node.pic) {
      return typeof val === (node.comp3 ? "number" : "string");
    }
    if (val === null || typeof val !== "object") return false;
    for (const child of node.children) {
      if (child.redefines) continue;
      if (!visit(child, val[toCamel(child.name)])) return false;
    }
    return true;
  }

  // Counts the node, then unwraps the OCCURS array if there is one.
  function visit(node: Node, val: any): boolean {
    checked += 1;
    if (!node.occurs) return matchesShape(node, val);
    if (!Array.isArray(val)) return false;
    return val.every((element) => matchesShape(node, element));
  }

  for (const record of ast.records) {
    if (record.redefines) continue;
    const ok = visit(record, mock[toCamel(record.name)]);
    if (!ok) return { ok: false, checked, reason: `node ${record.name} failed` };
  }
  return { ok: true, checked, reason: "" };
}
// ---------------------------------------------------------------------------
// Small utilities
// ---------------------------------------------------------------------------
/**
 * Report whether every `(`, `[` and `{` in the text is closed by the matching
 * bracket in the right order. All other characters are ignored.
 */
function isBalanced(code: string): boolean {
  const openerFor = new Map<string, string>([
    [")", "("],
    ["]", "["],
    ["}", "{"],
  ]);
  const open: string[] = [];
  for (const ch of code) {
    switch (ch) {
      case "(":
      case "[":
      case "{":
        open.push(ch);
        break;
      case ")":
      case "]":
      case "}":
        // pop() yields undefined on an empty stack, which never equals an opener.
        if (open.pop() !== openerFor.get(ch)) return false;
        break;
      default:
        break;
    }
  }
  return open.length === 0;
}
/**
 * Compute the SHA-256 digest of a string's UTF-8 encoding.
 *
 * @param text Text to hash.
 * @returns The digest as a lower-case hexadecimal string.
 */
export async function sha256(text: string): Promise<string> {
  const bytes = new TextEncoder().encode(text);
  const digest = await crypto.subtle.digest("SHA-256", bytes);
  const hexPairs = Array.from(new Uint8Array(digest), (byte) => byte.toString(16).padStart(2, "0"));
  return hexPairs.join("");
}
// ---------------------------------------------------------------------------
// Receipt — the auditable artifact a pilot delivers per copybook
// ---------------------------------------------------------------------------
/** One row of the receipt's field table: a COBOL field and its schema target. */
export interface FieldMapping {
  /** COBOL data name. */
  cobol: string;
  /** COBOL level number. */
  level: number;
  /** Picture string, suffixed with " COMP-3" for packed-decimal fields. */
  pic: string;
  /** Human-readable storage description. */
  storage: string;
  /** Dotted path of the field in the emitted schema. */
  tsPath: string;
  /** Emitted Zod primitive for the field. */
  tsType: string;
}
/** The auditable artifact produced for one copybook. */
export interface Receipt {
  /** Name the copybook was submitted under. */
  copybook: string;
  /** SHA-256 hex digest of the copybook text as hashed by buildReceipt. */
  inputSha256: string;
  /** SHA-256 hex digest of the emitted schema text. */
  outputSha256: string;
  /** Number of top-level records. */
  recordCount: number;
  /** Number of elementary (PIC-bearing) fields. */
  fieldCount: number;
  /** Number of entries carrying a REDEFINES clause. */
  redefinesCount: number;
  /** Number of OCCURS DEPENDING ON entries. */
  odoCount: number;
  /** Number of fixed-count OCCURS entries. */
  fixedOccursCount: number;
  /** Results of the quality gates. */
  gates: Gate[];
  /** "PASS" only when every gate passed. */
  verdict: "PASS" | "FAIL";
  /** Emitted schema text. */
  schema: string;
  /** One mapping per elementary field. */
  mappings: FieldMapping[];
}
/**
 * Parse a copybook, emit its schema, run the gates, and assemble the receipt.
 *
 * The input hash is taken over the copybook with CRLF line endings converted
 * to LF and trailing whitespace removed; the output hash is taken over the
 * emitted schema text exactly as returned.
 *
 * @param name Name recorded on the receipt.
 * @param copybook Full copybook source.
 * @returns The receipt; `verdict` is "PASS" only when every gate passed.
 */
export async function buildReceipt(name: string, copybook: string): Promise<Receipt> {
  const ast = parseCopybook(copybook);
  const emit = emitZod(ast);
  const gates = runGates(ast, emit);

  const inputSha256 = await sha256(copybook.replace(/\r\n/g, "\n").trimEnd());
  const outputSha256 = await sha256(emit.code);

  const mappings: FieldMapping[] = ast.leaves.map((n) => ({
    cobol: n.name,
    level: n.level,
    pic: (n.comp3 ? `${n.pic} COMP-3` : n.pic) || "",
    storage: n.storage,
    tsPath: n.path,
    tsType: n.tsType,
  }));

  return {
    copybook: name,
    inputSha256,
    outputSha256,
    recordCount: ast.records.length,
    fieldCount: ast.leaves.length,
    redefinesCount: ast.redefinesCount,
    odoCount: ast.odoCount,
    fixedOccursCount: ast.fixedOccursCount,
    gates,
    verdict: gates.every((g) => g.pass) ? "PASS" : "FAIL",
    schema: emit.code,
    mappings,
  };
}