feat: implement horizontal slicing strategy for improved PDF417 detection

Improve barcode detection in documents with graphics and text by implementing
a multi-strategy horizontal slicing approach:

- Split documents into overlapping horizontal sections (5, 4, 3, 2, 1, or the full image)
- Decode each section separately to isolate individual barcodes
- Use 2% overlap between sections to avoid missing boundary codes
- Return the results from the strategy that detects the most barcodes
- Exit early as soon as a strategy finds no more codes than the best result so far

Also fix error handling to use Error objects instead of string literals.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Knee Cola
2025-12-20 09:39:58 +01:00
parent 7ad2497757
commit 09c4cca67b

View File

@@ -117,7 +117,7 @@ const file2canvas = async function (imageFile: File): Promise<HTMLCanvasElement>
canvas.height = img.height;
if (!ctx) {
reject("Context is not set")
reject(new Error("Context is not set"))
return;
}
@@ -232,6 +232,7 @@ const canvasToImageData = (canvas: HTMLCanvasElement): ImageData => {
/**
* Searches the given canvas for all PDF417 codes and decodes them.
* Uses a slicing strategy to improve detection when multiple barcodes are present.
* @param {HTMLCanvasElement} canvas - the canvas to search for PDF417 codes
* @return {Promise<Array<DecodeResult> | null>} - an array of decoded results
* */
@@ -240,28 +241,86 @@ const decodeFromCanvas = async (canvas: HTMLCanvasElement): Promise<Array<Decode
const readerOptions: ReaderOptions = {
tryHarder: true,
formats: ['PDF417'],
maxNumberOfSymbols: 10,
maxNumberOfSymbols: 1, // Decode one barcode per slice
};
// give browser a chance to re-paint
// this is needed to avoid UI freezing when decoding large images
await yieldToBrowser('decodeFromCanvas');
const width = canvas.width;
const height = canvas.height;
const imageData = canvasToImageData(canvas);
const results = await readBarcodes(imageData, readerOptions);
// Canvas can contain multiple PDF417 codes, so we need to try to find them all
// We will try splitting the canvas into different numbers of horizontal subsections
// and decode each subsection separately. The best result will be the one with the most codes found.
const splits = [5, 4, 3, 2, 1, 0];
const codesFound: Array<DecodeResult> = results
.filter(result => result.text)
.map((result) => ({
hub3aText: result.text,
billInfo: parseHubText(result.text),
}));
let bestResult: Array<DecodeResult> | null = null;
return (codesFound);
for (let splitIx = 0; splitIx < splits.length; splitIx++) {
const split = splits[splitIx];
// Add overlap to ensure we don't miss codes at section boundaries
const overlap = split === 0 ? 0 : Math.round(height / 50); // 2% overlap
const sectionHeight = split === 0 ? height : (Math.floor(Math.floor(height / split) + overlap));
// Create canvas sections
const canvasSections = Array.from({ length: split + 1 }, (_, i) => {
const sectionCanvas = document.createElement('canvas');
sectionCanvas.width = width;
sectionCanvas.height = sectionHeight;
const sectionContext = sectionCanvas.getContext('2d');
if (!sectionContext) {
throw new Error('Failed to get canvas context');
}
// Calculate the starting Y position for each section
const startY = i === 0 ? 0 : i * sectionHeight - overlap;
// Draw the section of the original canvas onto the new section canvas
sectionContext.drawImage(canvas, 0, startY, width, sectionHeight, 0, 0, width, sectionHeight);
return sectionCanvas;
});
const codesFoundInSection: Array<DecodeResult> = [];
// Try to decode each section
for (const sectionCanvas of canvasSections) {
try {
// give browser a chance to re-paint
// this is needed to avoid UI freezing when decoding large images
await yieldToBrowser('decodeFromCanvas');
const imageData = canvasToImageData(sectionCanvas);
const results = await readBarcodes(imageData, readerOptions);
if (results.length > 0 && results[0].text) {
const hub3aText = results[0].text;
codesFoundInSection.push({
hub3aText,
billInfo: parseHubText(hub3aText),
});
}
} catch (error) {
// If no code was found in the current section, continue to next section
}
}
await yieldToBrowser('after decodeFromCanvas');
// If in this iteration we found fewer or equal codes than in the previous best result,
// we can return the best result. This is an optimization.
if (bestResult && codesFoundInSection.length <= bestResult.length) {
return bestResult;
}
bestResult = codesFoundInSection;
}
return bestResult;
} catch (error) {
console.log(error);
return (null);
return null;
}
}