feat: implement horizontal slicing strategy for improved PDF417 detection

Improve barcode detection in documents with graphics and text by implementing a multi-strategy horizontal slicing approach:

- Split documents into overlapping horizontal sections (5, 4, 3, 2, 1, or full)
- Decode each section separately to isolate individual barcodes
- Use 2% overlap between sections to avoid missing boundary codes
- Return the results of the strategy that detects the most barcodes
- Early-exit optimization: stop as soon as a strategy finds no more codes than the best result so far

Also fix error handling to use Error objects instead of string literals.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-20 09:39:58 +01:00
parent 7ad2497757
commit 09c4cca67b
1 changed files with 74 additions and 15 deletions
--- a/app/lib/pdf/barcodeDecoderWasm.ts
+++ b/app/lib/pdf/barcodeDecoderWasm.ts
@@ -117,7 +117,7 @@ const file2canvas = async function (imageFile: File): Promise<HTMLCanvasElement>
                canvas.height = img.height;

                if (!ctx) {
-                    reject("Context is not set")
+                    reject(new Error("Context is not set"))
                    return;
                }

@@ -232,6 +232,7 @@ const canvasToImageData = (canvas: HTMLCanvasElement): ImageData => {

 /**
 * Searches the given canvas for all PDF417 codes and decodes them.
+ * Uses a slicing strategy to improve detection when multiple barcodes are present.
 * @param {HTMLCanvasElement} canvas - the canvas to search for PDF417 codes
 * @return {Promise<Array<DecodeResult> | null>} - an array of decoded results
 * */
@@ -240,28 +241,86 @@ const decodeFromCanvas = async (canvas: HTMLCanvasElement): Promise<Array<Decode
        const readerOptions: ReaderOptions = {
            tryHarder: true,
            formats: ['PDF417'],
-            maxNumberOfSymbols: 10,
+            maxNumberOfSymbols: 1, // Decode one barcode per slice
        };

-        // give browser a chance to re-paint
-        // this is needed to avoid UI freezing when decoding large images
-        await yieldToBrowser('decodeFromCanvas');
+        const width = canvas.width;
+        const height = canvas.height;

-        const imageData = canvasToImageData(canvas);
-        const results = await readBarcodes(imageData, readerOptions);
+        // Canvas can contain multiple PDF417 codes, so we need to try to find them all
+        // We will try splitting the canvas into different numbers of horizontal subsections
+        // and decode each subsection separately. The best result will be the one with the most codes found.
+        const splits = [5, 4, 3, 2, 1, 0];

-        const codesFound: Array<DecodeResult> = results
-            .filter(result => result.text)
-            .map((result) => ({
-                hub3aText: result.text,
-                billInfo: parseHubText(result.text),
-            }));
+        let bestResult: Array<DecodeResult> | null = null;

-        return (codesFound);
+        for (let splitIx = 0; splitIx < splits.length; splitIx++) {
+            const split = splits[splitIx];
+
+            // Add overlap to ensure we don't miss codes at section boundaries
+            const overlap = split === 0 ? 0 : Math.round(height / 50); // 2% overlap
+            const sectionHeight = split === 0 ? height : (Math.floor(Math.floor(height / split) + overlap));
+
+            // Create canvas sections
+            const canvasSections = Array.from({ length: split + 1 }, (_, i) => {
+                const sectionCanvas = document.createElement('canvas');
+                sectionCanvas.width = width;
+                sectionCanvas.height = sectionHeight;
+                const sectionContext = sectionCanvas.getContext('2d');
+
+                if (!sectionContext) {
+                    throw new Error('Failed to get canvas context');
+                }
+
+                // Calculate the starting Y position for each section
+                const startY = i === 0 ? 0 : i * sectionHeight - overlap;
+
+                // Draw the section of the original canvas onto the new section canvas
+                sectionContext.drawImage(canvas, 0, startY, width, sectionHeight, 0, 0, width, sectionHeight);
+                return sectionCanvas;
+            });
+
+            const codesFoundInSection: Array<DecodeResult> = [];
+
+            // Try to decode each section
+            for (const sectionCanvas of canvasSections) {
+                try {
+                    // give browser a chance to re-paint
+                    // this is needed to avoid UI freezing when decoding large images
+                    await yieldToBrowser('decodeFromCanvas');
+
+                    const imageData = canvasToImageData(sectionCanvas);
+                    const results = await readBarcodes(imageData, readerOptions);
+
+                    if (results.length > 0 && results[0].text) {
+                        const hub3aText = results[0].text;
+                        codesFoundInSection.push({
+                            hub3aText,
+                            billInfo: parseHubText(hub3aText),
+                        });
+                    }
+
+                } catch (error) {
+                    // If no code was found in the current section, continue to next section
+                }
+            }
+
+            await yieldToBrowser('after decodeFromCanvas');
+
+            // If in this iteration we found fewer or equal codes than in the previous best result,
+            // we can return the best result. This is an optimization.
+            if (bestResult && codesFoundInSection.length <= bestResult.length) {
+                return bestResult;
+            }
+
+            bestResult = codesFoundInSection;
+        }
+
+        return bestResult;

    } catch (error) {
        console.log(error);
-        return (null);
+        return null;
    }
 }