evidencija-rezija/web-app/app/lib/pdf/barcodeDecoderWasm.ts

import { PDFPageProxy } from 'pdfjs-dist';
import { readBarcodes, prepareZXingModule, type ReaderOptions } from 'zxing-wasm/reader';

// Tell zxing-wasm where to fetch its files from (same approach as the
// pdf.worker.min.mjs location configured for pdf.js further below).
prepareZXingModule({
    overrides: {
        locateFile(path, prefix) {
            // Any other file keeps the default location suggested by the library
            if (!path.endsWith('.wasm')) {
                return prefix + path;
            }

            // The WASM binary is always requested from the root of the current origin
            return window.location.origin + '/zxing_reader.wasm';
        },
    },
});

/**
 * Payment details read from the decoded barcode text.
 * Each property corresponds to one line of that text, in the order listed here
 * (see `parseHubText`).
 */
export type BillInfo = {
    /** First line of the text, e.g. `HRVHUB30` */
    header: string;
    /** Currency code, e.g. `EUR` */
    currency: string;
    /** Amount line parsed as a base-10 integer, e.g. `000000000012422` becomes `12422` */
    amount: number;
    /** Name of the payer */
    payerName: string;
    /** Street address of the payer */
    payerAddress: string;
    /** Postal code and town of the payer */
    payerTown: string;
    /** Name of the payee */
    payeeName: string;
    /** Street address of the payee */
    payeeAddress: string;
    /** Postal code and town of the payee */
    payeeTown: string;
    /** Account number (IBAN) given on the slip */
    IBAN: string;
    /** Payment model, e.g. `HR05` */
    model: string;
    /** Payment reference, e.g. `02964686-0307` */
    reference: string;
    /** Short code line, e.g. `GASB` */
    code: string;
    /** Free-text payment description */
    description: string;
};

/**
 * Pauses the caller until a zero-delay timer fires, which interrupts the current
 * run of microtasks and gives the UI thread a chance to do a re-paint.
 * @param _label - caller-supplied label; the implementation ignores it
 * @returns a promise that resolves to `true` once the timer has fired
 */
const yieldToBrowser = (_label: string): Promise<boolean> => {
    return new Promise<boolean>((wake) => {
        const resume = () => wake(true);
        setTimeout(resume, 0);
    });
};

/**
 * Parses the text decoded from a PDF417 payment barcode into a `BillInfo` object.
 *
 * The text is expected to contain one field per line, in the order of the
 * `BillInfo` properties. Both LF and CRLF line endings are accepted. Lines that
 * are missing from the text produce empty strings (and `NaN` for `amount`).
 *
 * @param text - text decoded from the barcode
 * @returns the fields of the text mapped onto a `BillInfo` object
 * @description
 * Example text: "HRVHUB30\nEUR\n000000000012422\nDEREŽIĆ NIKOLA\nULICA DIVKA BUDAKA 17/17\n10000 ZAGREB\nGPZ-Opskrba d.o.o.\nRadnička cesta 1\n10000 Zagreb\nHR3623400091110343158\nHR05\n02964686-0307\nGASB\nAkontacijska rata za 01.2024.\n"
 *
 *    Decoded into:
 *        header: HRVHUB30
 *        currency: EUR
 *        amount: 12422 (number parsed from "000000000012422")
 *        payerName: DEREŽIĆ NIKOLA
 *        payerAddress: ULICA DIVKA BUDAKA 17/17
 *        payerTown: 10000 ZAGREB
 *        payeeName: GPZ-Opskrba d.o.o.
 *        payeeAddress: Radnička cesta 1
 *        payeeTown: 10000 Zagreb
 *        IBAN: HR3623400091110343158
 *        model: HR05
 *        reference: 02964686-0307
 *        code: GASB
 *        description: Akontacijska rata za 01.2024.
 *
 */
const parseHubText = (text: string): BillInfo => {
    // Accept CRLF as well as LF so that no field keeps a trailing '\r'
    const lines = text.split(/\r?\n/);

    // A missing line becomes an empty string, so every string property really holds a string
    const lineAt = (index: number): string => lines[index] ?? '';

    return {
        header: lineAt(0),
        currency: lineAt(1),
        // zero-padded digits; an empty or non-numeric line yields NaN
        amount: parseInt(lineAt(2), 10),
        payerName: lineAt(3),
        payerAddress: lineAt(4),
        payerTown: lineAt(5),
        payeeName: lineAt(6),
        payeeAddress: lineAt(7),
        payeeTown: lineAt(8),
        IBAN: lineAt(9),
        model: lineAt(10),
        reference: lineAt(11),
        code: lineAt(12),
        description: lineAt(13),
    };
}

/**
 * Render an image from the given file onto a new canvas.
 *
 * The file is read as a data URL, loaded into an image element and drawn onto a
 * canvas that has the same size as the image.
 *
 * @param imageFile - a file containing an image
 * @returns the canvas with the image rendered onto it
 * @throws rejects with an `Error` (or the reader's own error) if the file cannot
 *   be read, the image cannot be loaded, or no 2D context is available
 */
const file2canvas = async function (imageFile: File): Promise<HTMLCanvasElement> {

    return new Promise<HTMLCanvasElement>((resolve, reject) => {
        const reader = new FileReader();

        reader.onload = () => {
            const dataUrl = reader.result;

            // readAsDataURL should always produce a string; guard instead of casting
            if (typeof dataUrl !== 'string') {
                reject(new Error('File could not be read as a data URL'));
                return;
            }

            const img = new Image();

            img.onload = () => {
                const canvas = document.createElement('canvas');
                const ctx = canvas.getContext('2d');

                if (!ctx) {
                    reject(new Error("Context is not set"))
                    return;
                }

                canvas.width = img.width;
                canvas.height = img.height;

                ctx.drawImage(img, 0, 0);

                resolve(canvas);
            };

            // Without this handler the promise would never settle for a corrupt or unsupported image
            img.onerror = () => {
                reject(new Error('Image could not be loaded'));
            };

            img.src = dataUrl;
        };

        reader.onerror = () => {
            reject(reader.error ?? new Error('File could not be read'));
        };

        reader.readAsDataURL(imageFile);
    });
}

/**
 * Render an image onto a new canvas.
 *
 * @param imageBase64 - image source assigned to the image element's `src`
 *   (presumably a base64 `data:` URL — confirm against callers)
 * @returns the canvas with the image rendered onto it
 * @throws rejects with an `Error` if the image cannot be loaded or no 2D context is available
 */
const image2canvas = async function (imageBase64: string): Promise<HTMLCanvasElement> {

    return new Promise<HTMLCanvasElement>((resolve, reject) => {
        const img = new Image();

        img.onload = () => {
            const canvas = document.createElement('canvas');
            const ctx = canvas.getContext('2d');

            if (!ctx) {
                // reject with an Error (not a bare string), same as file2canvas
                reject(new Error("Context is not set"))
                return;
            }

            canvas.width = img.width;
            canvas.height = img.height;

            ctx.drawImage(img, 0, 0);

            resolve(canvas);
        };

        // Without this handler the promise would never settle for an invalid image source
        img.onerror = () => {
            reject(new Error('Image could not be loaded'));
        };

        img.src = imageBase64;
    });
};

/**
 * Render every page of a PDF document onto its own new canvas.
 *
 * The file is read into memory, opened with pdf.js (loaded on demand) and each
 * page is rendered at a fixed scale factor of 4. The pdf.js loading task is
 * destroyed before returning, so the document and its worker do not outlive
 * this call; the returned canvases stay valid.
 *
 * @param pdfFile - a file containing a PDF document
 * @returns one canvas per page, in page order
 * @throws rejects if the file cannot be read, the document cannot be opened or
 *   rendered, or no 2D canvas context is available
 */
const pdf2canvas = async function (pdfFile: File): Promise<Array<HTMLCanvasElement>> {

    const reader = new FileReader();
    const data = await new Promise<Uint8Array>((resolve, reject) => {
        reader.onload = () => resolve(new Uint8Array(reader.result as ArrayBuffer));
        reader.onerror = () => reject(reader.error ?? new Error('File could not be read'));
        reader.readAsArrayBuffer(pdfFile);
    });

    const pdfJS = await import('pdfjs-dist');

    // worker file was manually copied to the `public` folder
    pdfJS.GlobalWorkerOptions.workerSrc = window.location.origin + '/pdf.worker.min.mjs';

    // Scale factor applied to the page viewport when rendering
    const scale = 4;

    const loadingTask = pdfJS.getDocument(data);

    try {
        const pdf = await loadingTask.promise;

        const canvases: Array<HTMLCanvasElement> = [];

        // pdf.js page numbers are 1-based
        for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
            const page: PDFPageProxy = await pdf.getPage(pageNumber);

            const viewport = page.getViewport({ scale });

            const canvas = document.createElement('canvas');
            const context = canvas.getContext('2d');

            if (!context) {
                throw new Error('Failed to get canvas context');
            }

            canvas.height = viewport.height;
            canvas.width = viewport.width;

            await page.render({ canvasContext: context, viewport }).promise;

            canvases.push(canvas);
        }

        return (canvases);
    } finally {
        // Release the document and its worker, also when loading or rendering failed
        await loadingTask.destroy();
    }
}

/** One decoded barcode: the raw text together with its parsed form. */
export type DecodeResult = {
    /** Text exactly as returned by the barcode reader */
    hub3aText: string;
    /** The same text split into individual payment fields */
    billInfo: BillInfo;
};

/**
 * Reads the pixels of a whole canvas so they can be handed to zxing-wasm.
 * @param canvas - canvas whose full area is read
 * @returns the pixel data covering the canvas from (0, 0) to (width, height)
 * @throws Error when the canvas does not provide a 2D context
 */
const canvasToImageData = (canvas: HTMLCanvasElement): ImageData => {
    const context2d = canvas.getContext('2d');

    if (context2d) {
        const { width, height } = canvas;
        return context2d.getImageData(0, 0, width, height);
    }

    throw new Error('Failed to get canvas context');
};

/**
 * Searches the given canvas for PDF417 codes and decodes them.
 *
 * A canvas can contain several codes while the reader is asked for a single code
 * per image, so the canvas is cut into horizontal sections which are decoded one
 * by one. Several slicing strategies are tried, from the finest (6 sections) down
 * to the whole canvas. The search stops as soon as a coarser strategy does not
 * find more codes than the best non-empty result so far. While nothing has been
 * found, the remaining strategies (including the whole canvas) are still tried.
 *
 * @param canvas - the canvas to search for PDF417 codes
 * @returns the decoded codes of the best strategy (possibly an empty array),
 *   or `null` if an unexpected error occurred
 */
const decodeFromCanvas = async (canvas: HTMLCanvasElement): Promise<Array<DecodeResult> | null> => {
    // Scratch canvases reused by every strategy; declared outside `try` so `finally` can release them
    const canvasPool: Array<{ canvas: HTMLCanvasElement, ctx: CanvasRenderingContext2D }> = [];

    try {
        const readerOptions: ReaderOptions = {
            tryHarder: true,
            formats: ['PDF417'],
            maxNumberOfSymbols: 1, // Decode one barcode per slice
        };

        const width = canvas.width;
        const height = canvas.height;

        // Number of horizontal cuts per strategy: 5 cuts => 6 sections, ..., 0 cuts => the whole canvas
        const splits = [5, 4, 3, 2, 1, 0];
        const maxSections = Math.max(...splits) + 1;

        for (let i = 0; i < maxSections; i++) {
            const sectionCanvas = document.createElement('canvas');
            const sectionContext = sectionCanvas.getContext('2d');
            if (!sectionContext) {
                throw new Error('Failed to get canvas context');
            }
            canvasPool.push({ canvas: sectionCanvas, ctx: sectionContext });
        }

        let bestResult: Array<DecodeResult> | null = null;

        for (const split of splits) {
            const sectionsNeeded = split + 1;

            // Distance between the starts of neighbouring sections (before overlap is applied)
            const stride = Math.ceil(height / sectionsNeeded);

            // Every section is extended by ~2% of the canvas height so that neighbouring sections overlap
            const overlap = split === 0 ? 0 : Math.round(height / 50);
            const sectionHeight = stride + overlap;

            // Prepare canvases from pool
            canvasPool.forEach(({ canvas: sectionCanvas, ctx: sectionContext }, i) => {
                if (i >= sectionsNeeded) {
                    // Free canvases that are not used by this strategy
                    sectionCanvas.width = 0;
                    sectionCanvas.height = 0;
                    return;
                }

                // Resizing also clears whatever a previous strategy has drawn
                sectionCanvas.width = width;
                sectionCanvas.height = sectionHeight;

                // The first section is extended downwards, all the others upwards
                const startY = Math.max(0, i * stride - overlap);

                // Never read below the bottom edge of the source canvas
                const sourceHeight = Math.max(0, Math.min(sectionHeight, height - startY));

                sectionContext.drawImage(canvas, 0, startY, width, sourceHeight, 0, 0, width, sourceHeight);
            });

            const codesFoundInSection: Array<DecodeResult> = [];

            // Try to decode each section (only the ones we're using)
            for (let i = 0; i < sectionsNeeded; i++) {
                const { canvas: sectionCanvas } = canvasPool[i];

                try {
                    // give browser a chance to re-paint
                    // this is needed to avoid UI freezing when decoding large images
                    await yieldToBrowser('decodeFromCanvas');

                    const imageData = canvasToImageData(sectionCanvas);
                    const results = await readBarcodes(imageData, readerOptions);

                    if (results.length > 0 && results[0].text) {
                        const hub3aText = results[0].text;
                        codesFoundInSection.push({
                            hub3aText,
                            billInfo: parseHubText(hub3aText),
                        });
                    }

                } catch (error) {
                    // A failure in one section must not abort the others, but it should not go unnoticed
                    console.warn('Failed to decode canvas section', error);
                }
            }

            await yieldToBrowser('after decodeFromCanvas');

            // Once something has been found, a coarser strategy that does not find more codes
            // means we can stop. An empty result never stops the search: coarser strategies
            // (down to the whole canvas) must still get their chance.
            if (bestResult !== null && bestResult.length > 0 && codesFoundInSection.length <= bestResult.length) {
                return bestResult;
            }

            bestResult = codesFoundInSection;
        }

        return bestResult;

    } catch (error) {
        console.error(error);
        return null;
    } finally {
        // Shrinking the scratch canvases lets the browser release their pixel buffers right away
        for (const { canvas: sectionCanvas } of canvasPool) {
            sectionCanvas.width = 0;
            sectionCanvas.height = 0;
        }
    }
}

/**
 * Finds a PDF417 code within an image and decodes it.
 *
 * The whole image is passed to the reader in one piece and at most one code is
 * decoded. A result with empty text is treated as "nothing found", the same way
 * `decodeFromCanvas` treats it.
 *
 * @param imageBase64 - image source handed to `image2canvas`
 *   (presumably a base64 `data:` URL — confirm against callers)
 * @returns the decoded text and parsed bill info, or `null` if no code was decoded
 * @throws rejects if the image cannot be rendered to a canvas or the reader fails
 */
export const decodeFromImage = async (imageBase64: string): Promise<DecodeResult | null> => {
    const canvas = await image2canvas(imageBase64);

    const readerOptions: ReaderOptions = {
        tryHarder: true,
        formats: ['PDF417'],
        maxNumberOfSymbols: 1,
    };

    const imageData = canvasToImageData(canvas);
    const results = await readBarcodes(imageData, readerOptions);

    // No symbol at all, or a symbol without text: there is nothing to parse
    if (results.length === 0 || !results[0].text) {
        return null;
    }

    const hub3aText = results[0].text;

    return ({
        hub3aText,
        billInfo: parseHubText(hub3aText)
    });
}

/**
 * Finds PDF417 codes within a file and decodes them.
 *
 * PNG and JPEG files are decoded as a single image. PDF files are rendered page
 * by page and the codes found on all pages are returned in page order. Pages are
 * processed one after another so that only one page at a time allocates its
 * scratch canvases.
 *
 * @param file - a PNG, JPEG or PDF file
 * @returns the decoded codes; `null` for an unsupported file type or when decoding an image fails
 */
const decodeFromFile = async (file: File): Promise<DecodeResult[] | null> => {
    switch (file.type) {
        case 'image/png':
        case 'image/jpeg':
            return (await decodeFromCanvas(await file2canvas(file)));
        case 'application/pdf': {
            const pageCanvases = await pdf2canvas(file);
            const results: DecodeResult[] = [];

            for (const pageCanvas of pageCanvases) {
                await yieldToBrowser('decodeFromCanvas');

                const pageResults = await decodeFromCanvas(pageCanvas);

                // null means decoding of this page failed; such pages contribute nothing
                if (pageResults !== null) {
                    results.push(...pageResults);
                }

                // The rendered page is not needed any more; shrinking it frees its pixel buffer
                pageCanvas.width = 0;
                pageCanvas.height = 0;
            }

            return (results);
        }
        default:
            console.error(file.name, 'is not a supported file type (expected PDF, PNG or JPEG).');
            return null;
    }
}

/**
 * Decodes the PDF417 codes contained in the file picked through a file input.
 * Only the first selected file is processed.
 * @param event - the change event from an HTMLInputElement of type `file`
 * @returns whatever `decodeFromFile` returns for the first selected file,
 *   or `null` (after logging an error) when no file was selected
 */
export async function findDecodePdf417(event: React.ChangeEvent<HTMLInputElement>): Promise<Array<DecodeResult> | null> {
    const input = event.target as HTMLInputElement;
    const selectedFile = input.files?.[0];

    if (selectedFile) {
        const decoded = await decodeFromFile(selectedFile);
        return decoded;
    }

    console.error('No file was selected.');
    return null;
}