Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: reading multiple pdf files with a single PDFParser object #371

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
2 changes: 1 addition & 1 deletion jest.config.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"testMatch": ["**/test/_test_.*"],
"testMatch": ["**/test/_test_*"],
"testEnvironment": "node",
"bail": false,
"testFailureExitCode": 1
Expand Down
5 changes: 5 additions & 0 deletions lib/pdf.js
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,10 @@ export default class PDFJSClass extends EventEmitter {

raiseReadyEvent(data) {
process.nextTick(() => this.emit("pdfjs_parseDataReady", data));
if(data===null){
//reset the state of pages
this.pages=[];
}
return data;
}

Expand Down Expand Up @@ -299,6 +303,7 @@ export default class PDFJSClass extends EventEmitter {
};

this.pages.push(page);

this.emit("data", page);

if (this.needRawText) {
Expand Down
8 changes: 7 additions & 1 deletion pdfparser.js
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,13 @@ export default class PDFParser extends EventEmitter {
nodeUtil.p2jinfo("PDF parsing completed.");
this.emit("pdfParser_dataReady", this.#data);
} else {
this.#data = { ...this.#data, ...data };
if(this.#data===data){
nicolabaesso marked this conversation as resolved.
Show resolved Hide resolved
this.#data=data;
}
else{
this.#data = { ...this.#data, ...data };
}

}
}

Expand Down
54 changes: 54 additions & 0 deletions test/_test_testMultipleDataPDF.cjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
const assert = require("assert");
const fs = require("fs");

const PDFParser = require("../dist/pdfparser.cjs");
// We want to read two (or more) PDF files without creating a new PDFParser instance.
describe("Multiple PDFs with same structure", () => {
  /**
   * Runs one parse of `buffer` on the shared `parser` and settles with that
   * run's outcome.
   *
   * Handlers are attached with `once` BEFORE parsing starts, and the handler
   * that did not fire is removed, so nothing from this run stays attached to
   * the parser when the next buffer is parsed.
   *
   * @param {object} parser - the PDFParser instance being reused
   * @param {Buffer} buffer - raw PDF bytes to parse
   * @returns {Promise<object>} resolves with the "pdfParser_dataReady" payload,
   *   rejects with the "pdfParser_dataError" payload
   */
  const parseWithSharedParser = (parser, buffer) =>
    new Promise((resolve, reject) => {
      const onReady = (evtData) => {
        parser.removeListener("pdfParser_dataError", onError);
        resolve(evtData);
      };
      const onError = (evtData) => {
        parser.removeListener("pdfParser_dataReady", onReady);
        reject(evtData);
      };

      parser.once("pdfParser_dataReady", onReady);
      parser.once("pdfParser_dataError", onError);
      parser.parseBuffer(buffer, 5);
    });

  test("Read different values", async () => {
    // The target PDFs for this test have only 3 values: Name, Surname and BirthDate.
    // You can find the PDFs in test/pdf/mpf.
    const parser = new PDFParser();
    const firstPDFLocation = `${__dirname}/pdf/mpf/testPDF.pdf`;
    const secondPDFLocation = `${__dirname}/pdf/mpf/testPDF2.pdf`;
    const firstPDFBuffer = fs.readFileSync(firstPDFLocation);
    const secondPDFBuffer = fs.readFileSync(secondPDFLocation);

    // The two files must differ in content, otherwise the test proves nothing.
    // Compare bytes: `toBe` only checks object identity, which is always
    // different for two separately read Buffers.
    expect(firstPDFBuffer.equals(secondPDFBuffer)).toBe(false);

    // Parse sequentially on purpose: the point is reusing ONE parser instance.
    const firstData = await parseWithSharedParser(parser, firstPDFBuffer);
    const secondData = await parseWithSharedParser(parser, secondPDFBuffer);

    // First, make sure the files are read.
    expect(firstData).toBeDefined();
    expect(firstData.Pages[0]).toBeDefined();
    expect(firstData.Pages[0].Fields).toBeDefined();
    expect(secondData).toBeDefined();
    expect(secondData.Pages[0]).toBeDefined();
    expect(secondData.Pages[0].Fields).toBeDefined();

    // Then, check that each result carries the values of its own file.
    expect(firstData.Pages[0].Fields[0].V).toBe("Mario");
    expect(firstData.Pages[0].Fields[1].V).toBe("Rossi");
    expect(firstData.Pages[0].Fields[2].V).toBe("01/01/1990");
    expect(secondData.Pages[0].Fields[0].V).toBe("Luigi");
    expect(secondData.Pages[0].Fields[1].V).toBe("Verdi");
    expect(secondData.Pages[0].Fields[2].V).toBe("01/01/1991");
  });
});
Binary file added test/pdf/mpf/testPDF.pdf
Binary file not shown.
Binary file added test/pdf/mpf/testPDF2.pdf
Binary file not shown.
Loading