-
Notifications
You must be signed in to change notification settings - Fork 0
/
WIN.PDF.TXT.EXTRACTOR.py
39 lines (39 loc) · 2.15 KB
/
WIN.PDF.TXT.EXTRACTOR.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#- *************************************************************************************************************:
#- ******************************************** EXTRACT TXT FROM PDF *******************************************:
#- *************************************************************************************************************:
#- Author: JBallard (JEB) :
#- Date: 2017.3.11 :
#- Script: WIN.PDF.TXT.EXTRACTOR.py :
#- Purpose: A Python Script that merges the pages of two PDF files (example.pdf, example2.pdf) into a new PDF file (example3.pdf). :
#- Version: 1.0 :
#- *************************************************************************************************************:
#- *************************************************************************************************************:
#-
#-- ********************************************************:
#-- DEFINE PARAMS, CONSTANTS, CONFIG PATHS, IMPORT CLASSES :
#-- ********************************************************:
import PyPDF2
#-
#- Append every page of example.pdf, then every page of example2.pdf, to a
#- new PDF and save the result as example3.pdf.
#- NOTE: PdfFileReader / PdfFileWriter / numPages / getPage / addPage are the
#- legacy PyPDF2 names; PyPDF2 3.x replaced them with PdfReader / PdfWriter /
#- len(reader.pages) / reader.pages[i] / add_page.
#-
#- OPEN SOURCE PDF FILES:
#- The `with` block closes both inputs even if reading or writing raises.
#- They are kept open until the output has been written, matching the
#- original ordering (PyPDF2 may read page content lazily -- TODO confirm).
with open('example.pdf', 'rb') as pdf1File, open('example2.pdf', 'rb') as pdf2File:
    #- READ PDF FILES:
    pdf1Reader = PyPDF2.PdfFileReader(pdf1File)
    pdf2Reader = PyPDF2.PdfFileReader(pdf2File)
    #- COLLECT PAGES FOR NEW PDF FILE (first file's pages, then the second's):
    pdfWriter = PyPDF2.PdfFileWriter()
    for pdfReader in (pdf1Reader, pdf2Reader):
        for pageNum in range(pdfReader.numPages):
            pdfWriter.addPage(pdfReader.getPage(pageNum))
    #- POPULATE NEW PDF FILE:
    #- Opened only after both inputs were read, so a missing or unreadable
    #- input does not leave an empty example3.pdf behind.
    with open('example3.pdf', 'wb') as pdfOutputFile:
        pdfWriter.write(pdfOutputFile)
#-
#-- ********************************************************:
#-- END OF PYTHON SCRIPT :
#-- ********************************************************: