-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathKoboNotesPython.py
160 lines (99 loc) · 4.41 KB
/
KoboNotesPython.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 8 20:20:04 2018
@author: francisco
"""
import pandas as pd
import sqlite3
import warnings
warnings.filterwarnings("ignore")
import os
from docx import Document
import sys
def loadDatabase(file):
    """Load highlights and their books from a Kobo SQLite database.

    Args:
        file: Path to the .sqlite file; it is passed to ``sqlite3.connect``.

    Returns:
        A ``(bookmark, content)`` tuple of DataFrames.

        ``bookmark`` holds the columns VolumeID, Text, Annotation,
        DateCreated and DateModified from the ``Bookmark`` table, sorted by
        creation then modification date, with rows lacking ``Text`` removed.

        ``content`` holds ContentID, ContentType, MimeType, Title and
        Attribution from the ``content`` table, restricted to the ContentIDs
        that appear as a bookmark VolumeID, sorted by Title and de-duplicated
        on ContentID.
    """
    conn = sqlite3.connect(file)
    try:
        # Run both queries up front so the connection is released even when
        # a query fails (previously close() was skipped on any exception).
        bookmark = pd.read_sql_query("SELECT * FROM Bookmark", conn)
        content = pd.read_sql_query(
            "SELECT ContentID,ContentType,MimeType,Title,Attribution FROM content",
            conn,
        )
    finally:
        conn.close()

    # Format the date columns so sorting is chronological rather than textual.
    bookmark['DateCreated'] = pd.to_datetime(bookmark['DateCreated'])
    bookmark['DateModified'] = pd.to_datetime(bookmark['DateModified'])
    bookmark = bookmark.sort_values(['DateCreated', 'DateModified'], ascending=[True, True])

    # Keep only the columns used downstream and drop rows without text.
    bookmark = bookmark[['VolumeID', 'Text', 'Annotation', 'DateCreated', 'DateModified']]
    bookmark = bookmark.dropna(subset=['Text'])

    # Keep only the books that have at least one highlight and/or annotation.
    volume_ids = bookmark.VolumeID.unique()
    content = content[content.ContentID.isin(volume_ids)]
    content = content.sort_values(['Title'], ascending=[True])
    content = content.drop_duplicates(subset=['ContentID'])
    return bookmark, content
def createUnifiedDatabase(bookmark,content):
    """Attach each bookmark's book title and sort by title, then date.

    Args:
        bookmark: DataFrame with at least ``VolumeID`` and ``DateCreated``.
        content: DataFrame with at least ``ContentID`` and ``Title``.

    Returns:
        A new DataFrame: ``bookmark`` plus a ``Title`` column, sorted by
        ``Title`` and ``DateCreated``. Bookmarks whose VolumeID matches no
        ContentID get the placeholder string ``'None'``. The input frame is
        not modified.
    """
    # One lookup table instead of one boolean-mask assignment per book. The
    # previous chained assignment (frame['Title'][mask] = ...) is silently a
    # no-op under pandas Copy-on-Write. When a ContentID repeats, the last
    # row wins, matching the order of the original loop.
    title_by_id = dict(zip(content['ContentID'], content['Title']))
    titles = [title_by_id.get(volume_id, 'None') for volume_id in bookmark['VolumeID']]
    unified = bookmark.assign(Title=titles)
    return unified.sort_values(['Title', 'DateCreated'], ascending=[True, True])
def getBooks(file):
    """Print the title of every book that has highlights, one per line.

    Args:
        file: Path to the .sqlite file, forwarded to ``loadDatabase``.

    Returns:
        None; the titles are written to standard output only.
    """
    # Only the content frame is needed here; the bookmark frame is discarded.
    _, books_with_notes = loadDatabase(file)
    title_lines = '\n'.join(books_with_notes.Title.unique())
    print(title_lines)
def BookToTXT(file,book):
    """Export the highlights of a single book to ``<book>.txt``.

    Args:
        file: Path to the .sqlite file.
        book: Exact title of the book to export.

    The text file is written to the current working directory. When no
    highlight belongs to ``book``, nothing is written and the available
    titles are printed instead.
    """
    bookmark, content = loadDatabase(file)
    data = createUnifiedDatabase(bookmark, content)
    data = data[data.Title == book]
    if len(data) == 0:
        print("Book not available. Try one of the following books:\n")
        # getBooks prints the list itself and returns None, so wrapping it in
        # print() used to emit a stray "None" line.
        getBooks(file)
    else:
        # Series.get_values() was removed in pandas 1.0; to_numpy() is the
        # documented replacement. `data` is already filtered to this book.
        _saveTXTFile(data['Text'].to_numpy(), data['Annotation'].to_numpy(), book + ".txt")
def saveBooksToTXT(file,folder):
    """Export every book's highlights to ``<folder>/<title>.txt``.

    Args:
        file: Path to the .sqlite file.
        folder: Output directory; created when it does not exist.
    """
    bookmark, content = loadDatabase(file)
    data = createUnifiedDatabase(bookmark, content)
    if not os.path.exists(folder):
        os.makedirs(folder)
    for book in data.Title.unique():
        rows = data[data.Title == book]
        # Series.get_values() was removed in pandas 1.0; to_numpy() is the
        # documented replacement.
        _saveTXTFile(
            rows['Text'].to_numpy(),
            rows['Annotation'].to_numpy(),
            os.path.join(folder, book + ".txt"),
        )
def _saveTXTFile(arrayText,arrayAnnotation,nameFile):
    """Write highlights, each followed by a blank line, to a text file.

    Args:
        arrayText: Iterable of highlight strings.
        arrayAnnotation: Iterable of annotations, paired with ``arrayText``
            by position. Entries that are not non-empty strings (None, "",
            NaN) are treated as "no annotation".
        nameFile: Path of the file to create or overwrite.
    """
    # Explicit UTF-8: the platform default encoding can fail on non-ASCII
    # characters in the highlights.
    with open(nameFile, "w", encoding="utf-8") as f:
        for text, annotation in zip(arrayText, arrayAnnotation):
            # isinstance() also rejects float NaN, which passed the old
            # `!= None` check and then failed on str + float.
            if isinstance(annotation, str) and annotation:
                f.write(text + " Annotation: " + annotation + "\n\n")
            else:
                f.write(text + "\n\n")
def saveBooksToWord(file,out_path):
    """Export every book's highlights into a single .docx document.

    Each book gets a level-1 heading, one paragraph per highlight (with the
    annotation appended after a bold " Annotation: " label when present),
    and a trailing page break.

    Args:
        file: Path to the .sqlite file.
        out_path: Destination path of the .docx file; its parent directory
            is created when it does not exist.
    """
    # NOTE(review): the source this was recovered from had lost its
    # indentation; the nesting below (page break once per book, save once at
    # the end) is the reconstruction - confirm against the original file.
    document = Document()
    bookmark, content = loadDatabase(file)
    data = createUnifiedDatabase(bookmark, content)

    out_dir = os.path.dirname(out_path)
    if out_dir != "" and not os.path.exists(out_dir):
        os.makedirs(out_dir)

    for book in data.Title.unique():
        rows = data[data.Title == book]
        # Series.get_values() was removed in pandas 1.0; to_numpy() is the
        # documented replacement.
        arrayText = rows['Text'].to_numpy()
        arrayAnnotation = rows['Annotation'].to_numpy()
        document.add_heading(book, 1)
        # Empty paragraph directly under the heading, as in the original.
        document.add_paragraph()
        for text, annotation in zip(arrayText, arrayAnnotation):
            # Drop leading '.', ' ', ',' and ':' characters. lstrip() is safe
            # on text made only of those characters, where the old
            # `while text[0] ...` loop raised IndexError.
            text = text.lstrip('. ,:')
            text = text.replace('\n', '')
            p = document.add_paragraph(text)
            # isinstance() also rejects NaN, which passed `!= None`.
            if isinstance(annotation, str) and annotation:
                p.add_run(' Annotation: ').bold = True
                p.add_run(annotation)
        document.add_page_break()
    document.save(out_path)
if __name__ == "__main__":
    # Command-line entry point. Usage:
    #     python KoboNotesPython.py toDOCX <database.sqlite> <output.docx>
    # Read the command defensively: indexing sys.argv[1] directly raised
    # IndexError when the script was started without any argument.
    command = sys.argv[1] if len(sys.argv) > 1 else None
    if command == 'toDOCX':
        if len(sys.argv) >= 4:
            saveBooksToWord(sys.argv[2], sys.argv[3])
            print('DOCX created')
        else:
            print("Please provide file .sqlite path and out path for .docx")
    else:
        print('Command not recognized')