-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsectionExtract.py
64 lines (48 loc) · 1.71 KB
/
sectionExtract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 8 20:45:43 2018
@author: Ashwin
"""
import simplejson as json
import pickle
import datetime
import sqlite3
import traceback
import os
import dateutil.parser as dp
# Start each run with a fresh duplicate-key log: parseFile() opens this file
# in append mode, so a leftover copy would mix entries from earlier runs.
if os.path.exists("sectionintegritylog"):
    os.remove("sectionintegritylog")

# File read line by line by parseFile(); each line is decoded as JSON.
inputFile = "/home/akshay/IIT KGP/SEM 2/Complex Network/Term Project/articleJSON.txt"

# One shared connection and cursor for the whole script.
db = sqlite3.connect('../commentsData.db')
c = db.cursor()

# (id, section) is the primary key, so inserting the same pair twice raises
# sqlite3.IntegrityError.
c.execute(
    'CREATE TABLE IF NOT EXISTS ArticleSection(id text,section text,PRIMARY KEY(id,section) ) '
)
def writeArticleInDB(article, cursor=None):
    """Insert one article's (id, section) pair into the ArticleSection table.

    Articles whose ``section_name`` is ``None`` or the empty string are
    skipped without touching the database.

    Args:
        article: Mapping that provides the ``_id`` and ``section_name`` keys.
        cursor: Optional DB-API cursor to run the insert on. When omitted,
            the module-level cursor ``c`` is used, which is what existing
            callers rely on.

    Raises:
        KeyError: If ``article`` has no ``_id`` or no ``section_name`` key.
        sqlite3.IntegrityError: If the (id, section) pair is already stored;
            the pair is the table's primary key.
    """
    ID = article['_id']
    secName = article['section_name']

    # Nothing to store when the article carries no section.
    if secName is None or secName == '':
        return

    if cursor is None:
        cursor = c
    cursor.execute("INSERT INTO ArticleSection VALUES(?,?)", (ID, secName))
def parseFile():
    """Load article sections from the input file into the database.

    Each line of ``inputFile`` is decoded as a JSON document, and every
    record under ``['response']['docs']`` is handed to ``writeArticleInDB``.
    The module-level connection ``db`` is committed once, after the whole
    file has been read.

    Per-line error handling:
      * A record that raises ``sqlite3.IntegrityError`` is skipped and the
        remaining records of that line are still processed. The line number
        is appended once to ``sectionintegritylog``.
      * A line whose JSON has no top-level ``response`` key is skipped
        silently.
      * Any other exception abandons the rest of that line; the line number
        and traceback are appended to ``log``.

    A progress message is printed every 1000 lines.
    """
    noOfLinesParsed = 0
    with open(inputFile) as f:
        for line in f:
            noOfLinesParsed += 1
            sawDuplicate = False
            try:
                payload = json.loads(line)
                for record in payload['response']['docs']:
                    # Catch duplicates per record so that one already-stored
                    # article does not discard the rest of the line's records.
                    try:
                        writeArticleInDB(record)
                    except sqlite3.IntegrityError:
                        sawDuplicate = True
            except Exception as e:
                # A missing 'response' key is expected for some lines and is
                # not worth logging. Inspect e.args rather than the
                # deprecated, Python-2-only e.message attribute.
                missingResponse = (
                    isinstance(e, KeyError) and e.args == ('response',)
                )
                if not missingResponse:
                    with open("log", "a") as log:
                        log.write("Error on line " + str(noOfLinesParsed) + "\n")
                        log.write(traceback.format_exc())

            if sawDuplicate:
                with open("sectionintegritylog", "a") as ilog:
                    ilog.write("Line no : " + str(noOfLinesParsed) + "\n")

            # Report progress whether or not the line parsed cleanly. A single
            # pre-built string prints the same text under the Python 2 print
            # statement and the Python 3 print function.
            if noOfLinesParsed % 1000 == 0:
                print("No of lines Parsed :  " + str(noOfLinesParsed))
    db.commit()
# Script entry point: import the file, then release the database connection.
if __name__ == "__main__":
    try:
        parseFile()
    finally:
        # Close the connection even when parseFile() raises, so the handle
        # is never left open on an error path.
        db.close()