-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathminer.py
74 lines (60 loc) · 2.03 KB
/
miner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
"""MINER - A script for downloading, unpacking, and converting public and open data into your preferred local format
http://alexanderjfink.github.io/miner
Usage:
miner.py (search|dig) <dataset>
miner.py (assay|describe) <dataset>
miner.py (extract|install) <dataset> [--subset <subset>]
Options:
-h --help Show this screen.
-v --version Show version.
"""
# External Libraries
from docopt import docopt
# Local libraries
from library.utils.helpers import *
from library.utils.db import *
# Temporary module imports
from library.maps.uscensus import *
from library.maps.usform990 import *
from library.maps.nycpolicepenalties import *
def _install(maps, dataset):
    """Run the whole map pipeline for ``dataset`` and report the outcome.

    Running a "map" currently involves, in order: ``setup``, ``download``
    (download the data from a server), ``unpack`` (unzip the data and clean
    it), ``install`` (send it to the configured database) and ``cleanup``.

    The map class is resolved *before* the pipeline starts, so only a
    genuinely unknown dataset name produces the "Can't find dataset" message.
    A ``KeyError`` raised inside one of the pipeline steps propagates to the
    caller instead of being misreported as a missing dataset.

    Returns True when every step ran, False when ``dataset`` is not in
    ``maps``.
    """
    if dataset not in maps:
        print("Can't find dataset. Try miner search <dataset>")
        return False

    # TODO: need some **kwargs here (e.g. install_location); the --subset
    # option accepted on the command line is not used yet.
    proc = maps[dataset]()
    proc.setup()
    proc.download()
    proc.unpack()
    proc.install()
    proc.cleanup()

    print("Dataset " + dataset + " installed successfully.")
    return True


def _describe(maps, dataset):
    """Print the ``description`` and ``homepage`` of the map for ``dataset``.

    Returns True when the dataset is known, False (after printing a hint)
    when it is not in ``maps``.
    """
    if dataset not in maps:
        print("Can't find dataset. Try miner search <dataset>")
        return False

    proc = maps[dataset]()
    print(proc.description)
    print(proc.homepage)
    return True


def _search(maps, dataset):
    """Check whether ``dataset`` is a known map name.

    Only a membership test is performed; the map class is not instantiated
    just to find out whether it exists.  Nothing is printed on a hit.

    Returns True when the dataset is known, False (after printing where to
    contribute it) otherwise.
    """
    # TODO: Need to add fuzzy searching of datasets
    if dataset in maps:
        return True

    print("Dataset does not exist. Add it by visiting http://www.github.com/alexanderjfink/miner")
    return False


def run_command(args, maps):
    """Dispatch parsed command-line arguments to the matching action.

    ``args`` is the dictionary produced by ``docopt`` for the usage text in
    the module docstring; ``maps`` associates dataset names with map classes.
    Each command has an alias: install/extract, describe/assay, search/dig.

    Returns True when the requested dataset was found and handled, False
    otherwise (including when no known command flag is set).  Messages are
    printed with single-argument ``print(...)`` calls, which behave the same
    under the Python 2 print statement and the Python 3 print function.
    """
    dataset = args['<dataset>']

    if args.get('install') or args.get('extract'):
        return _install(maps, dataset)
    if args.get('describe') or args.get('assay'):
        return _describe(maps, dataset)
    if args.get('search') or args.get('dig'):
        return _search(maps, dataset)
    return False


if __name__ == '__main__':
    args = docopt(__doc__, version='Miner 0.0.1a')

    # TODO: SHOULD BUILD THIS MAP OUT OF A LISTING OF MAPS IN THE MAPS/ FOLDER
    maps = {
        'uscensus2010': USCensus2010,
        'usform990': USForm990Extracts,
        'nycpolicepenalties': NYCPolicePenalties,
    }

    # The result is informational only; the process exit status is unchanged.
    run_command(args, maps)