-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun.py
94 lines (71 loc) · 2.68 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
"""
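Main executable for the pipeline.

Command line options:
- '--all' to run the clean-data, stage-data and model steps
  (data loading only runs when '--load-data' is given explicitly)
- '--load-data' if you want to perform the data loading process
- '--clean-data' to clean the raw data
- '--stage-data' to stage the data
- '--eda' if you want to print some simple descriptive statistics
  (NOTE: this flag is not currently handled in main())
- '--models' or '--model' to build models
- the first argument containing '.yaml' is used as the experiment
  configuration file (defaults to config_experiments.yaml)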
"""
# Import modules from pipeline/
# and from utils/
import pipeline.sql as plsql
import pipeline.models as plmod
import pipeline.results as plres
import utils.load_data as uload
import utils.eda as ueda
import utils.models as umod
import utils.clean_data as uclean
import utils.stage_data as ustage
import utils.stage_data as ustage
import utils.temp_clean as utclean
from sys import argv
from os import path
from random import choice
def main():
    """Run the pipeline stages selected on the command line.

    Reads ``config.yaml`` from the current working directory and an
    experiment configuration file (the first command-line argument that
    contains ``.yaml``, or ``config_experiments.yaml`` when none is given),
    creates the SQL engine, then runs every stage whose flag appears in
    ``sys.argv``:

    - ``--load-data``: load data (not triggered by ``--all``)
    - ``--clean-data`` or ``--all``: clean data
    - ``--stage-data`` or ``--all``: stage data
    - ``--model``, ``--models`` or ``--all``: build models

    Raises:
        SystemExit: with status 1 when either configuration file is missing.
    """
    # Check for and load the correct .yaml files
    if not path.exists('config.yaml'):
        print('Please create or rename your .yaml file to config.yaml.')
        # Exit with a non-zero status so shells and schedulers see the
        # failure; raising SystemExit directly also avoids relying on the
        # interactive exit() helper injected by the site module.
        raise SystemExit(1)
    config = plsql.read_yaml('config.yaml')

    # Allow for custom config_experiment file,
    # but default to config_experiments.yaml
    yaml_args = [arg for arg in argv if '.yaml' in arg]
    config_exp_path = yaml_args[0] if yaml_args else 'config_experiments.yaml'
    if not path.exists(config_exp_path):
        print('Missing experiment configuration yaml file.')
        raise SystemExit(1)
    config_exp = plsql.read_yaml(config_exp_path)

    # Create sqlalchemy engine for SQL queries
    engine = plsql.create_engine('config.yaml')

    run_all = '--all' in argv

    ######### Load data part of pipeline
    # Deliberately not part of '--all': loading is still incomplete.
    if '--load-data' in argv:
        print("\nLOADING DATA.......")
        print("WARNING: this part of the pipeline is incomplete.")
        # Get data locations
        uload.load_data(engine, config['data_dirs'])

    ########## Clean data part of pipeline
    if run_all or '--clean-data' in argv:
        # Make raw files usable
        print("Currently only cleaning data, not loading.")
        uclean.clean_data(engine)

    ######### Staging data
    if run_all or '--stage-data' in argv:
        print("\nSTAGING DATA........")
        ustage.stage_data(engine)

    ######### Model
    if run_all or '--model' in argv or '--models' in argv:
        print("\nMODELING.......")
        # Create models
        umod.create_models(engine, config_exp, config['output_dir'])

        # TODO: Silly test, don't leave this here
        # NOTE(review): assumed to belong to the modeling branch - confirm.
        print(choice(["I wish Chile had qualified for the World Cup.",
                      "Wow, Team Chile is the best team!",
                      "Let's go to Kilwin's!",
                      "wow look at all these great results!!!!",
                      "Who is guarding DSaPP's hot Cheetos?",
                      ":viola:"]))
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()