This repository has been archived by the owner on Mar 4, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdown_data_in_parts_tot.py
executable file
·98 lines (80 loc) · 3.45 KB
/
down_data_in_parts_tot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
from datetime import datetime as dt, timedelta as td
from calendar import monthrange as ndays
import re
import os
import tables
from pylab import *
from hisparc.publicdb import download_data
from remove_duplicate_events import remove_duplicate_events
from check_if_weather import check_if_weather
def down_data_in_parts(station_id, user_start_date_data_interval,
user_stop_date_data_interval):
""" Download HiSPARC data in parts
This function will split the downloading of data into months, making
one file for each part. This may make working with large data sets easier
on slower computers.
The end date is not included, it is not 'up to and including ..'
So start=dt(2011, 6, 29), stop=dt(2011, 6, 30)
Only downloads data from 29th June.
"""
# split user input date strings
start_list = re.split(',', user_start_date_data_interval)
stop_list = re.split(',', user_stop_date_data_interval)
# turn the list of strings into integers
start_list_int = [int(x) for x in start_list]
stop_list_int = [int(x) for x in stop_list]
# turn the date list into date objects
start_date = dt(start_list_int[0], start_list_int[1], start_list_int[2])
stop_date = dt(stop_list_int[0], stop_list_int[1], stop_list_int[2])
downloaded_data_files = []
if start_date >= stop_date:
print 'Error: Start date is after stop date!'
elif stop_date >= dt.today():
print 'Error: End date is after today!'
elif start_date < dt(2004, 1, 1):
print 'Error: Start date is before the start of the HiSPARC project!'
else:
for start, stop in monthrange(start_date, stop_date):
filename = download_part(station_id, start, stop)
downloaded_data_files.append(filename)
print 2 * "\a"
return downloaded_data_files
def monthrange(start, stop):
""" A generator function that yields datetime objects in ranges of months
If start and stop are more than 31 days apart the interval is split into
months. The first will be from start until the end of its month, then
each month upto the month of stop, and finally from the start of that
month until stop.
"""
a_day = td(days=1)
if (stop - start).days < 31:
yield start, stop
return
else:
cur = start + td(days=ndays(start.year, start.month)[1] - start.day + 1)
yield start, cur
while cur + td(days=ndays(cur.year, cur.month)[1]) < stop:
yield cur, cur + td(days=ndays(cur.year, cur.month)[1])
cur += td(days=ndays(cur.year, cur.month)[1])
yield cur, stop
return
def download_part(station_id, start, stop):
""" Download HiSPARC data from start till stop
"""
hisp_station = int(station_id)
tree = '/s' + station_id
path = 'data.root.s' + station_id
filename = 'data_s%d_%s_%s.h5' % (hisp_station,
start.strftime('%Y%m%d'),
stop.strftime('%Y%m%d'))
with tables.openFile(filename, 'w') as data:
download_data(data, tree, hisp_station, start, stop)
remove_duplicate_events(data, eval(path))
weather_data_in_file = check_if_weather(data, filename)
if weather_data_in_file:
remove_duplicate_events(data, eval(path), kind='weather')
print ''
print "'" + filename + "' has downloaded."
print 'You can find it at the location: ' + os.getcwd()
print ''
return filename