-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdataset.py
58 lines (46 loc) · 1.61 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import numpy as np
import pandas as pd
headers_feature = ['z', 'eps', 'N2', 'eff']
nz_profile = 512
nfeatures = 2
def load_data(fdir, ratio=1.0):
"""
Returns the mixing data-set as (train_x, train_y), (test_x, test_y). where _x and _y denote "features" and "labels"
respectively.
"""
data = pd.read_csv(fdir, names=headers_feature, skiprows=1)
eff = data.pop('eff')[0:data.shape[0]:nz_profile]
zz = data.pop('z')
# data must be reshaped
features = np.array(data, dtype=np.float32).reshape((-1, nz_profile, nfeatures)).transpose((0,2,1))
labels = np.array(eff, dtype=np.float32)
# normalize the input features
# features = normalize_data(features, axis=2)
if ratio < 1.0:
# divide dataset (85-15) for training-testing
train_x, test_x = split_data_train_test(features, ratio)
train_y, test_y = split_data_train_test(labels , ratio)
return (train_x, train_y), (test_x, test_y)
else:
return features, labels
def split_data_train_test(data, ratio):
"""
splits feature or label data into "train" and "test"
"""
# divide dataset (85-15) for training-testing
nexample = data.shape[0]
nexample_train = np.int32(ratio * nexample)
# split data-set into training and testing set
train_set = data[:nexample_train]
test_set = data[nexample_train:]
return train_set, test_set
def save_data(data_dic,fname):
"""
"saves the the pands DataFrame into CSV file
:param fname: file_name
:param df: DataFrame
:return:
"""
df = pd.DataFrame(data=data_dic)
df.to_csv(fname, sep='\t')
return