-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfunctions.py
executable file
·113 lines (88 loc) · 3.58 KB
/
functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/env python
# encoding: utf-8
'''
Common unclassified functions
'''
from pylab import *
import numpy as np
import pandas as pd
from itertools import chain
from collections import deque
import sys
from datetime import datetime
def row(x):
    '''Return sequence x as a numpy row-vector with shape (1,len(x)).

    x may be any one-axis sequence (list, tuple, numpy array, ...).
    The result is always built from a fresh copy of the data, so writing to
    it never modifies the caller's x.

    Raises AssertionError if x has a number of axes different from one.
    Author: Victor Kitov, 03.2016.'''
    # np.array copies by default, so one call both converts non-array input
    # and detaches the result from an ndarray argument; the explicit np.
    # prefix avoids relying on the bare `array` name leaked by a star-import.
    x = np.array(x)
    assert x.ndim == 1, 'x should contain only one axis!'
    return x[np.newaxis, :]
def vec(x):
    '''Return sequence x as a numpy column-vector with shape (len(x),1).

    Non-array input is converted with np.array first; an ndarray argument is
    used as is and only gets an extra trailing axis.

    Raises AssertionError if x has a number of axes different from one.
    Author: Victor Kitov, 03.2016.'''
    column = x if isinstance(x, np.ndarray) else np.array(x)
    assert column.ndim == 1, 'x should contain only one axis!'
    # Indexing with None inserts the new length-1 axis after the data axis.
    return column[:, None]
def normalize(z):
    '''Min-max feature normalization: rescale z linearly so that its
    smallest value maps to 0 and its largest value maps to 1.

    z is a one-axis collection of numbers. Lists and tuples are converted
    to a numpy array first; any other input is used as is and must support
    elementwise arithmetic.

    A constant feature (max equal to min) is mapped to all zeros instead of
    dividing zero by zero.
    Author: Victor Kitov, 03.2016.'''
    if isinstance(z, (list, tuple)):
        # plain sequences do not support elementwise `z - scalar`
        z = np.array(z)
    lo, hi = min(z), max(z)
    span = hi - lo
    if span == 0:
        # every element equals lo, so (z - lo) is already all zeros;
        # divide by 1 to keep the same result type as the regular path
        span = 1
    return (z - lo) / span
def _non_number_report(values):
    '''Return '' if values holds only finite numbers, otherwise a short
    description of the problem (non-numeric data, nan count, inf count).'''
    try:
        arr = np.array(np.array(values), dtype=np.float64)
    except (ValueError, TypeError):
        # Stop here: nan/inf checks are meaningless on unconvertible data.
        return 'not numeric type'
    nan_count = int(np.isnan(arr).sum())
    inf_count = int(np.isinf(arr).sum())
    if nan_count and inf_count:
        return 'has %i nan and %i inf' % (nan_count, inf_count)
    if nan_count:
        return 'has %i nan' % nan_count
    if inf_count:
        return 'has %i inf' % inf_count
    return ''


def all_nums(X):
    '''
    Checks whether a vector/series/matrix/dataframe consists only of finite
    numbers, printing what is wrong (non-numeric data, nan, inf) otherwise.

    X may be a list, tuple, pandas Series, 1-D or 2-D numpy array or a
    pandas DataFrame. For a 2-D array or a DataFrame every column is checked
    separately, a line is printed for each bad column and a final
    SUCCESS/FAIL line is printed.

    Returns True if everything is a finite number and False otherwise.
    Raises ValueError for a numpy array whose number of dimensions is not
    1 or 2, and for any unsupported type of X.

    Example:
    A=eye(3)
    A[1,1]=inf
    A[2,2]=nan
    A=pd.DataFrame(A,columns=['a','b','c'])
    all_nums(A) # will return False and print that matrix is not composed only of numbers,
    # because it has 1 inf for column 'b' and one nan for column 'c'.
    Author: Victor Kitov, 03.2016.'''
    def check(values, col_name=None):
        # Print the problem (prefixed by the column name, if any) and
        # report whether this single vector is clean.
        problem = _non_number_report(values)
        if problem == '':
            return True
        if col_name is None:
            print(problem)
        else:
            print('%s: %s' % (col_name, problem))
        return False

    if isinstance(X, np.ndarray):
        if X.ndim == 1:
            return check(X)
        if X.ndim != 2:
            raise ValueError('Number of dimensions of X is more than 2!')
        # Check every column (no short-circuit) so that all problems are printed.
        flags = [check(X[:, col], 'column%d' % col) for col in range(X.shape[1])]
        ok = all(flags)
        if ok:
            print('SUCCESS, all columns have numbers.\n')
        else:
            print('FAIL, not all columns have numbers.')
        return ok
    elif isinstance(X, (list, tuple, pd.Series)):
        return check(X)
    elif isinstance(X, pd.DataFrame):
        flags = [check(X[col_name].values, col_name) for col_name in X.columns]
        ok = all(flags)
        if ok:
            print('SUCCESS, all columns have numbers.\n')
        else:
            print('FAIL, not all columns have numbers')
        return ok
    else:
        raise ValueError('Invalid type!')