-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmanipulate_data_frame.py
128 lines (88 loc) · 3.53 KB
/
manipulate_data_frame.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# read old_df.pq
# create new indicators
# create your class of visualizations
# save & bokeh for images
# drop all rows with NaNs
# review and learn pyspark
import logging
from numba import jit
from sklearn import preprocessing
import os
import numpy as np
import pandas as pd
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show, output_file
class Stock_Data_Test_Args:
    """Hold the data-source locations used by this script.

    Attributes set in ``__init__``:
        NASDAQ_URL: URL of the Wikipedia NASDAQ-100 page.
        list_ticks: directory that is listed to discover ticker files
            (expected to contain one ``<ticker>.parquet`` file per stock).
    """

    # Extension stripped from file names to obtain the ticker symbol.
    _PARQUET_SUFFIX = ".parquet"

    def __init__(self):
        # stock data
        self.NASDAQ_URL = r"https://en.wikipedia.org/wiki/NASDAQ-100"
        self.list_ticks = r'C:\Users\shawn paul\Desktop\PyFinanceProj\NASDAQPrediction\Stock_Data'

    def get_ticker_list(self):
        """Return the ticker symbols found in the ``self.list_ticks`` directory.

        Every directory entry ending in ``.parquet`` (case-insensitive)
        contributes its name without that extension. Other entries are
        skipped instead of having their last eight characters chopped off.
        The order is whatever ``os.listdir`` yields.

        Returns:
            list[str]: ticker symbols, one per parquet file.

        Raises:
            OSError: if the directory cannot be listed.
        """
        suffix = self._PARQUET_SUFFIX
        return [
            entry[:-len(suffix)]
            for entry in os.listdir(self.list_ticks)
            if entry.lower().endswith(suffix)
        ]
# needs a list of stock tickers passed into this
class create_and_plot_indicators:
    """Read one ticker's price data, add a Fourier-smoothed series and plot it.

    The constructor takes a list of ticker symbols; only the first one is
    used by ``read_stock`` / ``create_plots``. All file locations are
    hard-coded absolute Windows paths.
    """

    def __init__(self, stock):
        """Store the ticker list and the fixed settings used by the methods.

        Args:
            stock: non-empty sequence of ticker symbols; ``stock[0]`` becomes
                the symbol that is read and plotted.

        Raises:
            IndexError: if ``stock`` is empty.
        """
        self.stock_list = stock
        self.sym = self.stock_list[0]
        self.fourier = "fft"        # name of the smoothed column added to the frame
        self.price_col = 'Close'    # column the transform and plot read prices from
        self.freq = 35              # FFT coefficients from this index onward are zeroed
        self.old_df_path = r"C:\Users\shawn paul\Desktop\PyFinanceProj\NASDAQPrediction\stored_data\old_df.parquet"
        self.data_path = r"C:\Users\shawn paul\Desktop\PyFinanceProj\NASDAQPrediction\stored_data"

    def read_stock(self):
        """Read and return the parquet file of ``self.sym`` as a DataFrame."""
        path = r"C:\Users\shawn paul\Desktop\PyFinanceProj\NASDAQPrediction\Stock_Data\{}.parquet".format(self.sym)
        return pd.read_parquet(path)

    def create_plots(self):
        """Plot the price column and its smoothed series for ``self.sym``.

        Writes the figure to ``stcok_price.html`` and opens it via Bokeh's
        ``show``.
        """
        df = self.read_stock()
        df = self.fourier_transform_plots(df)
        # The index must be named before the source is built: ColumnDataSource
        # copies the frame on construction, so a later rename would not give
        # the source a 'Date' column for the glyphs below.
        df.index.name = 'Date'
        source = ColumnDataSource(df)
        # NOTE(review): plot_width/plot_height are not accepted by newer Bokeh
        # releases (width/height replace them) - confirm the installed version.
        p = figure(x_axis_type="datetime", plot_width=800, plot_height=350)
        p.line('Date', self.price_col, source=source)
        p.line('Date', self.fourier, source=source)
        output_file("stcok_price.html")
        show(p)

    def fourier_transform_plots(self, df1):
        """Add a Fourier-smoothed copy of the price column and return the frame.

        Takes the FFT of ``df1[self.price_col]``, zeroes every coefficient
        from index ``self.freq`` onward, inverts the transform and stores the
        real part in column ``self.fourier`` ("fft").

        Previously the implementation was wrapped in a string literal, so the
        method returned ``None`` and ``create_plots`` failed on the result.

        Args:
            df1: DataFrame containing the ``self.price_col`` column. It is
                modified in place (the new column is added to it).

        Returns:
            The same DataFrame object, with the smoothed column added.

        Raises:
            KeyError: if ``self.price_col`` is not a column of ``df1``.
        """
        df = df1
        spectrum = np.fft.fft(df[self.price_col].to_numpy())
        spectrum[self.freq:] = 0
        df[self.fourier] = np.fft.ifft(spectrum).real
        return df

    def read_and_display_old_df(self):
        """Read ``self.old_df_path`` with pyarrow and print its first five rows."""
        path = self.old_df_path
        df = pd.read_parquet(path, engine='pyarrow')
        print(df.head(5))

    def EDA_old_df(self):
        """Placeholder; not implemented yet."""
        # return
        pass

    def clean_missing_data(self):
        """Placeholder for missing-data cleaning; not implemented yet.

        The original definition was named ``clean_missing data`` (a syntax
        error) and had no body.
        """
        # interpolate cubically forward:https://pandas.pydata.org/pandas-docs/stable/user_guide/missing_data.html#filling-missing-values-fillna
        # return a csv that has the starting data of each column
        pass
def create_indicators(df1, price_col, syms):
    """Add per-symbol moving-average indicator columns to ``df1``.

    For every symbol in ``syms`` three columns are added, each suffixed with
    the symbol: ``ma7_<sym>`` (7-period rolling mean), ``ma14_<sym>``
    (14-period rolling mean) and ``ema_<sym>`` (exponential moving average
    with span 7).

    Fixes over the previous version: the column names had no ``{}``
    placeholder, so every symbol overwrote the same ``ma7_`` / ``ma14_`` /
    ``ema_`` columns, and ``ema_`` was computed as a plain 7-period rolling
    mean, identical to ``ma7_``.

    Args:
        df1: DataFrame holding the price columns. It is modified in place.
        price_col: name of the price column; may contain a ``{}`` placeholder
            that is filled with the symbol (e.g. ``"{}_Close"``). Without a
            placeholder the same column is used for every symbol.
        syms: iterable of symbol strings.

    Returns:
        The same DataFrame object with the indicator columns added.

    Raises:
        KeyError: if a formatted price column is missing from ``df1``.
    """
    df = df1
    for sym in syms:
        prices = df[price_col.format(sym)]
        df['ma7_{}'.format(sym)] = prices.rolling(window=7).mean()
        df['ma14_{}'.format(sym)] = prices.rolling(window=14).mean()
        # adjust=False gives the recursive EMA: y_t = (1 - a) * y_{t-1} + a * x_t
        df['ema_{}'.format(sym)] = prices.ewm(span=7, adjust=False).mean()
    return df
# delete after use
def get_test_stock():
    """Placeholder with no behaviour yet; takes nothing and returns ``None``."""
    return None
# Script body: these statements run whenever the module is executed or imported.
# Collect the ticker names from the listing of the Stock_Data directory.
stk = Stock_Data_Test_Args()
stock_ticker = stk.get_ticker_list()
# Build the indicator/plot helper from that ticker list, then print the first
# five rows of the stored old_df parquet file.
c = create_and_plot_indicators(stock_ticker)
c.read_and_display_old_df()