Sergey cheremshinsky #9

Open: wants to merge 31 commits into base: main

Changes from all commits (31 commits)
6b1b2a1  Add files via upload (sergTch, Jan 29, 2021)
69c472b  Add files via upload (sergTch, Jan 29, 2021)
2c9387d  Delete npFunctions.py (sergTch, Jan 29, 2021)
0aa1ba5  Add files via upload (sergTch, Jan 29, 2021)
759d07f  Add files via upload (sergTch, Jan 29, 2021)
44c5008  Add files via upload (sergTch, Jan 29, 2021)
4d21270  Add files via upload (sergTch, Jan 29, 2021)
8877bea  Add files via upload (sergTch, Jan 29, 2021)
4e6ee2d  Add files via upload (sergTch, Jan 29, 2021)
c551077  Rename main.py to sergey_cheremshinsky/task1(titanic)/main.py (sergTch, Feb 3, 2021)
3adc561  Rename ann_model.py to sergey_cheremshinsky/task1(titanic)/ann_model.py (sergTch, Feb 3, 2021)
75cb6fb  Rename sklearn_models.py to sergey_cheremshinsky/task1(titanic)/sklea… (sergTch, Feb 3, 2021)
6451a09  Rename test.csv to sergey_cheremshinsky/task1(titanic)/test.csv (sergTch, Feb 3, 2021)
b9e290a  Rename titanic.csv to sergey_cheremshinsky/task1(titanic)/titanic.csv (sergTch, Feb 3, 2021)
8a5e2e6  Rename train.csv to sergey_cheremshinsky/task1(titanic)/train.csv (sergTch, Feb 3, 2021)
96a068b  Add files via upload (sergTch, Feb 3, 2021)
5a8ef42  Rename sergey_cheremshinsky/get_faces.py to sergey_cheremshinsky/task… (sergTch, Feb 3, 2021)
6676cae  Rename sergey_cheremshinsky/load_data.py to sergey_cheremshinsky/task… (sergTch, Feb 3, 2021)
4714d29  Rename sergey_cheremshinsky/main.py to sergey_cheremshinsky/task3(mas… (sergTch, Feb 3, 2021)
a46cbb9  Rename sergey_cheremshinsky/log.txt to sergey_cheremshinsky/task3(mas… (sergTch, Feb 3, 2021)
a3c8f90  Rename sergey_cheremshinsky/model.py to sergey_cheremshinsky/task3(ma… (sergTch, Feb 3, 2021)
b32c9d5  Add files via upload (sergTch, Feb 3, 2021)
5554256  Add files via upload (sergTch, Feb 3, 2021)
5e8426a  Add files via upload (sergTch, Feb 9, 2021)
8baff4c  Add files via upload (sergTch, Feb 9, 2021)
90c03d5  Add files via upload (sergTch, Feb 9, 2021)
e40aea8  Create main.py (sergTch, Feb 9, 2021)
e4bcc45  Rename sergey_cheremshinsky/task4(Faces)/main.py to sergey_cheremshin… (sergTch, Feb 9, 2021)
d6c180f  Add files via upload (sergTch, Feb 9, 2021)
d0b595e  Add files via upload (sergTch, Feb 9, 2021)
bddea71  Add files via upload (sergTch, Feb 9, 2021)
45 changes: 45 additions & 0 deletions sergey_cheremshinsky/task1(titanic)/ann_model.py
@@ -0,0 +1,45 @@
import tensorflow as tf


# Grid-search two hidden layers over sizes 10..149 and all activation pairs,
# appending each configuration and its mean accuracy to rez.txt.
def chooseBestModel(x_train, y_train, x_test, y_test):
activations = ['relu', 'sigmoid', 'softmax']
f = open("rez.txt", "a")

for i in range(10, 150):
for ai in activations:
for j in range(10, 150):
for aj in activations:
f.write(str([[i, ai], [j, aj]]) + " " + str(testLData(x_train, y_train, x_test, y_test, [[i, ai], [j, aj]])) + "\n")
f.close()


# Predict probabilities and threshold at 0.5 to get binary survival labels.
def ann_predict(model, x, possible_y):
y = model.predict(x)
y = (y[:, 0] >= 0.5).astype(int)
return y


# Train the same layer configuration 10 times and return the mean test accuracy,
# smoothing out run-to-run variance from random weight initialisation.
def testLData(x_train, y_train, x_test, y_test, lData):
s = 0
for i in range(10):
s += annModel(x_train, y_train, x_test, y_test, lData)[1]
return s / 10


# Build, train, and evaluate a feed-forward net: one Dense layer per (units, activation)
# pair in lData on top of the 33 preprocessed features, plus a single sigmoid output unit.
def annModel(x_train, y_train, x_test, y_test, lData):
layers = [tf.keras.layers.Flatten(input_shape=(33,))]
for i in lData:
layers.append(tf.keras.layers.Dense(i[0], activation=i[1]))
layers.append(tf.keras.layers.Dense(1, activation='sigmoid'))

model = tf.keras.Sequential(layers)
y_train = y_train.astype(float)
model.compile(
optimizer='adam',
        # The output layer already applies a sigmoid, so the loss receives probabilities, not logits.
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
metrics=['accuracy']
)
model.fit(x_train, y_train, epochs=100)
test_loss, test_acc = model.evaluate(x_test, y_test)
print("accuracy:", test_acc)
return model, test_acc
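
For context, a minimal usage sketch of the helpers above; the 33-column feature width and the [[50, 'relu']] configuration come from this PR, while the arrays themselves are hypothetical placeholders:

# Hypothetical usage sketch, not part of the PR.
import numpy as np
from ann_model import annModel, ann_predict

x_train = np.random.rand(700, 33)        # placeholder preprocessed features
y_train = np.random.randint(0, 2, 700)   # placeholder binary survival labels
x_test = np.random.rand(180, 33)
y_test = np.random.randint(0, 2, 180)

# One hidden layer with 50 ReLU units, the configuration used in main.py.
model, acc = annModel(x_train, y_train, x_test, y_test, [[50, 'relu']])
predictions = ann_predict(model, x_test, [])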
156 changes: 156 additions & 0 deletions sergey_cheremshinsky/task1(titanic)/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
from sklearn_models import sklearn_models, boosted_test
from ann_model import annModel, ann_predict

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import warnings

warnings.filterwarnings('ignore')

enc = LabelEncoder()

sc = StandardScaler()

data = pd.read_csv("train.csv")
print(data.shape)


# One-hot encode a categorical column: add one 0/1 indicator column per distinct non-NaN value,
# then drop the original column.
def splitColumn(data, feature):
vals = set(data[feature])
for val in vals:
if str(val) == "nan":
continue
a = []
for i in range(data.shape[0]):
if data[feature][i] == val:
a.append(1)
else:
a.append(0)
data[feature + "_" + str(val)] = a
return data.drop([feature], axis=1)


# Bin a numeric column into indicator columns; diapasones lists the bin boundaries,
# producing len(diapasones) + 1 half-open [lower, upper) ranges.
def splitFloatColumn(data, feature, diapasones):
for i in range(len(diapasones) + 1):
a = []
for j in range(data.shape[0]):
t = True
if i > 0:
t &= (data[feature][j] >= diapasones[i - 1])
if i < len(diapasones):
t &= (data[feature][j] < diapasones[i])
if t:
a.append(1)
else:
a.append(0)
data[feature + "_" + str(i)] = a
return data.drop([feature], axis=1)


# Plot survival counts against the selected features (currently only Fare).
def showSurvival(data):
features = ['Fare']
rows = 1
cols = 1

fig, axs = plt.subplots(rows, cols, figsize=(cols * 3.2, rows * 3.2))

for r in range(rows):
for c in range(cols):
i = r * cols + c
sns.countplot(data[features[i]], hue=data["Survived"], ax=axs)
axs.legend(title="Survived", loc='upper right')

plt.tight_layout()
plt.show()


def printVals(data):
for val in data:
print(data[val].value_counts())
print()


# Impute missing ages from the honorific in the Name column, then expand categorical
# and binned numeric features into 0/1 indicator columns.
def preprocess(data):
for i in range(data.shape[0]):
if np.isnan(data["Age"][i]):
# data["Age"][i] = 1000
if "Ms" in data["Name"][i] or "Miss" in data["Name"][i]:
data["Age"][i] = 15
elif "Mr" in data["Name"][i] or "Mrs" in data["Name"][i]:
data["Age"][i] = 30
elif "Don." in data["Name"][i]:
data["Age"][i] = 40
else:
data["Age"][i] = 10

data = data.drop(['PassengerId', 'Ticket', 'Name', 'Cabin'], axis=1)
print(data)
data = splitColumn(data, "Embarked")
data = splitColumn(data, "Sex")
data = splitColumn(data, "Pclass")
data = splitFloatColumn(data, "Parch", [1, 2, 4])
data = splitFloatColumn(data, "SibSp", [1, 2, 4])
data = splitFloatColumn(data, "Age", [2, 5, 10, 16, 20, 28, 35, 45])
data = splitFloatColumn(data, "Fare", [7, 9, 10, 15, 30, 60, 80])

for feature in data:
print(feature)
for i in range(data.shape[0]):
if np.isnan(data[feature][i]):
data[feature][i] = 0

print(data)
return data


# The first column after preprocessing is the Survived label; everything else is a feature.
def split_data(data):
    return (data.iloc[:, 1:].values, data.iloc[:, 0].values)


data = preprocess(data)
x, y = split_data(data)

a = []
for i in range(1):
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    # The split above is then overridden so the models train on the full data set;
    # the held-out x_test / y_test therefore overlaps the training data.
    x_train = x
    y_train = y
# exit()
ann, acc = annModel(x_train, y_train, x_test, y_test, [[50, 'relu']])

# chooseBestModel(x_train, y_train, x_test, y_test)
models = sklearn_models(x_train, y_train, x_test, y_test)

rez = boosted_test(models, x_test, y_test)
a.append(rez)

print()
print(a)
print("mean:", sum(a) / len(a))


def generateAns():
test_ids = pd.read_csv("test.csv")["PassengerId"]
test = pd.read_csv("test.csv")

    # test.csv has no Survived column, so split_data treats its first feature column as the label;
    # only the remaining columns in x are used for prediction.
    test = preprocess(test)
    x, y = split_data(test)

# Y_pred = models_predict(models, x)
y = ann_predict(ann, x, [])

submission = pd.DataFrame({
"PassengerId": test_ids,
"Survived": y
})
submission.to_csv('./titanic.csv', index=False)
print('Exported!')


generateAns()
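
As an aside, the manual splitColumn / splitFloatColumn encoding above corresponds roughly to pandas built-ins; a sketch under that assumption, not part of the PR:

# Hypothetical sketch: the same one-hot and binning steps via pandas.
import pandas as pd

df = pd.read_csv("train.csv")

# Roughly splitColumn: one indicator column per category; NaN rows get all zeros.
df = pd.get_dummies(df, columns=["Embarked", "Sex", "Pclass"], dummy_na=False)

# Roughly splitFloatColumn: bin Fare on the same [lower, upper) boundaries, then one-hot the bins.
fare_bins = [-float("inf"), 7, 9, 10, 15, 30, 60, 80, float("inf")]
df["Fare_bin"] = pd.cut(df["Fare"], bins=fare_bins, right=False)
df = pd.get_dummies(df, columns=["Fare_bin"])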
53 changes: 53 additions & 0 deletions sergey_cheremshinsky/task1(titanic)/sklearn_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Fit a set of scikit-learn classifiers on the training data and return those used in the voting ensemble.
def sklearn_models(x_train, y_train, x_test, y_test):
from sklearn.tree import DecisionTreeClassifier
tree = DecisionTreeClassifier(criterion="entropy")
tree.fit(x_train, y_train)

from sklearn.linear_model import LogisticRegression
logReg = LogisticRegression(max_iter=1000000)
logReg.fit(x_train, y_train)

from sklearn.neighbors import KNeighborsClassifier
neighbors = KNeighborsClassifier(n_neighbors=5, metric="minkowski", p=2)
neighbors.fit(x_train, y_train)

from sklearn.ensemble import RandomForestClassifier
forest = RandomForestClassifier(n_estimators=10, criterion="entropy")
forest.fit(x_train, y_train)

from sklearn.naive_bayes import GaussianNB
gauss = GaussianNB()
gauss.fit(x_train, y_train)

from sklearn.svm import SVC
svc = SVC(kernel="linear")
svc.fit(x_train, y_train)

    svcRBF = SVC(kernel="rbf")
    svcRBF.fit(x_train, y_train)

    # neighbors and svcRBF are fitted above but not included in the returned ensemble.
    return [tree, forest, logReg, gauss, svc]


# Majority vote: average the 0/1 predictions of all models and threshold at 0.5.
def models_predict(models, x):
    rez = sum(model.predict(x) for model in models) / len(models)
for i in range(len(rez)):
if rez[i] < 0.5:
rez[i] = 0
else:
rez[i] = 1
return rez.astype(int)


# Print each model's individual test accuracy, then compute and return the accuracy of the majority vote.
def boosted_test(models, x_test, y_test):
for model in models:
print(model.score(x_test, y_test))
rez = models_predict(models, x_test)
k = t = 0

for i in range(len(y_test)):
k += 1
if rez[i] == y_test[i]:
t += 1
print(k, t, t / k)
return t / k
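
The averaging in models_predict amounts to a majority vote over the five returned classifiers. A sketch of the same idea with scikit-learn's built-in VotingClassifier, shown only for comparison and not part of the PR:

# Hypothetical sketch: the same majority vote via VotingClassifier.
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

ensemble = VotingClassifier(
    estimators=[
        ("tree", DecisionTreeClassifier(criterion="entropy")),
        ("forest", RandomForestClassifier(n_estimators=10, criterion="entropy")),
        ("logreg", LogisticRegression(max_iter=1000000)),
        ("gauss", GaussianNB()),
        ("svc", SVC(kernel="linear")),
    ],
    voting="hard",  # majority vote over predicted labels
)
# ensemble.fit(x_train, y_train); print(ensemble.score(x_test, y_test))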