From 8f57eca962d8ece345e6e9810ab5ebe95731e9e8 Mon Sep 17 00:00:00 2001 From: Mathu-lmn <80094438+Mathu-lmn@users.noreply.github.com> Date: Tue, 10 Dec 2024 22:00:46 -0500 Subject: [PATCH] =?UTF-8?q?alors=20peut=20=C3=AAtre=20=3F=3F=3F=3F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- predictions.py | 16 ++++++++++------ test.py | 21 +++++++++++++++++++-- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/predictions.py b/predictions.py index 6e0e4a2..16db927 100644 --- a/predictions.py +++ b/predictions.py @@ -19,7 +19,7 @@ from sklearn.ensemble import RandomForestClassifier from sklearn.neighbors import KNeighborsClassifier from sklearn.tree import DecisionTreeClassifier - from sklearn.metrics import accuracy_score, f1_score + from sklearn.metrics import classification_report, f1_score # from sklearn.model_selection import StratifiedKFold except ImportError: print("You need to install scikit-learn") @@ -102,11 +102,14 @@ model = models[name] param_dist = param_distributions[name] + def scoringfunc(estimator, X, y): + return f1_score(y, estimator.predict(X)) + randomized_search = RandomizedSearchCV( estimator=model, param_distributions=param_dist, n_iter=NUM_TRIALS, - scoring='f1', + scoring=scoringfunc, cv=3, random_state=42, n_jobs=-1, @@ -119,7 +122,9 @@ model.set_params(**best) model.fit(X_train, y_train.values.ravel()) - train_accuracy = accuracy_score(y_train, model.predict(X_train)) + if DEBUG: + print(classification_report(y_test, model.predict(X_test))) + f1_test = f1_score(y_test, model.predict(X_test)) signature = infer_signature(X_train, model.predict(X_train)) @@ -127,10 +132,9 @@ with mlflow.start_run(run_name=name) as run: mlflow.log_params(best) mlflow.sklearn.log_model(model, "model", signature=signature) - model_uri = mlflow.get_artifact_uri("model") - + mlflow.evaluate( - model=model_uri, + model="runs:/" + run.info.run_id + "/model", data=evalData, targets='label', model_type='classifier', diff --git a/test.py b/test.py index 1217b81..bf00c20 100644 --- a/test.py +++ b/test.py @@ -4,9 +4,11 @@ import random from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler +from sklearn.metrics import classification_report, f1_score mlflow.set_tracking_uri("https://mlflow.docsystem.xyz") -model = mlflow.pyfunc.load_model(model_uri="models:/attackdetection/latest") +# load each models in the RandomSearch experiment +model = mlflow.sklearn.load_model("runs:/12498379aa574e01b6da4cff313479dd/model") LABELS_NUM = ["BENIGN", "SUS"] @@ -27,6 +29,21 @@ X_train = pd.DataFrame(X_train, columns=features.columns) X_test = pd.DataFrame(X_test, columns=features.columns) +X_train = X_train.reset_index(drop=True) +y_train = y_train.reset_index(drop=True) +X_test = X_test.reset_index(drop=True) +y_test = y_test.reset_index(drop=True) + + +# calculate the model's metrics +def calculate_metrics(): + y_pred = model.predict(X_test) + print(classification_report(y_test, y_pred)) + + print(f1_score(y_test, y_pred)) + +calculate_metrics() + good_predictions, bad_predictions = 0, 0 def predict_row(row: int): @@ -41,7 +58,7 @@ def predict_row(row: int): return LABELS_NUM[y_test.iloc[row].values[0]] == prediction -while True: +while False: for i in tqdm(range(10000)): j = random.randint(0, len(X_test) - 1) is_good_prediction = predict_row(j)