Add flake8 rules

rnd4u-org · Jan 25, 2021 · 1d36548 · 1d36548
1 parent 457d541
commit 1d36548
Show file tree

Hide file tree

Showing 5 changed files with 66 additions and 35 deletions.
diff --git a/.github/workflows/flake.yml b/.github/workflows/flake.yml
@@ -15,3 +15,5 @@ jobs:
           python-version: "3.8"
       - name: flake8 Lint
         uses: py-actions/flake8@v1
+        with:
+          ignore: "W391,E501,W291,F821"
diff --git a/requirements.txt b/requirements.txt
@@ -2,3 +2,5 @@ jupyter==1.0.0
 matplotlib==3.3.3
 pandas==1.2.1
 sklearn
+flake8
+pycodestyle_magic
diff --git a/tasks/task_1/Classification_example_with_Iris_dataset.ipynb b/tasks/task_1/Classification_example_with_Iris_dataset.ipynb
@@ -19,6 +19,17 @@
     "from sklearn.metrics import confusion_matrix"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "choice-location",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext pycodestyle_magic\n",
+    "%flake8_on"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "accepting-threshold",
@@ -37,12 +48,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "id": "increasing-johnston",
    "metadata": {},
    "outputs": [],
    "source": [
-    "iris = datasets.load_iris() # This dataset boult in `sklearn` library so you can load it directly\n",
+    "# This dataset is built into `sklearn`, so you can load it directly\n",
+    "iris = datasets.load_iris()\n",
     "iris_features = iris['feature_names']"
    ]
   },
@@ -56,7 +68,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
    "id": "conditional-jungle",
    "metadata": {},
    "outputs": [
@@ -88,7 +100,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
    "id": "indirect-federal",
    "metadata": {},
    "outputs": [
@@ -130,7 +142,7 @@
     }
    ],
    "source": [
-    "for j in [1,2,3]:\n",
+    "for j in [1, 2, 3]:\n",
     "    for i, class_name in enumerate(iris.target_names):\n",
     "        sepal_length = iris.data[:, 0][iris.target == i]\n",
     "        sepal_width = iris.data[:, j][iris.target == i]\n",
@@ -157,7 +169,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "id": "accurate-central",
    "metadata": {},
    "outputs": [],
@@ -187,19 +199,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
    "id": "intellectual-proxy",
    "metadata": {},
    "outputs": [],
    "source": [
     "# Select first 2 features\n",
-    "X = iris.data[:, [0,1]]\n",
+    "X = iris.data[:, [0, 1]]\n",
     "y = iris.target"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "id": "meaning-conversion",
    "metadata": {},
    "outputs": [
@@ -234,7 +246,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
    "id": "retained-crossing",
    "metadata": {},
    "outputs": [
@@ -278,12 +290,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 10,
    "id": "noble-compression",
    "metadata": {},
    "outputs": [],
    "source": [
-    "X = iris.data # Use all iris features as predictors \n",
+    "X = iris.data  # Use all iris features as predictors\n",
     "y = iris.target"
    ]
   },
@@ -299,17 +311,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 11,
    "id": "intelligent-quest",
    "metadata": {},
    "outputs": [],
    "source": [
-    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=117)"
+    "X_train, X_test, y_train, y_test = train_test_split(\n",
+    "    X, y, test_size=0.25, random_state=117\n",
+    ")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 12,
    "id": "weird-hamburg",
    "metadata": {},
    "outputs": [],
@@ -335,7 +349,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 13,
    "id": "wanted-devil",
    "metadata": {},
    "outputs": [],
@@ -354,7 +368,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 14,
    "id": "several-ballet",
    "metadata": {},
    "outputs": [
@@ -426,7 +440,7 @@
        "virginica                        14  "
       ]
      },
-     "execution_count": 13,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }

diff --git a/tasks/task_1/Classification_example_with_Iris_dataset.py b/tasks/task_1/Classification_example_with_Iris_dataset.py
@@ -16,20 +16,28 @@
 from sklearn.metrics import confusion_matrix
 
 
+# In[2]:
+
+
+get_ipython().run_line_magic('load_ext', 'pycodestyle_magic')
+get_ipython().run_line_magic('flake8_on', '')
+
+
 # # Classification example with Iris dataset
 
 # The task for this example dataset is classifying a flower based on its features
 
-# In[2]:
+# In[3]:
 
 
-iris = datasets.load_iris() # This dataset boult in `sklearn` library so you can load it directly
+# This dataset is built into `sklearn`, so you can load it directly
+iris = datasets.load_iris()
 iris_features = iris['feature_names']
 
 
 # Print all flowers and features
 
-# In[3]:
+# In[4]:
 
 
 print(f"Dataset features:\n{iris['feature_names']}")
@@ -40,10 +48,10 @@
 # 
 # As we are limited by 2D displays and cannot visualize 4D data in a single plot - let's plot the data two axes at a time
 
-# In[4]:
+# In[5]:
 
 
-for j in [1,2,3]:
+for j in [1, 2, 3]:
     for i, class_name in enumerate(iris.target_names):
         sepal_length = iris.data[:, 0][iris.target == i]
         sepal_width = iris.data[:, j][iris.target == i]
@@ -62,7 +70,7 @@
 # 
 # For this let's focus on first 2 features ('sepal length (cm)', 'sepal width (cm)') to have consistent 2D plot
 
-# In[5]:
+# In[6]:
 
 
 def plot_decision(clf, title):
@@ -88,15 +96,15 @@ def plot_decision(clf, title):
     plt.show()
 
 
-# In[6]:
+# In[7]:
 
 
 # Select first 2 features
-X = iris.data[:, [0,1]]
+X = iris.data[:, [0, 1]]
 y = iris.target
 
 
-# In[7]:
+# In[8]:
 
 
 l_regression = LogisticRegression()
@@ -108,7 +116,7 @@ def plot_decision(clf, title):
 # 
 # To divide classes properly we need to introduce non-linear models such as Neural Networks or Decision Trees
 
-# In[8]:
+# In[9]:
 
 
 tree = DecisionTreeClassifier()
@@ -124,24 +132,26 @@ def plot_decision(clf, title):
 # You train the model on some part of the dataset (let's say 67% or 75%) and then check whether your model generalizes well by predicting on the data that is left
 # 
 
-# In[9]:
+# In[10]:
 
 
-X = iris.data # Use all iris features as predictors 
+X = iris.data  # Use all iris features as predictors
 y = iris.target
 
 
 # Split data randomly using [train_test_split](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html)
 # 
 # Set `test_size=0.25` to use 25% data for test and 75% for train
 
-# In[10]:
+# In[11]:
 
 
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=117)
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.25, random_state=117
+)
 
 
-# In[11]:
+# In[12]:
 
 
 # function for printing results
@@ -157,7 +167,7 @@ def eval_model(clf, X_test, y_test):
 
 # Train model on `train` data 
 
-# In[12]:
+# In[13]:
 
 
 l_regression = LogisticRegression()
@@ -166,7 +176,7 @@ def eval_model(clf, X_test, y_test):
 
 # And evaluate on `test` data 
 
-# In[13]:
+# In[14]:
 
 
 eval_model(l_regression, X_test, y_test)

diff --git a/tasks/task_1/README.md b/tasks/task_1/README.md
@@ -0,0 +1,3 @@
+# Classification example with Iris dataset
+
+Basic example of visualizing data and training classifiers
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		# Classification example with Iris dataset

		Basic example of visualizing data and training classifiers