[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
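pre-commit.ci runs the hooks declared in the repository's .pre-commit-config.yaml and commits whatever those hooks rewrite. That config file is not part of this diff, so the snippet below is only a minimal sketch of the kind of configuration that would yield a commit like this one; the trailing-whitespace hook id comes from the standard pre-commit/pre-commit-hooks collection, and the rev pin is an assumption rather than EconML's actual setting.

    # Hypothetical sketch of a .pre-commit-config.yaml; the repository's real config may differ.
    repos:
      - repo: https://github.com/pre-commit/pre-commit-hooks
        rev: v4.6.0  # assumed pin, not taken from this repo
        hooks:
          - id: trailing-whitespace  # strips trailing spaces, the only fix applied in this commit

With such a config in place, running pre-commit run --all-files locally reproduces the same edits that the bot commits automatically in CI.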
pre-commit-ci[bot] authored and kbattocchi committed Aug 11, 2024
1 parent f52f422 commit 1fbeb76
Showing 23 changed files with 121 additions and 121 deletions.
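Every hunk below makes the same mechanical fix: the removed (-) line ends in trailing whitespace and the added (+) line is identical without it, which is why the addition and deletion counts match exactly.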
@@ -180,7 +180,7 @@
 " return\n",
 "\n",
 " def fit(self, X, y, sample_weight = None, **fit_params):\n",
-" self.best_ind_ = np.argmax([gcv.fit(X, y, sample_weight = sample_weight, **fit_params).best_score_ \n",
+" self.best_ind_ = np.argmax([gcv.fit(X, y, sample_weight = sample_weight, **fit_params).best_score_\n",
 " for gcv in self._gcv_list])\n",
 " self.best_estimator_ = self._gcv_list[self.best_ind_].best_estimator_\n",
 " self.best_score_ = self._gcv_list[self.best_ind_].best_score_\n",
@@ -934,7 +934,7 @@
 "plt.plot(X_test, expected_te, 'b--', label='True effect')\n",
 "plt.ylabel('Treatment Effect')\n",
 "plt.xlabel('x')\n",
-"plt.ylim(-0.5, 1.5) \n",
+"plt.ylim(-0.5, 1.5)\n",
 "plt.legend()\n",
 "plt.show()"
 ]
8 changes: 4 additions & 4 deletions notebooks/CATE validation.ipynb
@@ -246,8 +246,8 @@
 "source": [
 "# Initialize DRTester and fit/predict nuisance models\n",
 "dml_tester = DRTester(\n",
-" model_regression=model_regression, \n",
-" model_propensity=model_propensity, \n",
+" model_regression=model_regression,\n",
+" model_propensity=model_propensity,\n",
 " cate=est_dm\n",
 ").fit_nuisance(Xval, Dval, Yval, Xtrain, Dtrain, Ytrain)\n",
 "\n",
@@ -607,8 +607,8 @@
 "source": [
 "# Initialize DRTester and fit/predict nuisance models\n",
 "t_tester = DRTester(\n",
-" model_regression=model_regression, \n",
-" model_propensity=model_propensity, \n",
+" model_regression=model_regression,\n",
+" model_propensity=model_propensity,\n",
 " cate=est_t\n",
 ").fit_nuisance(Xval, Dval, Yval, Xtrain, Dtrain, Ytrain)\n",
 "\n",
20 changes: 10 additions & 10 deletions notebooks/Causal Forest and Orthogonal Random Forest Examples.ipynb
@@ -125,11 +125,11 @@
 "coefs_Y = np.random.uniform(0, 1, size=support_size)\n",
 "def epsilon_sample(n):\n",
 " return np.random.uniform(-1, 1, size=n)\n",
-"# Treatment support \n",
+"# Treatment support\n",
 "support_T = support_Y\n",
 "coefs_T = np.random.uniform(0, 1, size=support_size)\n",
 "def eta_sample(n):\n",
-" return np.random.uniform(-1, 1, size=n) \n",
+" return np.random.uniform(-1, 1, size=n)\n",
 "\n",
 "# Generate controls, covariates, treatments and outcomes\n",
 "W = np.random.normal(0, 1, size=(n, n_w))\n",
@@ -558,7 +558,7 @@
 "support_T = support_Y\n",
 "coefs_T = np.random.uniform(0, 1, size=support_size)\n",
 "def eta_sample(n):\n",
-" return np.random.uniform(-1, 1, size=n) \n",
+" return np.random.uniform(-1, 1, size=n)\n",
 "\n",
 "# Generate controls, covariates, treatments and outcomes\n",
 "W = np.random.normal(0, 1, size=(n, n_w))\n",
@@ -595,7 +595,7 @@
 " max_depth=30, subsample_ratio=subsample_ratio,\n",
 " propensity_model = LogisticRegression(C=1/(X.shape[0]*lambda_reg), penalty='l1', solver='saga'),\n",
 " model_Y = Lasso(alpha=lambda_reg),\n",
-" propensity_model_final=LogisticRegression(C=1/(X.shape[0]*lambda_reg), penalty='l1', solver='saga'), \n",
+" propensity_model_final=LogisticRegression(C=1/(X.shape[0]*lambda_reg), penalty='l1', solver='saga'),\n",
 " model_Y_final=WeightedLasso(alpha=lambda_reg)\n",
 ")"
 ]
@@ -899,11 +899,11 @@
 " coefs_Y = np.random.uniform(0, 1, size=support_size)\n",
 " def epsilon_sample(n):\n",
 " return np.random.uniform(-1, 1, size=n)\n",
-" # Treatment support \n",
+" # Treatment support\n",
 " support_T = support_Y\n",
 " coefs_T = np.random.uniform(0, 1, size=(support_size, n_treatments))\n",
 " def eta_sample(n):\n",
-" return np.random.uniform(-1, 1, size=n) \n",
+" return np.random.uniform(-1, 1, size=n)\n",
 " # Generate controls, covariates, treatments and outcomes\n",
 " W = np.random.normal(0, 1, size=(n, n_w))\n",
 " X = np.random.uniform(0, 1, size=(n, n_x))\n",
@@ -1514,7 +1514,7 @@
 "\n",
 "if not os.path.isfile(file_name):\n",
 " print(\"Downloading file (this might take a few seconds)...\")\n",
-" urllib.request.urlretrieve(\"https://msalicedatapublic.z5.web.core.windows.net/datasets/OrangeJuice/oj_large.csv\", \n",
+" urllib.request.urlretrieve(\"https://msalicedatapublic.z5.web.core.windows.net/datasets/OrangeJuice/oj_large.csv\",\n",
 " file_name)\n",
 "oj_data = pd.read_csv(file_name)\n",
 "oj_data.head()"
@@ -1564,11 +1564,11 @@
 "outputs": [],
 "source": [
 "est = DMLOrthoForest(\n",
-" n_trees=n_trees, min_leaf_size=min_leaf_size, max_depth=max_depth, \n",
+" n_trees=n_trees, min_leaf_size=min_leaf_size, max_depth=max_depth,\n",
 " subsample_ratio=subsample_ratio,\n",
 " model_T=Lasso(alpha=0.1),\n",
 " model_Y=Lasso(alpha=0.1),\n",
-" model_T_final=WeightedLassoCVWrapper(cv=3), \n",
+" model_T_final=WeightedLassoCVWrapper(cv=3),\n",
 " model_Y_final=WeightedLassoCVWrapper(cv=3)\n",
 " )"
 ]
@@ -1615,7 +1615,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"min_income = 10.0 \n",
+"min_income = 10.0\n",
 "max_income = 11.1\n",
 "delta = (max_income - min_income) / 100\n",
 "X_test = np.arange(min_income, max_income + delta - 0.001, delta).reshape(-1, 1)"
8 changes: 4 additions & 4 deletions notebooks/Causal Model Selection with the RScorer.ipynb
@@ -47,7 +47,7 @@
 "source": [
 "## Ignore warnings\n",
 "import warnings\n",
-"warnings.filterwarnings('ignore') "
+"warnings.filterwarnings('ignore')"
 ]
 },
 {
@@ -123,7 +123,7 @@
 "support_T = support_Y\n",
 "coefs_T = np.random.uniform(0, 1, size=support_size)\n",
 "def eta_sample(n):\n",
-" return np.random.uniform(-1, 1, size=n) \n",
+" return np.random.uniform(-1, 1, size=n)\n",
 "\n",
 "# Generate controls, covariates, treatments and outcomes\n",
 "X = np.random.uniform(0, 1, size=(n, n_x))\n",
@@ -446,7 +446,7 @@
 "outputs": [],
 "source": [
 "X_train, X_val, T_train, T_val,\\\n",
-"Y_train, Y_val, expected_te_train, expected_te_val = train_test_split(X, T, Y, expected_te, \n",
+"Y_train, Y_val, expected_te_train, expected_te_val = train_test_split(X, T, Y, expected_te,\n",
 " test_size=.3, random_state=123)"
 ]
 },
@@ -632,7 +632,7 @@
 "source": [
 "# Visualization of bias distribution\n",
 "plt.figure(figsize=(15, 5))\n",
-"plt.violinplot([np.abs(mdl.effect(X).flatten() - expected_te) for _, mdl in models] + \n",
+"plt.violinplot([np.abs(mdl.effect(X).flatten() - expected_te) for _, mdl in models] +\n",
 " [np.abs(best.effect(X).flatten() - expected_te)] +\n",
 " [np.abs(ensemble.effect(X).flatten() - expected_te)], showmeans=True)\n",
 "plt.ylabel(\"Bias distribution\")\n",
12 changes: 6 additions & 6 deletions notebooks/Choosing First Stage Models.ipynb
@@ -158,8 +158,8 @@
 ],
 "source": [
 "def first_stage():\n",
-" return GridSearchCV(estimator=GradientBoostingRegressor(), param_grid={\"max_depth\": [3, 5, None], \n",
-" \"n_estimators\": (50, 100, 200)}, \n",
+" return GridSearchCV(estimator=GradientBoostingRegressor(), param_grid={\"max_depth\": [3, 5, None],\n",
+" \"n_estimators\": (50, 100, 200)},\n",
 " cv=2, n_jobs=-1)\n",
 "est = LinearDML(\n",
 " model_y=first_stage(),\n",
],
"source": [
"def first_stage():\n",
" return GridSearchCVList([Lasso(max_iter=10000), GradientBoostingRegressor()], \n",
" param_grid_list=[{\"alpha\": [0.001, 0.01, 0.1, 1, 10]}, \n",
" {\"max_depth\": [3, 5, None], \n",
" \"n_estimators\": [50, 100, 200]}], \n",
" return GridSearchCVList([Lasso(max_iter=10000), GradientBoostingRegressor()],\n",
" param_grid_list=[{\"alpha\": [0.001, 0.01, 0.1, 1, 10]},\n",
" {\"max_depth\": [3, 5, None],\n",
" \"n_estimators\": [50, 100, 200]}],\n",
" cv=2)"
]
},
@@ -293,7 +293,7 @@
 "Y = train_data[\"log_demand\"].values\n",
 "T = train_data[\"log_price\"].values\n",
 "X = train_data[[\"income\"]].values # features\n",
-"confounder_names = [\"account_age\", \"age\", \"avg_hours\", \"days_visited\", \"friends_count\", \"has_membership\", \n",
+"confounder_names = [\"account_age\", \"age\", \"avg_hours\", \"days_visited\", \"friends_count\", \"has_membership\",\n",
 " \"is_US\", \"songs_purchased\"]\n",
 "W = train_data[confounder_names].values"
@@ -352,7 +352,7 @@
 ],
 "source": [
 "# fit through dowhy\n",
-"est_dw = est.dowhy.fit(Y, T, X=X, W=W, \n",
+"est_dw = est.dowhy.fit(Y, T, X=X, W=W,\n",
 " outcome_names=[\"log_demand\"], treatment_names=[\"log_price\"], feature_names=[\"income\"],\n",
 " confounder_names=confounder_names, inference=\"statsmodels\")"
 ]
@@ -382,7 +382,7 @@
 " )\n",
 "except Exception:\n",
 " # Fall back on default graph view\n",
-" est_dw.view_model(layout=None) "
+" est_dw.view_model(layout=None)"
 ]
 },
 {
@@ -707,7 +707,7 @@
 "source": [
 "# Get treatment effect and its confidence interval\n",
 "te_pred = est_nonparam_dw.effect(X_test).flatten()\n",
-"te_pred_interval = est_nonparam_dw.effect_interval(X_test) "
+"te_pred_interval = est_nonparam_dw.effect_interval(X_test)"
 ]
 },
 {
@@ -902,7 +902,7 @@
 ],
 "source": [
 "res_placebo = est_nonparam_dw.refute_estimate(\n",
-" method_name=\"placebo_treatment_refuter\", placebo_type=\"permute\", \n",
+" method_name=\"placebo_treatment_refuter\", placebo_type=\"permute\",\n",
 " num_simulations=3\n",
 ")\n",
 "print(res_placebo)"
@@ -951,7 +951,7 @@
 ],
 "source": [
 "res_subset = est_nonparam_dw.refute_estimate(\n",
-" method_name=\"data_subset_refuter\", subset_fraction=0.8, \n",
+" method_name=\"data_subset_refuter\", subset_fraction=0.8,\n",
 " num_simulations=3)\n",
 "print(res_subset)"
 ]
@@ -1079,7 +1079,7 @@
 "policy_dic[\"Give No One Discount\"] = np.mean(revenue_fn(train_data, 0, 0.1, 1, np.ones(len(X))))\n",
 "\n",
 "## follow our policy, but give -10% discount for the group doesn't recommend to give discount\n",
-"policy_dic[\"Our Policy + Give Negative Discount for No-Discount Group\"] = np.mean(revenue_fn(train_data, \n",
+"policy_dic[\"Our Policy + Give Negative Discount for No-Discount Group\"] = np.mean(revenue_fn(train_data,\n",
 " -0.1, 0.1, 1, policy))\n",
 "\n",
 "## give everyone -10% discount\n",
@@ -729,7 +729,7 @@
 "policy_dic[\"Give No One Discount\"] = np.mean(revenue_fn(train_data, 0, 0.1, 1, np.ones(len(X))))\n",
 "\n",
 "## follow our policy, but give -10% discount for the group doesn't recommend to give discount\n",
-"policy_dic[\"Our Policy + Give Negative Discount for No-Discount Group\"] = np.mean(revenue_fn(train_data, \n",
+"policy_dic[\"Our Policy + Give Negative Discount for No-Discount Group\"] = np.mean(revenue_fn(train_data,\n",
 " -0.1, 0.1, 1, policy))\n",
 "\n",
 "## give everyone -10% discount\n",
@@ -78,7 +78,7 @@
 "# EconML imports\n",
 "from econml.dr import LinearDRLearner\n",
 "\n",
-"# DoWhy imports \n",
+"# DoWhy imports\n",
 "\n",
 "import matplotlib.pyplot as plt\n",
 "import seaborn as sns\n",
@@ -284,7 +284,7 @@
 "W = multi_data.drop(\n",
 " columns=[\"Tech Support\", \"Discount\", \"Revenue\", \"Size\"]\n",
 ") # controls\n",
-"confounder_names = [\"Global Flag\", \"Major Flag\", \"SMC Flag\", \"Commercial Flag\", \n",
+"confounder_names = [\"Global Flag\", \"Major Flag\", \"SMC Flag\", \"Commercial Flag\",\n",
 " \"IT Spend\", \"Employee Count\", \"PC Count\"]"
 ]
 },
@@ -487,7 +487,7 @@
 "source": [
 "# fit through dowhy\n",
 "test_customers = X.iloc[:1000].values\n",
-"est_dw = est.dowhy.fit(Y, T, X=X, W=W, \n",
+"est_dw = est.dowhy.fit(Y, T, X=X, W=W,\n",
 " outcome_names=[\"Revenue\"], treatment_names=[\"discrete_T\"], feature_names=[\"Size\"],\n",
 " confounder_names=confounder_names, target_units=test_customers)"
 ]
@@ -537,7 +537,7 @@
 " )\n",
 "except Exception:\n",
 " # Fall back on default graph view\n",
-" est_dw.view_model(layout=None) "
+" est_dw.view_model(layout=None)"
 ]
 },
 {
@@ -1327,7 +1327,7 @@
 ],
 "source": [
 "res_placebo = est_dw.refute_estimate(\n",
-" method_name=\"placebo_treatment_refuter\", placebo_type=\"permute\", \n",
+" method_name=\"placebo_treatment_refuter\", placebo_type=\"permute\",\n",
 " num_simulations=5\n",
 ")\n",
 "print(res_placebo)"
@@ -1426,7 +1426,7 @@
 "source": [
 "# Removing a random subset of the data\n",
 "res_subset = est_dw.refute_estimate(\n",
-" method_name=\"data_subset_refuter\", subset_fraction=0.8, \n",
+" method_name=\"data_subset_refuter\", subset_fraction=0.8,\n",
 " num_simulations=3)\n",
 "print(res_subset)"
 ]
@@ -86,7 +86,7 @@
 "import lightgbm as lgb\n",
 "from sklearn.preprocessing import PolynomialFeatures\n",
 "\n",
-"# DoWhy imports \n",
+"# DoWhy imports\n",
 "\n",
 "# EconML imports\n",
 "from econml.iv.dr import LinearIntentToTreatDRIV\n",
@@ -145,7 +145,7 @@
 "outputs": [],
 "source": [
 "# Import the sample AB data\n",
-"file_url = \"https://msalicedatapublic.z5.web.core.windows.net/datasets/RecommendationAB/ab_sample.csv\" \n",
+"file_url = \"https://msalicedatapublic.z5.web.core.windows.net/datasets/RecommendationAB/ab_sample.csv\"\n",
 "ab_data = pd.read_csv(file_url)"
 ]
 },
@@ -363,7 +363,7 @@
 },
 "outputs": [],
 "source": [
-"# Define underlying treatment effect function \n",
+"# Define underlying treatment effect function\n",
 "def TE_fn(X):\n",
 " return (0.2 + 0.3 * X['days_visited_free_pre'] - 0.2 * X['days_visited_hs_pre'] + X['os_type_osx']).values\n",
 "true_TE = TE_fn(X_data)\n",
@@ -461,7 +461,7 @@
 "source": [
 "# Visualize causal graph\n",
 "plt.figure(figsize=(10,8))\n",
-"est_dw.view_model(layout=None) "
+"est_dw.view_model(layout=None)"
 ]
 },
 {
@@ -1094,7 +1094,7 @@
 ],
 "source": [
 "res_unobserved = est_dw.refute_estimate(method_name=\"add_unobserved_common_cause\",\n",
-" confounders_effect_on_treatment=\"binary_flip\", \n",
+" confounders_effect_on_treatment=\"binary_flip\",\n",
 " confounders_effect_on_outcome=\"linear\",\n",
 " effect_strength_on_treatment=0.05, effect_strength_on_outcome=0.5)\n",
 "print(res_unobserved)"
@@ -1127,7 +1127,7 @@
 }
 ],
 "source": [
-"res_placebo = est_dw.refute_estimate(method_name=\"placebo_treatment_refuter\", placebo_type=\"permute\", \n",
+"res_placebo = est_dw.refute_estimate(method_name=\"placebo_treatment_refuter\", placebo_type=\"permute\",\n",
 " num_simulations=2)\n",
 "print(res_placebo)"
 ]
@@ -1167,7 +1167,7 @@
 ],
 "source": [
 "# Removing a random subset of the data\n",
-"res_subset = est_dw.refute_estimate(method_name=\"data_subset_refuter\", subset_fraction=0.8, \n",
+"res_subset = est_dw.refute_estimate(method_name=\"data_subset_refuter\", subset_fraction=0.8,\n",
 " num_simulations=2)\n",
 "print(res_subset)"
 ]
@@ -133,7 +133,7 @@
 "outputs": [],
 "source": [
 "# Import the sample AB data\n",
-"file_url = \"https://msalicedatapublic.z5.web.core.windows.net/datasets/RecommendationAB/ab_sample.csv\" \n",
+"file_url = \"https://msalicedatapublic.z5.web.core.windows.net/datasets/RecommendationAB/ab_sample.csv\"\n",
 "ab_data = pd.read_csv(file_url)"
 ]
 },
@@ -351,7 +351,7 @@
 },
 "outputs": [],
 "source": [
-"# Define underlying treatment effect function \n",
+"# Define underlying treatment effect function\n",
 "def TE_fn(X):\r\n",
 " return (0.2 + 0.3 * X[\"days_visited_free_pre\"] - 0.2 * X[\"days_visited_hs_pre\"] + X[\"os_type_osx\"]).values\n",
 "true_TE = TE_fn(X_data)\n",

0 comments on commit 1fbeb76
