838 lines
37 KiB
Plaintext
Executable File
838 lines
37 KiB
Plaintext
Executable File
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "7a9115cb",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"import seaborn as sns; sns.set()\n",
|
|
"from sklearn.model_selection import train_test_split"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "b58c3a40",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"heart_disease=pd.read_csv('./data/heart-disease.csv')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "0bc694fa",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"x = heart_disease.drop('target',axis=1)\n",
|
|
"y = heart_disease['target']\n",
|
|
"\n",
|
|
"x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "91e073a1",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"0.7868852459016393"
|
|
]
|
|
},
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"from sklearn.ensemble import RandomForestClassifier\n",
|
|
"\n",
|
|
"# Seed NumPy's global RNG (used by train_test_split and the forest when\n",
|
|
"# random_state is unset) so this cell gives the same score on every re-run.\n",
|
|
"np.random.seed(42)\n",
|
|
"\n",
|
|
"x = heart_disease.drop('target', axis=1)\n",
|
|
"y = heart_disease['target']\n",
|
|
"\n",
|
|
"x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)\n",
|
|
"\n",
|
|
"clf = RandomForestClassifier()\n",
|
|
"clf.fit(x_train, y_train)\n",
|
|
"\n",
|
|
"clf.score(x_test, y_test)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "33eb1968",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"array([1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,\n",
|
|
" 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1,\n",
|
|
" 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1], dtype=int64)"
|
|
]
|
|
},
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"clf.predict(x_test)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "5dc76fe7",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"0.7868852459016393"
|
|
]
|
|
},
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"y_preds = clf.predict(x_test)\n",
|
|
"np.mean(y_preds == y_test)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "3e29898c",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"array([[0.16, 0.84],\n",
|
|
" [0. , 1. ],\n",
|
|
" [0.14, 0.86],\n",
|
|
" [0.69, 0.31],\n",
|
|
" [0.21, 0.79]])"
|
|
]
|
|
},
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"clf.predict_proba(x_test[:5])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"id": "8f29756f",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"array([1, 1, 1, 0, 1], dtype=int64)"
|
|
]
|
|
},
|
|
"execution_count": 15,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"clf.predict(x_test[:5])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"id": "58ecfe8a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.ensemble import RandomForestRegressor\n",
|
|
"from sklearn.datasets import fetch_california_housing\n",
|
|
"np.random.seed(42)\n",
|
|
"housing = fetch_california_housing()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"id": "969a0902",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"{'data': array([[ 8.3252 , 41. , 6.98412698, ..., 2.55555556,\n",
|
|
" 37.88 , -122.23 ],\n",
|
|
" [ 8.3014 , 21. , 6.23813708, ..., 2.10984183,\n",
|
|
" 37.86 , -122.22 ],\n",
|
|
" [ 7.2574 , 52. , 8.28813559, ..., 2.80225989,\n",
|
|
" 37.85 , -122.24 ],\n",
|
|
" ...,\n",
|
|
" [ 1.7 , 17. , 5.20554273, ..., 2.3256351 ,\n",
|
|
" 39.43 , -121.22 ],\n",
|
|
" [ 1.8672 , 18. , 5.32951289, ..., 2.12320917,\n",
|
|
" 39.43 , -121.32 ],\n",
|
|
" [ 2.3886 , 16. , 5.25471698, ..., 2.61698113,\n",
|
|
" 39.37 , -121.24 ]]),\n",
|
|
" 'target': array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]),\n",
|
|
" 'frame': None,\n",
|
|
" 'target_names': ['MedHouseVal'],\n",
|
|
" 'feature_names': ['MedInc',\n",
|
|
" 'HouseAge',\n",
|
|
" 'AveRooms',\n",
|
|
" 'AveBedrms',\n",
|
|
" 'Population',\n",
|
|
" 'AveOccup',\n",
|
|
" 'Latitude',\n",
|
|
" 'Longitude'],\n",
|
|
" 'DESCR': '.. _california_housing_dataset:\\n\\nCalifornia Housing dataset\\n--------------------------\\n\\n**Data Set Characteristics:**\\n\\n :Number of Instances: 20640\\n\\n :Number of Attributes: 8 numeric, predictive attributes and the target\\n\\n :Attribute Information:\\n - MedInc median income in block group\\n - HouseAge median house age in block group\\n - AveRooms average number of rooms per household\\n - AveBedrms average number of bedrooms per household\\n - Population block group population\\n - AveOccup average number of household members\\n - Latitude block group latitude\\n - Longitude block group longitude\\n\\n :Missing Attribute Values: None\\n\\nThis dataset was obtained from the StatLib repository.\\nhttps://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html\\n\\nThe target variable is the median house value for California districts,\\nexpressed in hundreds of thousands of dollars ($100,000).\\n\\nThis dataset was derived from the 1990 U.S. census, using one row per census\\nblock group. A block group is the smallest geographical unit for which the U.S.\\nCensus Bureau publishes sample data (a block group typically has a population\\nof 600 to 3,000 people).\\n\\nAn household is a group of people residing within a home. Since the average\\nnumber of rooms and bedrooms in this dataset are provided per household, these\\ncolumns may take surpinsingly large values for block groups with few households\\nand many empty houses, such as vacation resorts.\\n\\nIt can be downloaded/loaded using the\\n:func:`sklearn.datasets.fetch_california_housing` function.\\n\\n.. topic:: References\\n\\n - Pace, R. Kelley and Ronald Barry, Sparse Spatial Autoregressions,\\n Statistics and Probability Letters, 33 (1997) 291-297\\n'}"
|
|
]
|
|
},
|
|
"execution_count": 18,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"housing"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 27,
|
|
"id": "ba187d99",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"housing_df = pd.DataFrame(housing['data'],\n",
|
|
" columns=housing['feature_names'])\n",
|
|
"housing_df['target']=housing['target']\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 29,
|
|
"id": "f47503b8",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>MedInc</th>\n",
|
|
" <th>HouseAge</th>\n",
|
|
" <th>AveRooms</th>\n",
|
|
" <th>AveBedrms</th>\n",
|
|
" <th>Population</th>\n",
|
|
" <th>AveOccup</th>\n",
|
|
" <th>Latitude</th>\n",
|
|
" <th>Longitude</th>\n",
|
|
" <th>target</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>8.3252</td>\n",
|
|
" <td>41.0</td>\n",
|
|
" <td>6.984127</td>\n",
|
|
" <td>1.023810</td>\n",
|
|
" <td>322.0</td>\n",
|
|
" <td>2.555556</td>\n",
|
|
" <td>37.88</td>\n",
|
|
" <td>-122.23</td>\n",
|
|
" <td>4.526</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>8.3014</td>\n",
|
|
" <td>21.0</td>\n",
|
|
" <td>6.238137</td>\n",
|
|
" <td>0.971880</td>\n",
|
|
" <td>2401.0</td>\n",
|
|
" <td>2.109842</td>\n",
|
|
" <td>37.86</td>\n",
|
|
" <td>-122.22</td>\n",
|
|
" <td>3.585</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>7.2574</td>\n",
|
|
" <td>52.0</td>\n",
|
|
" <td>8.288136</td>\n",
|
|
" <td>1.073446</td>\n",
|
|
" <td>496.0</td>\n",
|
|
" <td>2.802260</td>\n",
|
|
" <td>37.85</td>\n",
|
|
" <td>-122.24</td>\n",
|
|
" <td>3.521</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>5.6431</td>\n",
|
|
" <td>52.0</td>\n",
|
|
" <td>5.817352</td>\n",
|
|
" <td>1.073059</td>\n",
|
|
" <td>558.0</td>\n",
|
|
" <td>2.547945</td>\n",
|
|
" <td>37.85</td>\n",
|
|
" <td>-122.25</td>\n",
|
|
" <td>3.413</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>3.8462</td>\n",
|
|
" <td>52.0</td>\n",
|
|
" <td>6.281853</td>\n",
|
|
" <td>1.081081</td>\n",
|
|
" <td>565.0</td>\n",
|
|
" <td>2.181467</td>\n",
|
|
" <td>37.85</td>\n",
|
|
" <td>-122.25</td>\n",
|
|
" <td>3.422</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude \\\n",
|
|
"0 8.3252 41.0 6.984127 1.023810 322.0 2.555556 37.88 \n",
|
|
"1 8.3014 21.0 6.238137 0.971880 2401.0 2.109842 37.86 \n",
|
|
"2 7.2574 52.0 8.288136 1.073446 496.0 2.802260 37.85 \n",
|
|
"3 5.6431 52.0 5.817352 1.073059 558.0 2.547945 37.85 \n",
|
|
"4 3.8462 52.0 6.281853 1.081081 565.0 2.181467 37.85 \n",
|
|
"\n",
|
|
" Longitude target \n",
|
|
"0 -122.23 4.526 \n",
|
|
"1 -122.22 3.585 \n",
|
|
"2 -122.24 3.521 \n",
|
|
"3 -122.25 3.413 \n",
|
|
"4 -122.25 3.422 "
|
|
]
|
|
},
|
|
"execution_count": 29,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"housing_df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 33,
|
|
"id": "4346e2e7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"\n",
|
|
"X = housing_df.drop('target', axis=1)\n",
|
|
"y = housing['target']\n",
|
|
"\n",
|
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 37,
|
|
"id": "cd0d7834",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"model = RandomForestRegressor(n_estimators=1000)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 38,
|
|
"id": "0e65ca48",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"RandomForestRegressor(n_estimators=1000)"
|
|
]
|
|
},
|
|
"execution_count": 38,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"model.fit(X_train, y_train)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 39,
|
|
"id": "394a8a63",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"0.8077999156142389"
|
|
]
|
|
},
|
|
"execution_count": 39,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"model.score(X_test,y_test)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 41,
|
|
"id": "2c4bb4b8",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"y_preds = model.predict(X_test)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 42,
|
|
"id": "ff01087a",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"array([0.659563 , 2.69825401, 4.99800197, ..., 0.84376895, 4.72698433,\n",
|
|
" 2.33940125])"
|
|
]
|
|
},
|
|
"execution_count": 42,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"y_preds"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 44,
|
|
"id": "7516e56f",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"array([0.696 , 3.356 , 5.00001, 3.606 , 2.766 , 0.835 , 1.22 ,\n",
|
|
" 1.693 , 3.153 , 0.789 ])"
|
|
]
|
|
},
|
|
"execution_count": 44,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"np.array(y_test[:10])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 57,
|
|
"id": "aab2f50d",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"RandomForestRegressor(n_estimators=1000)"
|
|
]
|
|
},
|
|
"execution_count": 57,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"from sklearn.ensemble import RandomForestClassifier\n",
|
|
"\n",
|
|
"X = heart_disease.drop('target', axis=1)\n",
|
|
"y = heart_disease['target']\n",
|
|
"\n",
|
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
|
|
"\n",
|
|
"model = RandomForestClassifier(n_estimators=1000)\n",
|
|
"model.fit(X_train, y_train)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 59,
|
|
"id": "8617403d",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"0.4126102400932402"
|
|
]
|
|
},
|
|
"execution_count": 59,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"model.score(X_test, y_test)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 88,
|
|
"id": "37f1f469",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"RandomForestRegressor(n_estimators=50)"
|
|
]
|
|
},
|
|
"execution_count": 88,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"from sklearn.ensemble import RandomForestRegressor\n",
|
|
"\n",
|
|
"X = heart_disease.drop('target', axis=1)\n",
|
|
"y = heart_disease['target']\n",
|
|
"\n",
|
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
|
|
"\n",
|
|
"model_reg = RandomForestRegressor(n_estimators=50)\n",
|
|
"model_reg.fit(X_train, y_train)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 89,
|
|
"id": "00bd7beb",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"0.1977237472766885"
|
|
]
|
|
},
|
|
"execution_count": 89,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"model_reg.score(X_test,y_test)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 90,
|
|
"id": "43d63da0",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"0.2423773420479305"
|
|
]
|
|
},
|
|
"execution_count": 90,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Refit the regressor 10 times and keep the best test-set score.\n",
|
|
"# Each fit is scored exactly once, and the result is a score that was really\n",
|
|
"# observed (the old `maximum = 0` start hid the case where every score is negative).\n",
|
|
"# NOTE: a maximum over repeated test-set scores is optimistically biased.\n",
|
|
"scores = []\n",
|
|
"for _ in range(10):\n",
|
|
"    model_reg.fit(X_train, y_train)\n",
|
|
"    scores.append(model_reg.score(X_test, y_test))\n",
|
|
"\n",
|
|
"maximum = max(scores)\n",
|
|
"maximum"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 96,
|
|
"id": "a43b1703",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"array([0.83606557, 0.90163934, 0.78688525, 0.8 , 0.76666667])"
|
|
]
|
|
},
|
|
"execution_count": 96,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"from sklearn.model_selection import cross_val_score\n",
|
|
"from sklearn.ensemble import RandomForestClassifier\n",
|
|
"\n",
|
|
"X = heart_disease.drop('target', axis=1)\n",
|
|
"y = heart_disease['target']\n",
|
|
"\n",
|
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
|
|
"\n",
|
|
"model = RandomForestClassifier(n_estimators=1000)\n",
|
|
"model.fit(X_train, y_train)\n",
|
|
"cross_val_score(model, X, y)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 94,
|
|
"id": "3801871c",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"0.819672131147541"
|
|
]
|
|
},
|
|
"execution_count": 94,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"model.score(X_test, y_test)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 97,
|
|
"id": "9e096485",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"0.8316129032258065"
|
|
]
|
|
},
|
|
"execution_count": 97,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"np.mean(cross_val_score(clf, X, y, cv=10))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 98,
|
|
"id": "4a3038da",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"array([0.81967213, 0.8852459 , 0.80327869, 0.8 , 0.8 ])"
|
|
]
|
|
},
|
|
"execution_count": 98,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"cross_val_score(clf, X, y, scoring=None )"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 102,
|
|
"id": "e1098ca4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.metrics import roc_curve\n",
|
|
"\n",
|
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
|
|
"\n",
|
|
"# clf was fitted on an earlier, different split. Refit it on this split's\n",
|
|
"# training rows so X_test is unseen data and the ROC curve is not inflated\n",
|
|
"# by rows the model already trained on.\n",
|
|
"clf.fit(X_train, y_train)\n",
|
|
"\n",
|
|
"y_probs = clf.predict_proba(X_test)\n",
|
|
"# Keep the second column: the predicted probability of the positive class.\n",
|
|
"y_probs_positive = y_probs[:, 1]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 104,
|
|
"id": "ba39424f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"fpr, tpr, thresholds =roc_curve(y_test, y_probs_positive)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 107,
|
|
"id": "1a5d135c",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"array([0. , 0. , 0. , 0. , 0. ,\n",
|
|
" 0.03571429, 0.03571429, 0.03571429, 0.03571429, 0.03571429,\n",
|
|
" 0.03571429, 0.03571429, 0.07142857, 0.07142857, 0.32142857,\n",
|
|
" 0.39285714, 0.71428571, 1. ])"
|
|
]
|
|
},
|
|
"execution_count": 107,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"fpr"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 108,
|
|
"id": "88352c7e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def plot_roc_curve(fpr, tpr):\n",
|
|
"    \"\"\"Plot an ROC curve together with the chance diagonal.\n",
|
|
"\n",
|
|
"    fpr: false positive rates (x axis).\n",
|
|
"    tpr: true positive rates (y axis), same length as fpr.\n",
|
|
"    Draws on the current matplotlib figure; returns nothing.\n",
|
|
"    \"\"\"\n",
|
|
"    plt.plot(fpr, tpr, c='orange', label='ROC')\n",
|
|
"    plt.plot([0, 1], [0, 1], c='darkblue', label='Guessing')\n",
|
|
"\n",
|
|
"    plt.xlabel('FPR')\n",
|
|
"    plt.ylabel('TPR')\n",
|
|
"    plt.title('ROC curve')\n",
|
|
"    # Without a legend the label= arguments above are never displayed.\n",
|
|
"    plt.legend()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 109,
|
|
"id": "eaf11c67",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAEXCAYAAACtTzM+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAvi0lEQVR4nO3deXxM9/7H8VeCbIIQk6RVtVTFFntrKXJpiSIU1fXSxdXq8nMNgiax1ZZaGpfeLtEqLmqpvb1FS8USai+1FKW1lGQiloTsc35/1M29qSVBJpPMvJ+PRx+PnJwz5v2VmnfOzJzPuBiGYSAiIk7P1d4BRESkaFAhiIgIoEIQEZHrVAgiIgKoEERE5DoVgoiIAFDS3gFEbC0wMJCaNWvi6uqKi4sLqampeHt7M3r0aIKCggC4du0aM2bMYMOGDbi5uQHQrl073njjDTw8PHL+rOXLl7Nw4ULS0tLIzMykSZMmhIWFUbZsWbusTaQgueg6BHF0gYGBbNu2jQoVKuR877PPPmPdunUsWrSIrKwsXnzxRRo2bMjAgQPx9PQkNTWVqVOncvjwYebMmUPJkiX5+OOP2bRpE9OnT6dixYpkZmYyYcIEfv75ZxYsWGDHFYoUDJ0hiNPJysri3LlzlCtXDoA1a9ZgtVp55513co7x9PQkIiKCp556im+//Zbg4GA++eQTli9fTsWKFQEoVaoUQ4cO5dtvvyUjIyPnzOI/vv/+e6ZNm4bVasXLy4sxY8bg7e1NaGgoe/fuBeDMmTM528uWLePLL7/MOYPJzMzklVdeISQkBIDJkycDEBYWxpIlS/jiiy+wWq34+PgwYsQIHnroIZv/3YljUyGIU3jppZcAuHjxIu7u7rRt25aJEycCsHfvXpo2bXrDbVxcXGjRogW7d++mcuXKeHh4ULVq1VzHeHp60rVr1xtum5iYSFhYGHPnzqVOnTqsW7eOKVOmMHr06NvmPH78OBs2bMDb25ulS5eybNkyQkJCyM7OZtWqVfzrX/9ix44drFixgvnz5+Pp6cmWLVt4++23+eabb+7uL0fkOhWCOIU5c+ZQoUIFDh48yGuvvUazZs3w9fXN2Z+VlXXT22VkZFCiRAlcXV2xWq35vr89e/bw8MMPU6dOHQA6dOhAhw4dOHPmzG1vFxgYiLe3NwCdOnVi0qRJWCwWDh06RNWqValatSqLFy/mt99+47nnnsu53ZUrV7h06RI+Pj75zijyZ3qXkTiVunXr8s477zB8+PCcB+fGjRuza9euGx7wrVYrO3fupFGjRtSoUYOsrCx+/fXXXMekp6fTr18/4uPjc32/RIkSuLi45GwbhsGRI0dwcXHhf1+2y8zMzHU7Ly+vnK89PT0JCQnhq6++YunSpfTq1SsnV7du3Vi5ciUrV65k+fLlLF26NOcpMJG7pUIQp9OlSxfq16+f85RRSEgInp6eTJgwgbS0NADS0tIYO3YspUuXpn379ri5udGvXz8iIiJITEwE/jh7mDBhAqmpqfj7++e6jwYNGvDLL79w7NgxANavX5/zbqTMzEyOHz8OwNdff33brM888wzLly9nz549Oa8ltGrViq+//pqEhAQAvvjii5ynxETuhZ4yEqc0YsQIunbtyubNm2ndujWzZs3iww8/pEePHri6upKdnU27du2YNWsWpUqVAqB///54enrSt29f4I+zg0cffZQPP/zwhj+/YsWKTJkyhWHDhpGdnY23tzfR0dGUKVOGsLAw+vXrR4UKFejYseNtc9arV48SJUrQsWNH3N3dgT8KoV+/frz66qu4uLjg7e3NBx98kOuMRORu6G2nIiIC6CkjERG5ToUgIiKACkFERK5TIYiICKBCEBGR61QIIiICFPPrEC5evIrVeufvmvX19ebChRQbJCq6tGbnoDU7h7tds6urC+XLl77l/mJdCFarcVeF8J/bOhut2Tlozc7BFmvWU0YiIgKoEERE5DoVgoiIADYuhJSUFL
p06XLTGfCHDx+mR48ehISEEBERcct59CIiUjhsVgg//vgjzz///A3z4/8jLCyMkSNHsnbtWgzDYPHixbaKIiIi+WCzdxktXryYUaNGMXTo0Bv2nT17lrS0NBo2bAhAjx49mD59Oi+88IKt4hQOI/+fqFXoDGvRzmcLWrNzcMY124jNCmH8+PG33JeQkIDJZMrZNplMN3ziVHFT+ufheJ26cS5+UWLK+xCHozU7B2dYs9XqwszvmzF+5eMsnNOahx9tWeD3YZfrEKxW6w0fL3g3H+7h6+t91xlMpjJ3fdub+ukEeN4PNV4v2D9XRJzesd9c6DfGjdhdJWjXzKDOIw3xKejHMOxUCAEBAVgslpztxMRE/Pz87vjPuXAh5a4uzjCZymCxJN/x7W6nXEYWLqXu41KAuUD/3IJiizUXdVqzc3DkNWdlWfnkkwO8995O3NxKEB3dghdeCMTHr+xdrdnV1eW2v0jbpRAqVaqEu7s7u3fvpkmTJqxcuZI2bdrYI4qISJF08OAFzOZY9u2z0LFjVSZNakVAwK3HThSEQr0OoV+/fhw4cACAKVOmMHHiRDp27Mi1a9fo06dPYUYRESmS0tOziYraSfv2yzhzJoVPP32COXM62LwMoBDOEDZs2JDz9cyZM3O+rlWrFl9++aWt7/7eWfN7fYTe5SAi92bXrnjM5lh+/vkivXo9zNixLalQwaPQ7r9YD7ezNc/f/on30XfyfXxmuUdtmEZEHNXVq5lERe0kJuYA99/vzRdfPMnjjz9Y6DlUCLdR4tpxDFcvrlUblK/jMyrodRARuTObNp1h0KBNnDqVzCuv1CEyshllyrjZJYsKIQ9GCS+uVb/x4joRkXtx+XI6o0dvZ/78I1SvXo6VK0Np0eJ+u2ZSIYiIFLJ///skw4ZtITExlQEDGjJ4cBM8Pe3/cGz/BCIiTiIh4Rrh4VtZteoEdev6Mm9eRxo0KDrXWasQRERszDAMliw5xogRcVy9mkl4+CO89VYDSpUqYe9ouagQRERs6MyZZMLCNrN+/WmaNvVn2rRgatYsb+9YN6VCEBGxAavVYPbsQ4wd+wOGYTBhwmO88kodSpQoup9LpkIQESlgv/xyCbM5lu3bzxMc/ABTp7bhwQcLfhhdQVMhiIgUkKwsKx9+uJ/Jk3fh4VGC6dP/wrPP1ryrac72oEIQESkABw4kYjbHsn9/Ip07VyMqqhX+/l72jnVHVAgiIvcgLS2L99/fw4wZ+6hQwYPPPmtPaGh1e8e6KyoEEZG7tGPHeczmWI4du8Szz9bk3XdbUL584Q2jK2gqBBGRO5SSksmECTv47LOfqFTJm4ULO9GuXWV7x7pnKgQRkTvw/fenGTJkE2fOpNC3bz3Cwx/B29s+w+gKmgpBRCQfLl5MY9SobSxceJQaNXxYtaobzZoF2DtWgVIhiIjkYfXqEwwfvoWkpDQGDmzEoEGN8fBwvIdPx1uRiEgBiY+/xjvvbOGrr04SFFSRhQs7ERRU0d6xbEaFICLyJ4ZhsGjRUUaO3EZqahaRkY/yxhv1i9wwuoKmQhAR+R+nTiUzZMgmNm48Q7NmAURHB1Ojho+9YxUKFYKICH8Mo5s16yDjxv2Ai4sLUVGtePnlOri6Fo+xEwVBhSAiTu/YsYuYzZvYseM87dpVZvLk1lSuXPSH0RU0FYKIOK3MzGz++c8fmTJlN6VLl+KDD9rSq9fDxWYYXUFTIYiIU9q/38LAgbH89NMFunatzoQJj+HnV7yG0RU0FYKIOJXU1CymTt3NP//5I76+nnz+eQc6d65m71hFggpBRJzG9u3nMJtj+eWXy7zwQiCjR7fAx8fd3rGKDBWCiDi8lJQMxo3bwaxZB3nwwTIsWdKZ4OAH7B2ryFEhiIhDW7/+FEOGbOb331N4/fUghg9/hNKlS9k7VpGkQhARh5SUlMaIEXEsWXKMmjV9+OqrbjzyiGMNoytoKgQRcSiGYVwfRreVS5fSGTSoMWZzY9zdHXvsRE
FQIYiIw4iPv8rQoVv45ptfadCgIosXd6ZePV97xyo2XG35h69evZpOnTrRoUMH5s+ff8P+gwcP0rNnT7p27crrr7/OlStXbBlHRByUYRgsWHCExx5bzPffn2bkyGZ88013lcEdslkhxMfHEx0dzYIFC1ixYgWLFi3i+PHjuY4ZP348AwYMYNWqVVSrVo3PPvvMVnFExEGdOHGJXr2+ZuDAWOrW9WXjxl68/XZDSpa06e+7Dslmf2NxcXE0b94cHx8fvLy8CAkJYc2aNbmOsVqtXL16FYDU1FQ8PIrvh1OLSOHKzrbyySf7CQqazZ49CUya1Jrly0OpXr2cvaMVWzZ7DSEhIQGTyZSz7efnx/79+3MdM3z4cF599VUmTJiAp6cnixcvvqP78PX1vut8JlM+Blf96gauLvk7thhwlHXcCa3ZMR06lEjfvmvZvv0cnTpV4+OP21O5cll7xypUtvg526wQrFZrrgFRhmHk2k5LSyMiIoLZs2dTv359Pv/8c4YNG0ZMTEy+7+PChRSsVuOOs5lMZbBYkvM8zjs1A3erwYV8HFvU5XfNjkRrdjwZGdnMmLGP6Og9eHu78eGH7ejfvxGJiSkOve4/u9ufs6ury21/kbbZU0YBAQFYLJacbYvFgp+fX8720aNHcXd3p379+gA8++yz7Nixw1ZxRKSY27fPQocOy3jvvV107lyNzZuf4emnnXcyqS3YrBBatmzJtm3bSEpKIjU1lXXr1tGmTZuc/VWqVOH8+fOcOHECgPXr1xMUFGSrOCJSTKWmZjFmzHY6dlxOUlIac+eG8MknT2Ayedo7msOx2VNG/v7+mM1m+vTpQ2ZmJk8//TT169enX79+DBgwgKCgICZOnMjAgQMxDANfX18mTJhgqzgiUgzFxf2O2RzLyZNX6N27FiNHNqdcOQ2jsxWbXpgWGhpKaGhoru/NnDkz5+vg4GCCg4NtGUFEiqHk5AzeffcH5sw5RJUqZVm6tAutW1eydyyHpyuVRaRI+fbb3wgL28z589fo378+w4c3xctLw+gKgwpBRIqECxdSiYyMY+nS49SqVZ7PPmtPkyb+9o7lVFQIImJXhmGwYsUvhIdv5cqVDIYMacLAgY1wc9MwusKmQhARuzl37irDhm1mzZrfaNTIRHR0MHXqaP6QvagQRKTQGYbBvHlHGD16O1lZVsaMacFrr9WjRAnNH7InFYKIFKqTJy8zePAmtmz5nVat7mfq1DZUq6b5Q0WBCkFECkV2tpWYmJ+IitpJyZKuTJ3ahr/+tZauNC5CVAgiYnOHDydhNseyZ08CISFVmDSpNffdV9reseRPVAgiYjMZGdn84x97mTZtL2XLuvHJJ4/z1FMP6aygiFIhiIhN7NmTgNkcy+HDSfToUYPx41vi66v5Q0WZCkFECtS1a5lERe0iJuYA/v5ezJvXkQ4dqtg7luSDCkFECsyWLWcxmzfx229XeOmlOowc2YwyZdzsHUvySYUgIvfsypV0xozZzr/+dYRq1cqyYkUoLVveb+9YcodUCCJyT9au/ZWwsM0kJKTy1lsNCAtromF0xZQKQUTuSmJiKhERW1m+/Bdq167A3LkdadjQlPcNpchSIYjIHTEMg2XLjhMREUdycgbDhjXl//6voYbROQAVgojk29mzKQwduplvvz1FkyZ+REcHU6tWBXvHkgKiQhCRPFmtBnPnHubdd7djtRqMHduCv/1Nw+gcjQpBRG7rxInLDBoUS1zcOVq3rsTUqW2oWrWsvWOJDagQROSmsrKsfPzxfiZN2oWbWwmmTQvm+ecDNXbCgakQROQGBw9ewGyOZd8+Cx07VmXSpFYEBGgYnaNTIYhIjvT0bKKj9zB9+j58fNz59NMnCA2trrMCJ6FCEBEAdu48j9kcy9Gjl+jV62HGjm1JhQoe9o4lhUiFIOLkrl7NZOLEHcyc+RP33+/NF188yeOPP2jvWGIHKgQRJxYbe4bBgzdx6lQyr75al8jIR/H21jA6Z6
VCEHFCly+nM2rUNhYs+Jnq1cuxalVXmje/z96xxM5UCCJO5t//PsmwYVtITExlwICGDB7cBE9PPRSICkHEaSQkXCM8fCurVp2gbl1f5s3rSIMGGkYn/6VCEHFwhmGwZMkxRoyI4+rVTMLDH+GttxpQqpSG0UluKgQRB3bmTDJDhmxmw4bTPPKIP9HRwdSsWd7esaSIsulkqtWrV9OpUyc6dOjA/Pnzb9h/4sQJevfuTdeuXenbty+XL1+2ZRwRp2G1Gnz22U+0br2E7dvPMWHCY6xe3U1lILdls0KIj48nOjqaBQsWsGLFChYtWsTx48dz9huGwRtvvEG/fv1YtWoVtWvXJiYmxlZxRJzG8eOX6NZtFe+8s5WmTf3ZtOkZ/va3eri66mpjuT2bPWUUFxdH8+bN8fHxASAkJIQ1a9bw9ttvA3Dw4EG8vLxo06YNAP379+fKlSu2iiPi8LKyrERF/cDo0XF4eJRg+vS/8OyzNTV2QvLNZoWQkJCAyfTfdzD4+fmxf//+nO1Tp05RsWJFwsPDOXz4MNWrV2fEiBG2iiPi0A4cSMRsjmX//kQ6d65GVFQr/P297B1LihmbFYLVas31m4lhGLm2s7Ky2LFjB/PmzSMoKIhp06YRFRVFVFRUvu/D19f7rvOZTGXyPuhXN3B1yd+xxYCjrONOOPqa09KyGDt2G++9t4OKFT358suu9OxZ096xCp2j/5xvxhZrtlkhBAQEsGvXrpxti8WCn59fzrbJZKJKlSoEBQUB0KVLFwYMGHBH93HhQgpWq3HH2UymMlgsyXke552agbvV4EI+ji3q8rtmR+Loa/7hhz+G0R0/fonnnqvJmDEtqFnT5NBrvhlH/znfzN2u2dXV5ba/SNvsReWWLVuybds2kpKSSE1NZd26dTmvFwA0atSIpKQkjhw5AsCGDRuoW7eureKIOIyUlEzCw7fStetK0tOzWLSoE9Ont6V8eU0mlXtjszMEf39/zGYzffr0ITMzk6effpr69evTr18/BgwYQFBQEP/85z+JjIwkNTWVgIAAJk2aZKs4Ig7h++9PM2TIJs6cSaFv33qEhz+Kt3cpe8cSB2HTC9NCQ0MJDQ3N9b2ZM2fmfN2gQQO+/PJLW0YQcQgXL6YxatQ2Fi48So0aPqxa1Y1mzQLsHUscjK5UFiniVq8+wfDhW0hKSmPgwEYMGtQYDw/905WCp/+rRIqo+PirDB++la+/PklQUEUWLuxEUFBFe8cSB6ZCECliDMNg0aKjjBgRR1paNpGRzXjjjSANoxObUyGIFCGnTiUzePAmYmPP0KxZANHRwdSo4WPvWOIkVAgiRYDVajBr1kHGjfsBFxcXoqJa8fLLdTR/SArVXV2HsHjx4oLOIeK0jh69SGjoSsLDt9K8+X1s2tSLV1+tqzKQQnfLQti8eTOtWrUiNDSUM2fOAHDgwAF69uzJ+++/X2gBRRxVZmY206btoV27Lzl+/BIffNCWL754ksqVnW8MgxQNt3zKaNKkSYwYMYIzZ87w8ccfU7t2baKionjqqaf49NNPCzOjiMPZv9/C3/8ey8GDF+jatToTJjyGn5+G0Yl93bIQrFYrISEhAAQHB7Njxw7mzp1Lo0aNCi2ciKNJTc1iypTdfPjhj/j6evL55x3o3LmavWOJALcpBDc3t1zbn3/+OZUqVbJ5IBFHtX37OczmWH755TIvvliLUaOa4+Pjbu9YIjny9S6j8uXLqwxE7lJKSgZjx/7A558f4sEHy7BkSWeCgx+wdyyRG9yyENLS0jh06BCGYZCenp7z9X9oMqlI3tavP8WQIZv5/fcUXn89iOHDH6F0aQ2jk6LploWQnp6e83GXQK6vXVxcWL9+vW2TiRRjSUlpjBgRx5Ilx6hZ04evvurGI49oGJ0UbbcshA0bNhRmDhGHYBgGq1ad4J13tnDpUgaDBjXGbG6Mu7vGTkjRd9vXEObPn8/Jkydp3rw5TzzxRGFlEimWzp+/yrBhW/jmm1
9p0KAiS5b8hbp1fe0dSyTfbnlh2sSJE1m9ejXu7u68//77zJ49uxBjiRQfhmEwf/4RWrVazPffn2bkyGZ88013lYEUO7c8Q4iLi2P58uWULFmSPn368Oabb/Lyyy8XYjSRou/XX68wePAmNm8+S4sW9xEdHUz16uXsHUvkrtyyEEqWLEnJkn/s9vf3JzMzs9BCiRR12dlWPv30JyZO3ImrqwuTJrWmT5/amj8kxVq+p52WKKEXxUQAfv75IgMHbmT37gSeeOJBJk9uTaVK3vaOJXLPblkIqampua49+N/rEkDXIYjzycjIZsaMfbz//h7KlHHjww/b0bNnDVxcdFYgjuGWhZCcnJzr2gP477UIug5BnM3evQkMHBjL4cNJdO/+EOPGPYbJ5GnvWCIF6paFYDKZWLFiRSFGESl6rl3LZPLk3Xz00X78/DyZOzeEjh2r2juWiE3cshB0GizOLi7ud8zmWE6evELv3n8MoytbVsPoxHHla5bRzeg1BHFUyckZvPvuD8yZc4gqVcqydGkXWrfWcEdxfLcshNOnT/N///d/Ny0EvYYgjurbb38jLGwz589fo3//+gwf3hQvLw2jE+dwy0KoUaOGXkMQp5GYmEpkZBzLlh2nVq3yfPZZe5o08bd3LJFCle/rEEQckWEYrFjxC+HhW7lyJYOwsCb8/e+NcHPTdTfifG5ZCE2bNi3MHCKF7ty5qwwdupm1a3+jcWM/oqODqV27gr1jidjNLQshMjKyMHOIFBrDMJg37wijR28nK8vKmDEteO21epQocctZjyJOQU8ZiVM5efIygwdvYsuW32nV6n6mTm1DtWoaRicCKgRxEtnZVj755ADvvbeLkiVdmTq1DX/9ay1dbyPyP2x6jrx69Wo6depEhw4dmD9//i2P27hxI+3atbNlFHFihw8n0anTCkaP3k6bNpXYsuUZeveurTIQ+RObnSHEx8cTHR3NsmXLcHNz47nnnqNZs2bUqFEj13GJiYm89957toohTiwjI5tJk3bxj3/spWxZNz755HGeeuohFYHILdjsDCEuLo7mzZvj4+ODl5cXISEhrFmz5objIiMjbxiiJ3Kv9uxJoHHjuUyZspuuXauzZcszdO+uyaQit2OzM4SEhARMJlPOtp+fH/v37891zNy5c6lTpw4NGjS4q/vw9b37GfQmU5m8D/rVDVxd8ndsMeAo67ida9cyGTFiC9Om7eH++7356qvudO78kL1jFSpn+Dn/mdZcMGxWCFarNddvY4Zh5No+evQo69atY/bs2Zw/f/6u7uPChRSs1pvPWrodk6kMFktynsd5p2bgbjW4kI9ji7r8rrk427z5LIMGbeK3367w0kt1mD79cdLTMxx+3f/LGX7Of6Y155+rq8ttf5G22VNGAQEBWCyWnG2LxYKfn1/O9po1a7BYLPTs2ZPXXnuNhIQEXnjhBVvFEQd2+XI6gwbF0rPnV7i6wooVoUye3FqTSUXukM0KoWXLlmzbto2kpCRSU1NZt24dbdq0ydk/YMAA1q5dy8qVK4mJicHPz48FCxbYKo44qDVrfqV168UsWPAzb73VgO+/f5qWLe+3dyyRYslmTxn5+/tjNpvp06cPmZmZPP3009SvX59+/foxYMAAgoKCbHXX4gQsllQiIrayYsUv1K5dgblzO9KwoSnvG4rILdn0wrTQ0FBCQ0NzfW/mzJk3HPfAAw+wYcMGW0YRB2EYBkuXHicyMo6UlAyGD3+Et99uoGF0IgVAVypLsXH2bApDh27m229P0aSJH9Om/YXAwPL2jiXiMFQIt5J9lZKXd2GU8LJ3EqdntRrMnXuYd9/djtVqMG5cS/r2rathdCIFTIVwM9ZMyv7Yh5LJB7jSYJ690zi1EycuYzbHsm3bOVq3rsTUqW2oWrWsvWOJOCQVwp8ZVsocegv3C9+SXHs6GX5d7J3IKWVlWfnoo/1MnrwLN7cSTJsWzPPPB+pKYxEbUiH8Seljo/A4t5CrD0WS9s
DL9o7jlH766QJm80Z+/DGRJ5+synvvtSIgoLS9Y4k4PBXC//D8dQZev/2D1Mr9uFYtzN5xnE56ejbR0XuYPn0fPj7ufPrpE4SGVtdZgUghUSFc5/77F3gfiyDNvwcpgZNAD0KFaufO85jNsRw9eolnnqnJu++2oEIFD3vHEnEqKgTALXEdZQ69RUaFYJLrfQIuek97YUlJySQqagczZ/7E/fd788UXT/L44w/aO5aIU3L6Qih5eSdlf+xDlnddrjSYD66af1NYNm48w5Ahmzh1KplXX61LZOSjeHu72TuWiNNy6kIokfIz5fY+jdXdn8uNlmKU1NsZC8OlS+mMHr2NBQt+pnr1cqxa1ZXmze+zdywRp+e0heCadpZye7uDSykuNV6O4e6X943knn399UmGDdvChQupDBjQkMGDm+Dp6bT/G4oUKc75LzHjIuX29MAl8zKXm/4bq1d1eydyeAkJ1wgP38qqVSeoV8+XBQs6Ur++htGJFCXOVwjZ1yC2JyWu/cLlxsvIKnt3n9Ym+WMYBosXH2PEiDiuXcskPPwR3nqrAaVK6YV7kaLG6QrB4/cvwLKV5KDZZFZok/cN5K6dPp1MWNhmNmw4zSOP+DNtWjAPP6xhdCJFldMVgkt2CgDpphA7J3FcVqvB558fZNy4HRiGwYQJj/Hqq3VxddW1HSJFmdMVgtjW8eOXMJtj+eGH8wQHP8DUqW148EHn+wB0keJIhSAFIjMz+/owut14epZk+vS/8OyzNTV2QqQYUSHIPTtwIJGBA2M5cCCRLl2qMXFiK/z99TkSIsWNCkHuWlpaFu+/v4cZM/ZRoYIHn33WntBQvYVXpLhSIchd+eGHP4bRHT9+ieeeq8mYMS0oX17D6ESKMxWC3JGUlAzGj9/BrFkHeeABbxYt6kTbtpXtHUtECoAKQfJtw4bTDBmyibNnU+jbtx7h4Y/i7V3K3rFEpICoECRPFy+mMXLkNhYtOsrDD/uwalU3mjULsHcsESlgKgS5rdWrTzB8+BaSktIwmxthNjfGw0P/24g4Iv3LlpuKj7/K8OFb+frrkwQFVWThwk4EBVW0dywRsSEVguRiGAaLFh1lxIg40tKyiYxsxptv1qdkSVd7RxMRG1MhSI5Tp5IZPHgTsbFnaNYsgOjoYGrU8LF3LBEpJCoEITvbyqxZBxk/fgcuLi5ERbXi5ZfraBidiJNRITi5o0cvYjbHsnNnPO3aVWbKlNY88ICG0Yk4IxWCk8rMzOaDD35k6tTdlC5dig8+aEuvXg9rGJ2IE7NpIaxevZqPPvqIrKwsXnrpJV588cVc+7/77jtmzJiBYRg88MADTJw4kXLlytkykgD791v4+99jOXjwAt26VWf8+Mfw89MwOhFnZ7O3jsTHxxMdHc2CBQtYsWIFixYt4vjx4zn7U1JSGD16NDExMaxatYrAwEBmzJhhqzgCpKZmMXbsD4SELMdiSWX27A7MnNleZSAigA0LIS4ujubNm+Pj44OXlxchISGsWbMmZ39mZiajRo3C398fgMDAQM6dO2erOE5v06bTtG37JTNm7OO55wLZsuUZOnWqZu9YIlKE2KwQEhISMJlMOdt+fn7Ex8fnbJcvX5727dsDkJaWRkxMDE888YSt4jit5OQMhg3bTHDwIrKyrCxZ0pno6GB8fNztHU1EihibvYZgtVpzvUBpGMZNX7BMTk7mrbfeolatWnTv3v2O7sPX1/vOg1n+eCA0VSwDJUvf+e2LkW++OcHrr3/LmTPJDBzYhHHjHqN0aTd7xypUJpPzvWNKa3YOtlizzQohICCAXbt25WxbLBb8/PxyHZOQkEDfvn1p3rw54eHhd3wfFy6kYLUad3Qbz6vpeAOWxGQoYb3j+ywOkpLSGDEijiVLjhEYWJ6vv36KJ5+sgcWSzLVr6faOV2hMpjJYLMn2jlGotGbncLdrdnV1ue0v0jZ7yqhly5Zs27aNpKQkUlNTWbduHW3atMnZn52dTf/+/XnyyS
eJiIjQ2x0LgGEYrFz5C61aLWL58l8YNKgx333Xk6ZN/e0dTUSKAZudIfj7+2M2m+nTpw+ZmZk8/fTT1K9fn379+jFgwADOnz/PoUOHyM7OZu3atQDUq1eP8ePH2yqSQzt//ipDh25hzZpfadCgIkuW/IW6dX3tHUtEihGbXocQGhpKaGhoru/NnDkTgKCgII4cOWLLu3cKhmGwYMHPjBq1jYyMbEaObEb//hpGJyJ3TlcqF2O//nqFwYNj2bz5d1q0uI/o6GCqV9eFfSJyd1QIxVB2tpVPP/2JiRN34urqwuTJrendu7aG0YnIPVEhFDNHjiRhNseye3cC7ds/yOTJrbn//rt4+62IyJ+oEIqJjIxspk/fR3T0HsqUceOjj9rRo0cNvTtLRAqMCqEY2Ls3gYEDYzl8OInu3R9i/PjHqFjR096xRMTBqBCKsGvXMpk0aRcff3wAPz9P5s4NoWPHqvaOJSIOSoVQRG3d+juDBsVy8uQVeveuxahRzSlbVvOHRMR2VAhFzJUr6bz77g/MnXuYKlXKsnRpF1q3rmTvWCLiBFQIRci33/7GkCGbiY+/xhtv1GfYsKZ4eZWydywRcRIqhCIgMTGVyMg4li07Tu3aFfj88w40buyX9w1FRAqQCsGODMNg+fJfiIjYypUrGYSFNeHvf2+Em1sJe0cTESekQrCT339PYejQzaxbd4rGjf2Ijg6mdu0K9o4lIk5MhVDIrFaDefMOM2bMD2RlWRkzpgWvvVaPEiU0jE5E7EuFUIhOnLjM4MGb2Lr1d1q1up+pU9tQrZqG0YlI0aBCKATZ2VY++eQA7723i5IlXXn//Ta8+GItjZ0QkSJFhWBjhw5dwGyOZe9eCyEhVZg0qTX33efYn+UsIsWTCsFG0tOzmTZtD//4xz58fNyIiXmcbt0e0lmBiBRZKgQb2L07HrM5liNHLtKzZw3GjWuJr6+G0YlI0aZCKEBXr2YSFbWTmJgD3HdfaebP70j79lXsHUtEJF9UCAVk8+azDBq0id9+u8JLL9Vh5MhmlCnjZu9YIiL5pkK4R5cvpzNmzHbmzTtCtWplWbEilJYt77d3LBGRO6ZCuAdr1vzK0KGbSUhI5e23GxAW1hRPT/2VikjxpEevu2CxpBIRsZUVK36hdu0KzJ3bkYYNTfaOJSJyT1QId8AwDL788hiRkXFcvZrJ8OGP8PbbDTSMTkQcggohn86eTSEsbDPffXeKJk38mDbtLwQGlrd3LBGRAqNCyIPVajBnziHGjv0Bq9Vg3LiW9O1bV8PoRMThqBBu48SJy5jNsWzbdo42bSoxdWobqlQpa+9YIiI2oUK4iawsKx99tJ/Jk3fh5laCadOCef75QI2dEBGHpkL4k59+usDAgRvZvz+RJ5+synvvtSIgQMPoRMTxqRCuS0/PJjp6D9On78PHx51PP32C0NDqOisQEaehQgB27jyP2RzL0aOXeOaZmrz7bgsqVPCwdywRkUJl07fKrF69mk6dOtGhQwfmz59/w/7Dhw/To0cPQkJCiIiIICsry5ZxbpCSkklk5Fa6dFnJtWtZLFz4JB980FZlICJOyWaFEB8fT3R0NAsWLGDFihUsWrSI48eP5zomLCyMkSNHsnbtWgzDYPHixbaKc4ONsef4y1+WEBPzE6+8UpdNm3rRrt2DhXb/IiJFjc0KIS4ujubNm+Pj44OXlxchISGsWbMmZ//Zs2dJS0ujYcOGAPTo0SPXflu5mgp9Y3rxzHPrKVXKlVWruhIV1Qpvb00mFRHnZrPXEBISEjCZ/jvfx8/Pj/37999yv8lkIj4+/o7uw9fX+45zrVgWwJzNbgwf1pRRo1vh4eE8L6OYTGXsHaHQac3OQWsuGDZ7NLRarbneoWMYRq7tvPbnx4ULKVitxh3dpkW3p7nYy0padimSk1NJTr6jmxdbJlMZLBYnWex1WrNz0Jrzz9XV5ba/SNvsKaOAgAAsFkvOtsViwc/P75b7ExMTc+23FdcSJS
hToYLN70dEpLixWSG0bNmSbdu2kZSURGpqKuvWraNNmzY5+ytVqoS7uzu7d+8GYOXKlbn2i4hI4bJZIfj7+2M2m+nTpw9PPfUUXbp0oX79+vTr148DBw4AMGXKFCZOnEjHjh25du0affr0sVUcERHJg4thGHf2JHwRcjevIYCec3QWWrNz0Jrzz26vIYiISPGiQhAREUCFICIi1xXrq7JcXe9+Eum93La40pqdg9bsHO5mzXndpli/qCwiIgVHTxmJiAigQhARketUCCIiAqgQRETkOhWCiIgAKgQREblOhSAiIoAKQURErlMhiIgI4OCFsHr1ajp16kSHDh2YP3/+DfsPHz5Mjx49CAkJISIigqysLDukLFh5rfm7776jW7dudO3alTfffJPLly/bIWXBymvN/7Fx40batWtXiMlsJ681nzhxgt69e9O1a1f69u3rFD/ngwcP0rNnT7p27crrr7/OlStX7JCyYKWkpNClSxfOnDlzwz6bPH4ZDur8+fNG27ZtjYsXLxpXr141QkNDjWPHjuU6pnPnzsbevXsNwzCMd955x5g/f74dkhacvNacnJxsPPbYY8b58+cNwzCMadOmGWPHjrVX3AKRn5+zYRiGxWIxOnbsaLRt29YOKQtWXmu2Wq1Ghw4djNjYWMMwDGPy5MnGpEmT7BW3QOTn5/z8888bGzduNAzDMCZOnGi8//779ohaYPbt22d06dLFqFu3rnH69Okb9tvi8cthzxDi4uJo3rw5Pj4+eHl5ERISwpo1a3L2nz17lrS0NBo2bAhAjx49cu0vjvJac2ZmJqNGjcLf3x+AwMBAzp07Z6+4BSKvNf9HZGQkb7/9th0SFry81nzw4EG8vLxyPpK2f//+vPjii/aKWyDy83O2Wq1cvXoVgNTUVDw8POwRtcAsXryYUaNG3fSz5m31+OWwhZCQkIDJZMrZ9vPzIz4+/pb7TSZTrv3FUV5rLl++PO3btwcgLS2NmJgYnnjiiULPWZDyWjPA3LlzqVOnDg0aNCjseDaR15pPnTpFxYoVCQ8Pp3v37owaNQovLy97RC0w+fk5Dx8+nMjISFq1akVcXBzPPfdcYccsUOPHj6dp06Y33Werxy+HLQSr1YqLy39HvRqGkWs7r/3FUX7XlJyczGuvvUatWrXo3r17YUYscHmt+ejRo6xbt44333zTHvFsIq81Z2VlsWPHDp5//nmWL19O5cqViYqKskfUApPXmtPS0oiIiGD27Nls2bKFF154gWHDhtkjaqGw1eOXwxZCQEAAFoslZ9tiseQ69frz/sTExJuemhUnea0Z/vjN4oUXXiAwMJDx48cXdsQCl9ea16xZg8VioWfPnrz22ms56y/O8lqzyWSiSpUqBAUFAdClSxf2799f6DkLUl5rPnr0KO7u7tSvXx+AZ599lh07dhR6zsJiq8cvhy2Eli1bsm3bNpKSkkhNTWXdunU5z6kCVKpUCXd3d3bv3g3AypUrc+0vjvJac3Z2Nv379+fJJ58kIiKi2J8RQd5rHjBgAGvXrmXlypXExMTg5+fHggUL7Jj43uW15kaNGpGUlMSRI0cA2LBhA3Xr1rVX3AKR15qrVKnC+fPnOXHiBADr16/PKURHZLPHr3t+WboIW7VqldG5c2ejQ4cORkxMjGEYhvG3v/3N2L9/v2EYhnH48GGjZ8+eRkhIiDFo0CAjPT3dnnELxO3WvG7dOiMwMNDo2rVrzn/h4eF2Tnzv8vo5/8fp06cd4l1GhpH3mvft22f07NnT6NSpk/Hqq68aiYmJ9oxbIPJa88aNG43Q0FCjS5cuxksvvWScOnXKnnELTNu2bXPeZWTrxy99YpqIiAAO/JSRiIjcGRWCiIgAKgQREblOhSAiIoAKQUREritp7wAixUVgYCA1a9bE1fW/v0fVq1eP8ePH065dO0qVKoWHhwcuLi5kZmby2GOPMXz4cFxdXW/Yn5GRgaurK0OHDi3217+I41AhiNyBOXPmUKFChZ
vumzJlSs7FUBkZGfTu3ZsFCxbw17/+9Yb98MdV1OHh4WzZssX2wUXyQU8ZidiAm5sbTZo0ybly9s8Mw+DMmTOUK1eukJOJ3JrOEETuwEsvvZTrKaNZs2bh6+t7w3Hx8fF8//33DBw4MOd7Q4YMwd3dnUuXLgHQqlUrPv74Y1tHFsk3FYLIHbjdU0ZDhgzBw8MDq9VKqVKl6NWrFyEhITn7//OU0enTp3nllVeoXbs2lStXLqzoInlSIYgUkD+/RnArlStXZtKkSfTp04cGDRrkTOgUsTe9hiBiB40bN+app55i9OjRWK1We8cRAVQIInYzaNAgzp49y+LFi+0dRQQATTsVERFAZwgiInKdCkFERAAVgoiIXKdCEBERQIUgIiLXqRBERARQIYiIyHUqBBERAeD/Ab/QxA5hMH76AAAAAElFTkSuQmCC\n",
|
|
"text/plain": [
|
|
"<Figure size 432x288 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"plot_roc_curve(fpr,tpr)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 110,
|
|
"id": "8cae5ee2",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"0.9751082251082251"
|
|
]
|
|
},
|
|
"execution_count": 110,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"from sklearn.metrics import roc_auc_score\n",
|
|
"\n",
|
|
"roc_auc_score(y_test, y_probs_positive)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.13.5"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|