750 lines
46 KiB
Plaintext
Executable File
750 lines
46 KiB
Plaintext
Executable File
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "e003c92d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import numpy as np\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"import pandas as pd\n",
|
||
"import sklearn"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "0a095019",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df = pd.read_csv('./data/buldozer/TrainAndValid.csv',\n",
|
||
" low_memory=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "9c376485",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
"RangeIndex: 412698 entries, 0 to 412697\n",
|
||
"Data columns (total 53 columns):\n",
|
||
" # Column Non-Null Count Dtype \n",
|
||
"--- ------ -------------- ----- \n",
|
||
" 0 SalesID 412698 non-null int64 \n",
|
||
" 1 SalePrice 412698 non-null float64\n",
|
||
" 2 MachineID 412698 non-null int64 \n",
|
||
" 3 ModelID 412698 non-null int64 \n",
|
||
" 4 datasource 412698 non-null int64 \n",
|
||
" 5 auctioneerID 392562 non-null float64\n",
|
||
" 6 YearMade 412698 non-null int64 \n",
|
||
" 7 MachineHoursCurrentMeter 147504 non-null float64\n",
|
||
" 8 UsageBand 73670 non-null object \n",
|
||
" 9 saledate 412698 non-null object \n",
|
||
" 10 fiModelDesc 412698 non-null object \n",
|
||
" 11 fiBaseModel 412698 non-null object \n",
|
||
" 12 fiSecondaryDesc 271971 non-null object \n",
|
||
" 13 fiModelSeries 58667 non-null object \n",
|
||
" 14 fiModelDescriptor 74816 non-null object \n",
|
||
" 15 ProductSize 196093 non-null object \n",
|
||
" 16 fiProductClassDesc 412698 non-null object \n",
|
||
" 17 state 412698 non-null object \n",
|
||
" 18 ProductGroup 412698 non-null object \n",
|
||
" 19 ProductGroupDesc 412698 non-null object \n",
|
||
" 20 Drive_System 107087 non-null object \n",
|
||
" 21 Enclosure 412364 non-null object \n",
|
||
" 22 Forks 197715 non-null object \n",
|
||
" 23 Pad_Type 81096 non-null object \n",
|
||
" 24 Ride_Control 152728 non-null object \n",
|
||
" 25 Stick 81096 non-null object \n",
|
||
" 26 Transmission 188007 non-null object \n",
|
||
" 27 Turbocharged 81096 non-null object \n",
|
||
" 28 Blade_Extension 25983 non-null object \n",
|
||
" 29 Blade_Width 25983 non-null object \n",
|
||
" 30 Enclosure_Type 25983 non-null object \n",
|
||
" 31 Engine_Horsepower 25983 non-null object \n",
|
||
" 32 Hydraulics 330133 non-null object \n",
|
||
" 33 Pushblock 25983 non-null object \n",
|
||
" 34 Ripper 106945 non-null object \n",
|
||
" 35 Scarifier 25994 non-null object \n",
|
||
" 36 Tip_Control 25983 non-null object \n",
|
||
" 37 Tire_Size 97638 non-null object \n",
|
||
" 38 Coupler 220679 non-null object \n",
|
||
" 39 Coupler_System 44974 non-null object \n",
|
||
" 40 Grouser_Tracks 44875 non-null object \n",
|
||
" 41 Hydraulics_Flow 44875 non-null object \n",
|
||
" 42 Track_Type 102193 non-null object \n",
|
||
" 43 Undercarriage_Pad_Width 102916 non-null object \n",
|
||
" 44 Stick_Length 102261 non-null object \n",
|
||
" 45 Thumb 102332 non-null object \n",
|
||
" 46 Pattern_Changer 102261 non-null object \n",
|
||
" 47 Grouser_Type 102193 non-null object \n",
|
||
" 48 Backhoe_Mounting 80712 non-null object \n",
|
||
" 49 Blade_Type 81875 non-null object \n",
|
||
" 50 Travel_Controls 81877 non-null object \n",
|
||
" 51 Differential_Type 71564 non-null object \n",
|
||
" 52 Steering_Controls 71522 non-null object \n",
|
||
"dtypes: float64(3), int64(5), object(45)\n",
|
||
"memory usage: 166.9+ MB\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"df.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"id": "e4a4e252",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZgAAAD4CAYAAADRuPC7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAkMElEQVR4nO3df5xW5Xnn8c8lEDLGAv4AKzMSNBJaTVrRKTG1zdqYOGjTMHW1pV0rTdhl19i++pOudLvbJG3XWLa1tammbsyKplGJJUiSKhrQzY9VyFCIBHXC+JMBIhgcgnEkgNf+cV2HOfMwzC84zAx836/X85pn7ufc59znfu5zrvu+z5k55u6IiIgcaScMdQFEROTYpAAjIiKVUIAREZFKKMCIiEglFGBERKQSo4e6AIN12mmn+dSpU4e6GCIiI8ratWtfcfeJR2NbIzbATJ06lZaWlqEuhojIiGJmLx6tbWmKTEREKqEAIyIilVCAERGRSijAiIhIJRRgRESkEiP2LrLBWLZuC4tWtLK1o5PJE+pY0DSd5hn1Q10sEZFj0nETYJat28LCpRvo3LsfgC0dnSxcugFAQUZEpALHzRTZohWtB4JLoXPvfhataB2iEomIHNuOmwCztaNzQOkiInJ4jpspsskT6tjSQzCZPKFuCEojIjJwI+068nEzglnQNJ26MaO6pdWNGcWCpulDVCIRkf4rriNv6ejE6bqOvGzdlqEu2iEdNwGmeUY9N175buon1GFA/YQ6brzy3cM6+ouIFEbideTjZooMIsgooIjISDQSryMfNyMYEZGR7FDXi4fzdWQFGBGREWAkXkc+rqbIRERGqmJ6fyTdRaYAIyIyQoy068iaIhMRkUoowIiISCUUYEREpBIKMCIiUgkFGBERqYQCjIiIVEIBRkREKqEAIyIilVCAERGRSvQrwJjZBDO738yeMbOnzey9ZnaKmT1iZpvy58ml5ReaWZuZtZpZUyn9QjPbkJ/dYmaW6WPN7L5MX21mU4/4noqIyFHV3xHM3wMPuftPAT8LPA3cAKx092nAyvwdMzsXmAOcB8wCbjWz4j+03QbMB6bla1amzwNedfdzgJuBmw5zv0REZIj1GWDMbBzwPuAOAHf/sbt3ALOBxbnYYqA5388G7nX3Pe7+PNAGzDSzM4Bx7v64uztwV02eYl33A5cWoxsRERmZ+jOCORvYAfwfM1tnZp81s7cBp7v7NoD8OSmXrwc2l/K3Z1p9vq9N75bH3fcBu4BTawtiZvPNrMXMWnbs2NHPXRQRkaHQnwAzGrgAuM3dZwA/IqfDDqGnkYf3kt5bnu4J7re7e6O7N06cOLH3UouIyJDqT4BpB9rdfXX+fj8RcF7OaS/y5/bS8meW8jcAWzO9oYf0bnnMbDQwHtg50J0REZHho88A4+7fBzabWfHYtEuBp4DlwNxMmws8kO+XA3PyzrCziIv5a3IabbeZXZTXV66tyVOs6ypgVV6nERGREaq/Dxz7XeCfzewtwHPAR4jgtMTM5gEvAVcDuPtGM1tCBKF9wPXuvj/Xcx1wJ1AHPJgviBsI7jazNmLkMucw90tERIaYjdSBQmNjo7e0tAx1MURERhQzW+vujUdjW/pLfhERqYQCjIiIVKK/12DkCFq2bguLVrSytaOTyRPqWNA0neYZ9X1nFBEZQRRgjrJl67awcOkGOvfGfQ9bOjpZuHQDgIKMiBxTNEV2lC1a0XoguBQ69+5n0YrWISqRiEg1FGCOsq0dnQNKFxEZqRRgjrLJE+oGlC4iMlIpwBxlC5qmUzdmVLe0ujGjWNA0/RA5RERGJl3kP8qKC/m6i0xEjnUKMEOgeUa9AoqIHPM0RSYiIpVQgBERkUoowIiISCUUYEREpBIKMCIiUgkFGBERqYQCjIiIVEIBRkREKqEAIyIilVCAERGRSijAiIhIJRRgRESkEgowIiJSCQUYERGphAKMiIhUQgFGREQqoQAjIiKV6FeAMb
MXzGyDma03s5ZMO8XMHjGzTfnz5NLyC82szcxazayplH5hrqfNzG4xM8v0sWZ2X6avNrOpR3g/RUTkKBvICOaX3P18d2/M328AVrr7NGBl/o6ZnQvMAc4DZgG3mtmozHMbMB+Ylq9ZmT4PeNXdzwFuBm4a/C6JiMhwcDhTZLOBxfl+MdBcSr/X3fe4+/NAGzDTzM4Axrn74+7uwF01eYp13Q9cWoxuRERkZOpvgHHgYTNba2bzM+10d98GkD8nZXo9sLmUtz3T6vN9bXq3PO6+D9gFnFpbCDObb2YtZtayY8eOfhZdRESGwuh+Lnexu281s0nAI2b2TC/L9jTy8F7Se8vTPcH9duB2gMbGxoM+FxGR4aNfIxh335o/twNfAmYCL+e0F/lzey7eDpxZyt4AbM30hh7Su+Uxs9HAeGDnwHdHRESGiz4DjJm9zcx+ongPXAZ8F1gOzM3F5gIP5PvlwJy8M+ws4mL+mpxG221mF+X1lWtr8hTrugpYlddpRERkhOrPFNnpwJfymvto4Avu/pCZfRtYYmbzgJeAqwHcfaOZLQGeAvYB17v7/lzXdcCdQB3wYL4A7gDuNrM2YuQy5wjsm4iIDCEbqQOFxsZGb2lpGepiiIiMKGa2tvTnJpXSX/KLiEglFGBERKQSCjAiIlIJBRgREamEAoyIiFRCAUZERCqhACMiIpVQgBERkUoowIiISCUUYEREpBIKMCIiUgkFGBERqYQCjIiIVEIBRkREKqEAIyIilVCAERGRSijAiIhIJRRgRESkEgowIiJSCQUYERGphAKMiIhUQgFGREQqoQAjIiKVUIAREZFKKMCIiEglFGBERKQS/Q4wZjbKzNaZ2Vfy91PM7BEz25Q/Ty4tu9DM2sys1cyaSukXmtmG/OwWM7NMH2tm92X6ajObegT3UUREhsBARjC/Bzxd+v0GYKW7TwNW5u+Y2bnAHOA8YBZwq5mNyjy3AfOBafmalenzgFfd/RzgZuCmQe2NiIgMG/0KMGbWAPwy8NlS8mxgcb5fDDSX0u919z3u/jzQBsw0szOAce7+uLs7cFdNnmJd9wOXFqMbEREZmfo7gvk74E+AN0tpp7v7NoD8OSnT64HNpeXaM60+39emd8vj7vuAXcCptYUws/lm1mJmLTt27Ohn0UVEZCj0GWDM7EPAdndf28919jTy8F7Se8vTPcH9dndvdPfGiRMn9rM4IiIyFEb3Y5mLgQ+b2RXAW4FxZvZ54GUzO8Pdt+X01/Zcvh04s5S/Adia6Q09pJfztJvZaGA8sHOQ+yQjzLJ1W1i0opWtHZ1MnlDHgqbpNM+o7zujiAxrfY5g3H2huze4+1Ti4v0qd78GWA7MzcXmAg/k++XAnLwz7CziYv6anEbbbWYX5fWVa2vyFOu6Krdx0AhGjj3L1m1h4dINbOnoxIEtHZ0sXLqBZeu2DHXRROQwHc7fwXwK+KCZbQI+mL/j7huBJcBTwEPA9e6+P/NcR9wo0AY8CzyY6XcAp5pZG/CH5B1pcuxbtKKVzr37u6V17t3PohWtQ1QiETlS+jNFdoC7PwY8lu9/AFx6iOX+CvirHtJbgHf1kP4GcPVAyiLHhq0dnQNKF5GRQ3/JL0Nq8oS6AaWLyMihACNDakHTdOrGjOqWVjdmFAuapg9RiUTkSBnQFJnIkVbcLaa7yESOPQowMuSaZ9QroIgcgzRFJiIilVCAERGRSijAiIhIJRRgRESkEgowIiJSCQUYERGphAKMiIhUQgFGREQqoQAjIiKVUIAREZFKKMCIiEglFGBERKQSCjAiIlIJBRgREamEAoyIiFRCAUZERCqhACMiIpVQgBERkUoowIiISCUUYEREpBIKMCIiUgkFGBERqUSfAcbM3mpma8zsO2a20cw+kemnmNkjZrYpf55cyrPQzNrMrNXMmkrpF5rZhvzsFjOzTB9rZvdl+mozm1rBvoqIyFHUnxHMHuD97v6zwPnALDO7CLgBWOnu04CV+Ttmdi
4wBzgPmAXcamajcl23AfOBafmalenzgFfd/RzgZuCmw981EREZSn0GGA+v5a9j8uXAbGBxpi8GmvP9bOBed9/j7s8DbcBMMzsDGOfuj7u7A3fV5CnWdT9waTG6ERGRkalf12DMbJSZrQe2A4+4+2rgdHffBpA/J+Xi9cDmUvb2TKvP97Xp3fK4+z5gF3BqD+WYb2YtZtayY8eOfu2giIgMjX4FGHff7+7nAw3EaORdvSze08jDe0nvLU9tOW5390Z3b5w4cWIfpRYRkaE0oLvI3L0DeIy4dvJyTnuRP7fnYu3AmaVsDcDWTG/oIb1bHjMbDYwHdg6kbCIiMrz05y6yiWY2Id/XAR8AngGWA3NzsbnAA/l+OTAn7ww7i7iYvyan0Xab2UV5feXamjzFuq4CVuV1GhERGaFG92OZM4DFeSfYCcASd/+KmT0OLDGzecBLwNUA7r7RzJYATwH7gOvdfX+u6zrgTqAOeDBfAHcAd5tZGzFymXMkdk5ERIaOjdSBQmNjo7e0tAx1MURERhQzW+vujUdjW/pLfhERqYQCjIiIVKI/12BERIbEsnVbWLSila0dnUyeUMeCpuk0z6jvO6MMCwowIjIsLVu3hYVLN9C5N+4R2tLRycKlGwAUZEYIBRiRYUQ99i6LVrQeCC6Fzr37WbSi9bitk5FGAUZkmFCPvbutHZ0DSpfhRxf5RYaJ3nrsx6PJE+oGlC7DjwKMyDChHnt3C5qmUzdmVLe0ujGjWNA0fYhKJAOlACMyTKjH3l3zjHpuvPLd1E+ow4D6CXXceOW7j8vpwpFK12BEhokFTdO7XYMB9dibZ9QroIxgCjAiw0RxItVdZHKsUIARGUbUY5djia7BiIhIJRRgRESkEgowIiJSCQUYERGphAKMiIhUQgFGREQqoQAjIiKVUIAREZFKKMCIiEgl9Jf8IiJ90IPgBkcBRkSkF3oQ3OApwIjIQdRj76JHNw+eAoyIdKMee3d6ENzg6SK/iHSjRzd3pwfBDV6fAcbMzjSzR83saTPbaGa/l+mnmNkjZrYpf55cyrPQzNrMrNXMmkrpF5rZhvzsFjOzTB9rZvdl+mozm1rBvopIP6jH3p0e3Tx4/RnB7AP+yN1/GrgIuN7MzgVuAFa6+zRgZf5OfjYHOA+YBdxqZsW3cxswH5iWr1mZPg941d3PAW4GbjoC+yYig6Aee3d6dPPg9XkNxt23Advy/W4zexqoB2YDl+Rii4HHgP+a6fe6+x7geTNrA2aa2QvAOHd/HMDM7gKagQczz8dzXfcDnzYzc3c/7D0UkQHRo5sPpgfBDc6ALvLn1NUMYDVwegYf3H2bmU3KxeqBJ0rZ2jNtb76vTS/ybM517TOzXcCpwCsDKZ+IHD49ulmOlH4HGDM7CfgX4Pfd/Yd5+aTHRXtI817Se8tTW4b5xBQbU6ZM6avIIjJI6rHLkdCvu8jMbAwRXP7Z3Zdm8stmdkZ+fgawPdPbgTNL2RuArZne0EN6tzxmNhoYD+ysLYe73+7uje7eOHHixP4UXUREhkh/7iIz4A7gaXf/29JHy4G5+X4u8EApfU7eGXYWcTF/TU6n7Tazi3Kd19bkKdZ1FbBK119EREa2/kyRXQz8FrDBzNZn2p8CnwKWmNk84CXgagB332hmS4CniDvQrnf34mrhdcCdQB1xcf/BTL8DuDtvCNhJ3IUmIiIjmI3UgUJjY6O3tLQMdTFEREYUM1vr7o1HY1v6S34REamEAoyIiFRCAUZERCqhACMiIpVQgBERkUoowIiISCUUYEREpBIKMCIiUgkFGBERqYQCjIiIVEIBRkREKqEAIyIilVCAERGRSijAiIhIJRRgRESkEgowIiJSCQUYERGphAKMiIhUQgFGREQqoQAjIiKVUIAREZFKKMCIiEglFGBERKQSo4e6ACLDxbJ1W1i0opWtHZ1MnlDHgqbpNM+oH+piiYxYCjAiRHBZuHQDnXv3A7Clo5OFSzcAKMiIDJKmyESARS
taDwSXQufe/Sxa0TpEJRIZ+RRgRICtHZ0DSheRvvUZYMzsc2a23cy+W0o7xcweMbNN+fPk0mcLzazNzFrNrKmUfqGZbcjPbjEzy/SxZnZfpq82s6lHeB/lEJat28LFn1rFWTd8lYs/tYpl67YMdZGGzOQJdQNKF5G+9WcEcycwqybtBmClu08DVubvmNm5wBzgvMxzq5mNyjy3AfOBafkq1jkPeNXdzwFuBm4a7M5I/xXXHLZ0dOJ0XXM4XoPMgqbp1I0Z1S2tbswoFjRNH6ISiYx8fQYYd/86sLMmeTawON8vBppL6fe6+x53fx5oA2aa2RnAOHd/3N0duKsmT7Gu+4FLi9GNVEfXHLprnlHPjVe+m/oJdRhQP6GOG698ty7wixyGwd5Fdrq7bwNw921mNinT64EnSsu1Z9refF+bXuTZnOvaZ2a7gFOBV2o3ambziVEQU6ZMGWTRBXTNoSfNM+oVUESOoCN9kb+nkYf3kt5bnoMT3W9390Z3b5w4ceIgiyigaw4iUr3BBpiXc9qL/Lk909uBM0vLNQBbM72hh/RuecxsNDCeg6fk5AjTNQcRqdpgA8xyYG6+nws8UEqfk3eGnUVczF+T02m7zeyivL5ybU2eYl1XAavyOo1USNccRKRqfV6DMbN7gEuA08ysHfhz4FPAEjObB7wEXA3g7hvNbAnwFLAPuN7diyvJ1xF3pNUBD+YL4A7gbjNrI0Yuc47InkmfdM1BRKpkI3Ww0NjY6C0tLUNdDBGREcXM1rp749HYlv6SX0REKqEAIyIilVCAERGRSijAiIhIJUbsRX4z2wG8OMjsp9HDfwoYAipHdyrH8CoDqBy1joVyvN3dj8pfqo/YAHM4zKzlaN1FoXKoHCO1DCqHynG4NEUmIiKVUIAREZFKHK8B5vahLkBSObpTOboMhzKAylFL5RiA4/IajIiIVO94HcGIiEjFFGBERKQa7n7EX8DniGfEfLcm/WpgI/Am0NhDvrXAW4C/Ip5y+VoPy/wa8d+aNwJfqPnsIeIJmf8MtALfzbKcBKwBvgP8gPivzU8CF2S+9xL/3Xkz8Abw/SzL+/Pz38317czXM8C/L233DGAD8Xc5ncQzbp4Efr20zM8DPwT25+tFYCrxwLVbcr37s96ez/WtBT4ArAO+knVzO/C9ogzAWOC+zNMBXAE8nvVzoAzAhZnnjaz/jqyPtZm/Iz97AVhKPFX0UeA14B+LMuS6fj3XvRH461IZ2oB/A74OnF9bjizDhtzGDmAP8Gp+vjfrYHem/wPwv4EPZhl35jLP13zf38ltfIb4T939LUdR33uBTZnvpKyjH2f6j4g2dEFpm5tzXTuy7K/VtMEpwGP52Z5c39RDtNEXss5/CHwa+INc9w9y+3uyvqbSSxvNfXwi62IH8R/O+91G6TpeW4njZHe+NhFt7U3gXuDlLNNLxCM2iuP1f2ad7aupi5uB9Vmu3Vnfq4v64ODjtfiP6m353RV1sZton5sz/6/2UhcfJ5411ZmvZ4m/F/m7AR6vZ9XUxRrgk6UyvVFTF+8F/l9+/mqW4TvAJTV18mdEe3wFeB14GlhJ/G1KsczcXO+erPNbiPNET2WaCizIel5PtNf9wCk9nDuL46+tWOchzt8Lc5lWoGmg+butq6IA8z7gAg4OMD8NTCcOwMaaz6YCy/P9RdkIag/eacSJ7uT8fVLpszri2TMQJ1nL1z3EowJOyvSHspHOA1bn8p/MRvoh4O3ZMD4MbAF+Cfga8Jf5mkSM/E4rbft3stH9XH4Jz+W+bgMm5DLLgWXEifAOYBdxkFyR63+Vrgb8InAy8K5c7gtEgPkE8Je5vhOIP7b6WK7zI8DdwFeBabnM5KIM2RhXEo9DeB14OOvlY8C/ZlnmZNm/B7wN+AXgvwDfLJXhVKLxT8xtLAb+HvhM/n4b0dDf2UM51hIH4pJc5vIs+71ZpguAPyU6EA8TAXRG5n8f0UHZW6r3cfnTgH/Jeu1vOa7LbXSWyn
FP7uOHMu/TWadFO7mMCAYnAhcTJ8Ef1bTR23M9nwHOJU749x2ija7J+riHCAjfB5qJNvAs8Bd0Bb/e2ujDuQ+fyHU9xsDa6BVZ97uKOszX/UTbeJI4Zp7LOmgh2sCDue6/Jzol3QJMadsfI9rQ54g2dl8PdWFZHw8DN2R7+ATRLtfmNldn/o291MXHgT/uoeP6vgEer0tq6uILRGermTheXwMWZbmey2VfAn6feCzJ14D35LZPKG27NT//JeK5WPcRbbFoI6fk+tZmXT8HPJLfb22ZPlvkK63/V4hnavX0PRTtzbJeL+9hmXOzPscSAe1ZYFR/89e+Kpkic/ev08NTKd39aXdvPUS2y4mTP+7+hMdDymr9J+Af3f3VXG576bNLiAMLd/9XT0SlNLj7a8Bs4gAcQ0T6CflEztnARnf/iru/SJzwzgPeClxPPP9mLnCju2939zfdvfxXtP8B+Jq7f9vd1xIN4nyiVzgxH7L288SJaTHRMPZk2mziRPYG8DfEM3q+Dswiem1FHoCPAjfmPhZlmJ2fzyJGE+8hehi4+9Ysw7nAOOBniZPGj4gecnPm3wPclZ+dSTSgce7+TeJE8PYsM8DZwPfcfUf+/jWiR1mU8dRcfpO7byqVYycR6J4gepqLcvuLie++zd3/LfdhKtGZ+Jq7r3P3rdmmvg2YmY3N9f4wtzma6Ek3DqAcn8nlXymV44PE6PlXsnz1xIm6aCdXAxvc/XV3/xbxPdc+U8mJk9Zi4umsm4BLsw1cQrZRoqM0zt0fJ9ro80QnqJl4ltI24BvE6OED9N5GIb7fjwIrgK0DbKObsl7eRnz/i/M1DphJdGReAB5x94eBn8jybTOzBqIj9GkObXau6x6ijV1KzfEK/GQu8yDRuewkRlefzzLdTXxv3yQ6qYeqi26PaTWzaUSn8Bv9rIvieH1/TV3UE8dCM9EZ+EHm+wngW8Tx9yjZboH/C/w74hhuzLKMy7L8k7s/SgStS4ljonjib1Ou721Z14/k9pp7KNMUutpW4TeynrvJ9jvO3R/P8+Jduc5as4F73X2Puz9PnEtmDiB/N8PpGswsMsD04p3AO83sW2b2hJnNKn12IEAVzGwM8FvAQ2Y2iuj93EYcKKuJofG5RKN8vpS1negxrwPOIQ7wScAzZvaMmX3RzE7PbYwivugNNfkvIk56vwX8JtHQJhPDeoiD5QfEybQO6HT3XZn3NaJBf5GY6tlD14nsL8zs30plqCd6btPdfQPRCz01y/a9LMNeYqjd4e778vdfzH18F3GQbM4yFSe34klkv0kM/d/M39uAnzKzqfmI6+bMtznr4p3Eyaoow3ozm0kEyuczvYPo7dXn/r6N6J2R5dsTb30X3V0OvOHue4oEM1tBnBh2E9/jQMrxw6yLohwnZT3UE6PIUcSJqT3TzgYmm9mpZnYiXT3vso8T7WEp0fv/3dJ3cjlwZS5XD7SX2uhXiJP4NfmdbM8TTHvWx1s4dBv9PeB/ESfmfwB+ZhBtdDYxtVK00fZ8v4uuILa5lHcSMYr4O+BP6GofPZkKnE70rPflOn+VOC7Xl+pjS5blfiKwvJMIXG8hRqjtRHDZT9cj12vrYj/wO2b2pJl9jhjZ3+fFUHdwx2s78f29Snw/84hjqfh+9uV230scH1cSbeM8orNRPEb+A0R72wwH2vouohP7YNZFPdGW20tlOyHrsLZMxfdTtPETifPovxz8FRxo4+V9rs98HzazT5aW29zDcofM35thEWDM7C3EKOO5PhYdTfQQLiEi9WfNbEJ+djHRuym7Ffi6u3/D48ma3yS+/Jlm9q5c5iJiSFhWT4wu/nNu8xSi8X2aGDo+ThzQECOGzTX5T8ryfcTd/wcx7Ic4GZ0I/BNxEvdMm0KOOkouIoLfb5byNgDfcvcLSmUwYhppdSmvZ4/DiQOs1sdy+eJEe3KmX0ZMTXiu40PESbgYrZCjx+uIof03iJNi4T2lchT3v19O9D6LBlw+ITsxmh
xFnGALY4ih+QFmdh4xdVI+seDuTcSJdSxR74MtR7fNEdMXTgR5sowTiGnSR+i6/lPrN4gTwc8RJ5m7S9u4mPhOy9u/lRixPkf04h8lgu+JZnZNLjOWmBYqK7fR64D/nmW8nTjxD7SNFj38Q9WL1bw/iQgy23MU0JvxwFe96+m2ECOjb7r7+aV1/hRRF98lpmeL0eUJRECCOC5+XLP+cl3cBryDGJFsI+qm3KMf6PFa3ucTiXq6ExhT+n4aiIB1ExGgZhLnqV8gOmf7crlZROexdtvnA4uyLoqp/bJDfQ9e8/NXiPPDQbNHPazzQD53X5773dtyh8zfm2ERYIjedG1w6Ek78IC7783hWyswzczOBja7+4GGZ2Z/DkwE/rAm/8nE0HwW0TBmEA3qzMzXAPwRcI+7P5t5vkCceBcRPbWVxJw1xInrW6X8xVTFF9z9iVzmFeLktJUYWj6aZR9PnOSnECdyskyTsnx7clv3Zh3tB76Uy30xy9BO9D4fyhHFeKKX9FXgz7IM7UQPckIuU0c09KIn9JYs/xVEm/jJLOvFWT/XZhneb2afd/cvu/t73P29uR8/yPyXZ12OB3ZmXXyVuLD5UO5bURdTchtXZXnrsv5GEz3mAz3M/E6+RHyXtScX3P0N4hrXCQMsxzgimG3NtNcy/4nEHPkbxCioIcv7TXe/w90vcPf35We1Pfd5WSdn5vTXW3N/x9O9jbYTU5ZFG23KMj1HtIU1xEmzgTgJfZlDt9G5xJTJ68QF35l0tQ/oXxvdSQSoLblcA3GCHp/14HT1xKdlWScCHzazF4j2McrMPl/7/WR9PpHbHk0cgy+Uj1fi5p1xWRcfyDp4heilv0h0KhtK+zSpp7pw95fdfb+7v0mcU06sCYADOV7LdbGPaF/fy/ffp+v7eQfwjVLbaCAC/d/kejblemfm+2LblxEdvCtKo/JiJF1MmTVk3b/QQ5mK76cIKHPoYXqstN6G0u8N1HTWSsud2cNy/c3fzXAJMLOIude+LCMOfMzsNGII/Rw102Nm9h+JA/Y33P1NM5uYI53lwG8TDdiJ4eU04i6WaWb2M8S0xo+IqYZim+8nDvBriJPxDOJCNMQc6i3AZWY2KZc7ga6eMjk8f5S4aHYi0SPdDKzKMtUDF5rZB4m55w8B17v7JHefSjScVcTUwSWl7T6V+a8gAtFVRPD8EnCXu38xt7+NGHZ/h5h//kiW4cEsaycRRH6RuLi4y923uftC4oC/qyiDu1+T+4mZnUyMhu4iTnKXEieJVcSJ+0A5SmV4T9bFAuABYD4xtTE39+sq4gDemtuYQASHhVk2Mv2kHKUVJ63i7rmBlONx4vrCA5nva0QQegdxUl2Vy+0iTg4PlvZ9CjEaLnqnhZeIE+JcM/tp4qSwkoOncH+ZCD43E21xJhFkVxCj5l/P72VU/uytjW4l5vu/TNyJtomu9gH9aKPpR8SJZG6+ijb6CjFFc1m20TFEkPiIuzeU2uh+d7+mvEIzm050IIrAcBXRqao9Xi8mAvPMrMNGYprx2izXO4jvoTHz9lgXRZtIf0TXyb0wkOO1XBcvEh2+FcQNAT9HdC6K6ezbzWySmZ2Y+3wlEQD2uftTOQJ/hjhe55rZDGJ0+7B3v5a8ghj5vJ51fVlu74EeyrSZOCbdzMYTbeABelC0ezO7KK/ZXHuIZZcDc8xsrJmdRZwf1wwg/0EbruIusnuycvdmhczL9F+la075ZWBFpn8bqCvl/+tc7s38+fFiChX4W+LA2QDMyfQv0/1W0H3EFMv6fBW32T5J123KG4jh7J3edSfLK7nNbaW89cR8cCtxYnqWOGFMIXpwqzL/R4lejRMHSJH/n4g7XH4tPytuU3498zQSF/m3Z73syp/rS69mYgrp7cQUwpOlMjRk3jai1/sHWe/riRPTemII3pj7sCe335H1+OdEb3d3pj9D3uFH9Jp2EgfRDuCx0vf7VL7mECfKB3Kf1hDXKq7JchRlWE9M932XOF
hfzrrck/vwReLEszdfHcR3/xfECeTVTHfihPoHRLspbpf+B+JaTn/L0UG0E8+f3yaC0mulMvyYaC+N+XkdMUXyVNb5Dg5uo+cS0yIddN2mfDbZRoH1pTbanuXaQ0zpfSKXL27J3kN8r/f30UY/RATfp4ie/yYG1kbbStv8MRFcX8v1bMtyvJ4/i9f8muN1R673QF3kZx8nRv5fpKuNrqTrVuX1dB2vrVkfO3O54mL6buK4+H5u5+xe6mIJcWw/SbSbXyyVZSDH69nEd/5avr5N3C1XlKm4Tfn7xMgWom1sys9eJDosb8/P/pjo3L416+L1rO+ncrvL6WobHyWCxx6inX2aOPf1VKazM89vExfnezsvNxLH37PFOjP9w8AnS8v9t9L3cXlf+XvdZhUBZoDBqIG83XGQ+ccCLYPM+2dkkBpk/muAGw5z/4e8DCpHn+s8rtuo6uLwy0RctzvjSG57JLz0v8hERKQSw+UajIiIHGMUYEREpBIKMCIiUgkFGBERqYQCjIiIVEIBRkREKvH/AQqaVwTx6jxHAAAAAElFTkSuQmCC\n",
|
||
"text/plain": [
|
||
"<Figure size 432x288 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {
|
||
"needs_background": "light"
|
||
},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"fig, ax = plt.subplots()\n",
|
||
"ax.scatter(df['saledate'][:1000],df['SalePrice'][:1000]);"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"id": "ed600fdb",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZgAAAD4CAYAAADRuPC7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAaoklEQVR4nO3dcZSV9X3n8fcnTIKaBAUZDZmhHYycpOhJVpkQ0mS7NjRC1Irp0e54mpVNadlatpu025NA7InZ5HAONG1M2K4mNFKBGIXQJLJxXUOwabbnGHCMGkClTIXoBCrThUWaRgzmu388v1ueud4ZLsP87r0zfl7nPOc+z/d5fs/zvaPMd37P73efq4jAzMxstL2m2QmYmdn45AJjZmZZuMCYmVkWLjBmZpaFC4yZmWXR1uwEWsXUqVOjq6ur2WmYmY0pjz766D9FRHutfS4wSVdXF729vc1Ow8xsTJH0o6H2+RaZmZll4QJjZmZZuMCYmVkWLjBmZpaFC4yZmWXhAmNmZlm4wJiZWRYuMGZmloULjJmZZeFP8o+SrmX3N+W6+1de3ZTrmpmdinswZmaWhQuMmZll4QJjZmZZuMCYmVkWLjBmZpaFC4yZmWXhAmNmZlm4wJiZWRYuMGZmloULjJmZZeECY2ZmWWQrMJLWSjokaVeNfX8sKSRNLcWWS+qTtEfS/FJ8tqSdad9qSUrxiZI2pvh2SV2lNosk7U3Lolzv0czMhpazB3MXsKA6KGk68H7g2VJsFtADXJLa3C5pQtp9B7AEmJmWyjkXA0ci4mLgNmBVOtcU4FbgXcAc4FZJk0f5vZmZ2SlkKzAR8T3gcI1dtwEfA6IUWwjcGxHHI2If0AfMkTQNmBQRD0dEAOuB60pt1qX1zcC81LuZD2yNiMMRcQTYSo1CZ2ZmeTV0DEbStcCPI+KJql0dwHOl7f4U60jr1fFBbSLiBHAUOH+Yc9XKZ4mkXkm9AwMDI3pPZmZWW8MKjKRzgFuAT9baXSMWw8RH2mZwMGJNRHRHRHd7e3utQ8zMbIQa2YN5CzADeELSfqAT+IGkN1H0MqaXju0EDqR4Z4045TaS2oBzKW7JDXUuMzNroIYVmIjYGREXRERXRHRRFILLI+IfgS1AT5oZNoNiMH9HRBwEjkmam8ZXbgLuS6fcAlRmiF0PPJTGaR4ErpQ0OQ3uX5liZmbWQNm+MlnSPcAVwFRJ/cCtEXFnrWMjYrekTcCTwAlgaUS8nHbfTDEj7WzggbQA3AlskNRH0XPpSec6LOkzwCPpuE9HRK3JBmZmllG2AhMRN55if1fV9gpgRY3jeoFLa8RfBG4Y4txrgbWnka6ZmY0yf5LfzMyycIExM7MsXGDMzCwLFxgzM8vCBcbMzLJwgTEzsyxcYMzMLAsXGDMzy8IFxszMsnCBMTOzLFxgzMwsCxcYMzPLwgXGzMyycIExM7MsXGDMzCwLFxgzM8vCBcbMzLJwgTEzsyyyFRhJayUdkrSrFPuspKcl/VDSNySdV9q3XFKfpD2S5pfisyXtTPtWS1KKT5S0McW3S+oqtVkkaW9aFuV6j2ZmNrScPZi7gAVVsa3ApRHxduDvgeUAkmYBPcAlqc3tkiakNncAS4CZaamcczFwJCIuBm4DVqVzTQFuBd4FzAFulTQ5w/szM7NhZCswEfE94HBV7NsRcSJtfh/oTOsLgXsj4nhE7AP6gDmSpgGTIuLhiAhgPXBdqc26tL4ZmJd6N/OBrRFxOCKOUBS16kJnZmaZNXMM5reBB9J6B/BcaV9/inWk9er4oDapaB0Fzh/mXK8gaYmkXkm9AwMDZ/RmzMxssKYUGEm3ACeAuyuhGofFMPGRthkcjFgTEd0R0d3e3j580mZmdlraGn3BNOh+DTAv3faCopcxvXRYJ3AgxTtrxMtt+iW1AedS3JLrB66oavPdUX0TLaRr2f1Nu/b+lVc37dpm1voa2oORtAD4OHBtRPxLadcWoCfNDJtBMZ
i/IyIOAsckzU3jKzcB95XaVGaIXQ88lArWg8CVkianwf0rU8zMzBooWw9G0j0UPYmpkvopZnYtByYCW9Ns4+9HxO9FxG5Jm4AnKW6dLY2Il9OpbqaYkXY2xZhNZdzmTmCDpD6KnksPQEQclvQZ4JF03KcjYtBkAzMzyy9bgYmIG2uE7xzm+BXAihrxXuDSGvEXgRuGONdaYG3dyZqZ2ajzJ/nNzCwLFxgzM8vCBcbMzLJwgTEzsyxcYMzMLAsXGDMzy8IFxszMsnCBMTOzLFxgzMwsCxcYMzPLwgXGzMyycIExM7MsXGDMzCwLFxgzM8vCBcbMzLJwgTEzsyxcYMzMLAsXGDMzy8IFxszMsshWYCStlXRI0q5SbIqkrZL2ptfJpX3LJfVJ2iNpfik+W9LOtG+1JKX4REkbU3y7pK5Sm0XpGnslLcr1Hs3MbGg5ezB3AQuqYsuAbRExE9iWtpE0C+gBLkltbpc0IbW5A1gCzExL5ZyLgSMRcTFwG7AqnWsKcCvwLmAOcGu5kJmZWWNkKzAR8T3gcFV4IbAura8DrivF742I4xGxD+gD5kiaBkyKiIcjIoD1VW0q59oMzEu9m/nA1og4HBFHgK28stCZmVlmjR6DuTAiDgKk1wtSvAN4rnRcf4p1pPXq+KA2EXECOAqcP8y5XkHSEkm9knoHBgbO4G2ZmVm1VhnkV41YDBMfaZvBwYg1EdEdEd3t7e11JWpmZvVpdIF5Pt32Ir0eSvF+YHrpuE7gQIp31ogPaiOpDTiX4pbcUOcyM7MGanSB2QJUZnUtAu4rxXvSzLAZFIP5O9JttGOS5qbxlZuq2lTOdT3wUBqneRC4UtLkNLh/ZYqZmVkDteU6saR7gCuAqZL6KWZ2rQQ2SVoMPAvcABARuyVtAp4ETgBLI+LldKqbKWaknQ08kBaAO4ENkvooei496VyHJX0GeCQd9+mIqJ5sYGZmmdVVYCRdGhG7Tn3kSRFx4xC75g1x/ApgRY14L3BpjfiLpAJVY99aYG3dyZqZ2air9xbZFyXtkPT7ks7LmZCZmY0PdRWYiHgv8FsUg+e9kr4q6f1ZMzMzszGt7kH+iNgL/AnwceDfAaslPS3pN3IlZ2ZmY1ddBUbS2yXdBjwFvA/49Yj4pbR+W8b8zMxsjKp3FtlfAH8JfCIifloJRsQBSX+SJTMzMxvT6i0wVwE/rUwdlvQa4KyI+JeI2JAtOzMzG7PqHYP5DsXnUCrOSTEzM7Oa6i0wZ0XEP1c20vo5eVIyM7PxoN4C8xNJl1c2JM0GfjrM8WZm9ipX7xjMR4GvSao8NHIa8O+zZGRmZuNCXQUmIh6R9DbgrRSPw386In6WNTMzMxvTTudhl+8EulKbyyQREeuzZGVmZmNevQ+73AC8BXgcqDzluPIVxmZmZq9Qbw+mG5iVvm/FzMzslOqdRbYLeFPORMzMbHyptwczFXhS0g7geCUYEddmycrMzMa8egvMp3ImYWZm40+905T/VtIvAjMj4juSzgEm5E3NzMzGsnof1/+7wGbgSynUAXxzpBeV9IeSdkvaJekeSWdJmiJpq6S96XVy6fjlkvok7ZE0vxSfLWln2rdaklJ8oqSNKb5dUtdIczUzs5Gpd5B/KfAe4AX41y8fu2AkF5TUAfwXoDsiLqXoCfUAy4BtETET2Ja2kTQr7b8EWADcLqnSe7oDWALMTMuCFF8MHImIiym+r2bVSHI1M7ORq7fAHI+IlyobktooPgczUm3A2ek85wAHgIXAurR/HXBdWl8I3BsRxyNiH9AHzJE0DZgUEQ+n6dPrq9pUzrUZmFfp3ZiZWWPUW2D+VtInKIrC+4GvAf9zJBeMiB8DfwY8CxwEjkbEt4ELI+JgOuYgJ3tIHcBzpVP0p1hHWq+OD2oTESeAo8D51blIWiKpV1LvwMDASN6OmZkNod4CswwYAHYC/wn4X8CIvskyja0sBGYAbwZeL+lDwzWpEYth4sO1GRyIWBMR3RHR3d7ePn
ziZmZ2WuqdRfZziq9M/stRuOavAfsiYgBA0teBXwaelzQtIg6m21+H0vH9wPRS+06KW2r9ab06Xm7Tn27DnQscHoXczcysTvXOItsn6ZnqZYTXfBaYK+mcNC4yD3gK2AIsSscsAu5L61uAnjQzbAbFYP6OdBvtmKS56Tw3VbWpnOt64CE/5sbMrLFO51lkFWcBNwBTRnLBiNguaTPwA+AE8BiwBngDsEnSYooidEM6frekTcCT6filEVF54ObNwF0UX+f8QFoA7gQ2SOqj6Ln0jCRXMzMbOY30D3tJfxcR7x3lfJqmu7s7ent7R9y+a9n9o5jN2LB/5dXNTsHMmkzSoxHRXWtfvY/rv7y0+RqKHs0bRyE3MzMbp+q9RfbnpfUTwH7gN0c9GzMzGzfqnUX2q7kTMTOz8aXeW2R/NNz+iPjc6KRjZmbjxenMInsnxfRfgF8HvsfgT9ibmZn9q9P5wrHLI+IYgKRPAV+LiN/JlZiZmY1t9T4q5heAl0rbLwFdo56NmZmNG/X2YDYAOyR9g+KZXh+keHqxmZlZTfXOIlsh6QHg36bQhyPisXxpmZnZWFfvLTIovrflhYj4AsVDJGdkysnMzMaBeh92eSvwcWB5Cr0W+EqupMzMbOyrtwfzQeBa4CcAEXEAPyrGzMyGUW+BeSk97j4AJL0+X0pmZjYe1FtgNkn6EnCepN8FvsPofPmYmZmNU6ecRZa+zGsj8DbgBeCtwCcjYmvm3MzMbAw7ZYGJiJD0zYiYDbiomJlZXeq9RfZ9Se/MmomZmY0r9X6S/1eB35O0n2ImmSg6N2/PlZiZmY1tw/ZgJP1CWv0AcBHwPoonKV+TXkdE0nmSNkt6WtJTkt4taYqkrZL2ptfJpeOXS+qTtEfS/FJ8tqSdad/qNF6EpImSNqb4dkldI83VzMxG5lS3yL4JEBE/Aj4XET8qL2dw3S8A/zsi3ga8A3gKWAZsi4iZwLa0jaRZQA9wCbAAuF3ShHSeO4AlwMy0LEjxxcCRiLgYuA1YdQa5mpnZCJyqwKi0ftFoXFDSJOBXgDsBIuKliPh/wEJgXTpsHXBdWl8I3BsRxyNiH9AHzJE0DZgUEQ+nz+isr2pTOddmYF6ld2NmZo1xqgITQ6yfiYuAAeCvJD0m6cvpg5sXRsRBgPR6QTq+g8FfbNafYh1pvTo+qE1EnACOAuePUv5mZlaHUxWYd0h6QdIx4O1p/QVJxyS9MMJrtgGXA3dExGUUkwaWDXN8rZ5HDBMfrs3gE0tLJPVK6h0YGBg+azMzOy3DFpiImBARkyLijRHRltYr25NGeM1+oD8itqftzRQF5/l024v0eqh0/PRS+07gQIp31ogPaiOpDTgXOFzj/a2JiO6I6G5vbx/h2zEzs1rqnaY8aiLiHyU9J+mtEbEHmAc8mZZFwMr0el9qsgX4qqTPAW+mGMzfEREvp57UXGA7cBPw30ttFgEPA9cDD6VxGhtFXcvub8p196+8uinXNbPT0/ACk/wBcLek1wHPAB+m6E1tkrQYeBa4ASAidkvaRFGATgBLI+LldJ6bgbuAs4EH0gLFBIINkvooei49jXhTZmZ2UlMKTEQ8DnTX2DVviONXACtqxHuBS2vEXyQVKDMza47T+UZLMzOzurnAmJlZFi4wZmaWhQuMmZll4QJjZmZZuMCYmVkWLjBmZpaFC4yZmWXhAmNmZlm4wJiZWRYuMGZmloULjJmZZeECY2ZmWbjAmJlZFi4wZmaWhQuMmZll4QJjZmZZuMCYmVkWLjBmZpZF0wqMpAmSHpP0rbQ9RdJWSXvT6+TSscsl9UnaI2l+KT5b0s60b7UkpfhESRtTfLukroa/QTOzV7lm9mA+AjxV2l4GbIuImcC2tI2kWUAPcAmwALhd0oTU5g5gCTAzLQtSfDFwJCIuBm4DVuV9K2ZmVq0pBUZSJ3A18OVSeCGwLq2vA64rxe+NiOMRsQ/oA+ZImgZMioiHIyKA9VVtKufaDMyr9G7MzK
wxmtWD+TzwMeDnpdiFEXEQIL1ekOIdwHOl4/pTrCOtV8cHtYmIE8BR4PzqJCQtkdQrqXdgYOAM35KZmZU1vMBIugY4FBGP1tukRiyGiQ/XZnAgYk1EdEdEd3t7e53pmJlZPdqacM33ANdKugo4C5gk6SvA85KmRcTBdPvrUDq+H5heat8JHEjxzhrxcpt+SW3AucDhXG/IzMxeqeE9mIhYHhGdEdFFMXj/UER8CNgCLEqHLQLuS+tbgJ40M2wGxWD+jnQb7ZikuWl85aaqNpVzXZ+u8YoejJmZ5dOMHsxQVgKbJC0GngVuAIiI3ZI2AU8CJ4ClEfFyanMzcBdwNvBAWgDuBDZI6qPoufQ06k2YmVmhqQUmIr4LfDet/19g3hDHrQBW1Ij3ApfWiL9IKlBmZtYc/iS/mZll4QJjZmZZuMCYmVkWLjBmZpZFK80iM6tL17L7m3Ld/Suvbsp1zcYq92DMzCwLFxgzM8vCBcbMzLJwgTEzsyxcYMzMLAsXGDMzy8IFxszMsnCBMTOzLFxgzMwsCxcYMzPLwgXGzMyycIExM7Ms/LBLszo16yGb4Adt2tjU8B6MpOmS/kbSU5J2S/pIik+RtFXS3vQ6udRmuaQ+SXskzS/FZ0vamfatlqQUnyhpY4pvl9TV6PdpZvZq14xbZCeA/xoRvwTMBZZKmgUsA7ZFxExgW9om7esBLgEWALdLmpDOdQewBJiZlgUpvhg4EhEXA7cBqxrxxszM7KSGF5iIOBgRP0jrx4CngA5gIbAuHbYOuC6tLwTujYjjEbEP6APmSJoGTIqIhyMigPVVbSrn2gzMq/RuzMysMZo6yJ9uXV0GbAcujIiDUBQh4IJ0WAfwXKlZf4p1pPXq+KA2EXECOAqcX+P6SyT1SuodGBgYpXdlZmbQxAIj6Q3AXwMfjYgXhju0RiyGiQ/XZnAgYk1EdEdEd3t7+6lSNjOz09CUAiPptRTF5e6I+HoKP59ue5FeD6V4PzC91LwTOJDinTXig9pIagPOBQ6P/jsxM7OhNGMWmYA7gaci4nOlXVuARWl9EXBfKd6TZobNoBjM35Fuox2TNDed86aqNpVzXQ88lMZpzMysQZrxOZj3AP8B2Cnp8RT7BLAS2CRpMfAscANAROyWtAl4kmIG2tKIeDm1uxm4CzgbeCAtUBSwDZL6KHouPZnfk5mZVWl4gYmIv6P2GAnAvCHarABW1Ij3ApfWiL9IKlBmZtYcflSMmZll4QJjZmZZuMCYmVkWftil2RjQrAdt+iGbdibcgzEzsyxcYMzMLAsXGDMzy8IFxszMsnCBMTOzLDyLzMyG5K+JtjPhHoyZmWXhAmNmZlm4wJiZWRYuMGZmloULjJmZZeECY2ZmWXiaspm1JD/gc+xzD8bMzLJwgTEzsyzG9S0ySQuALwATgC9HxMomp2RmLc5PLxg947YHI2kC8D+ADwCzgBslzWpuVmZmrx7juQczB+iLiGcAJN0LLASebGpWZmZDGG8TG8ZzgekAnitt9wPvKh8gaQmwJG3+s6Q9VeeYCvxTtgxH31jLF8ZezmMtXxh7OTvf/AblrFVndK5fHGrHeC4wqhGLQRsRa4A1Q55A6o2I7tFOLJexli+MvZzHWr4w9nJ2vvk1KudxOwZD0WOZXtruBA40KRczs1ed8VxgHgFmSpoh6XVAD7ClyTmZmb1qjNtbZBFxQtJ/Bh6kmKa8NiJ2n+Zphrx91qLGWr4w9nIea/nC2MvZ+ebXkJwVEac+yszM7DSN51tkZmbWRC4wZmaWhQvMECQtkLRHUp+kZQ287nRJfyPpKUm7JX0kxadI2ippb3qdXGqzPOW5R9L8Uny2pJ1p32pJSvGJkjam+HZJXaOQ9wRJj0n61hjJ9zxJmyU9nX7W727lnCX9Yfr/YZekeySd1Wr5Slor6ZCkXaVYQ3KUtChdY6+kRWeQ72fT/xM/lPQNSee1Sr5D5Vza98eSQtLUlsk5IrxULRSTAv4BuAh4HfAEMK
tB154GXJ7W3wj8PcWjbv4UWJbiy4BVaX1Wym8iMCPlPSHt2wG8m+IzQQ8AH0jx3we+mNZ7gI2jkPcfAV8FvpW2Wz3fdcDvpPXXAee1as4UHxreB5ydtjcB/7HV8gV+Bbgc2FWKZc8RmAI8k14np/XJI8z3SqAtra9qpXyHyjnFp1NMaPoRMLVVcs7+C3MsLukH/2BpezmwvEm53Ae8H9gDTEuxacCeWrml/8nenY55uhS/EfhS+Zi03kbxiV6dQY6dwDbgfZwsMK2c7ySKX9iqirdkzpx8KsWUdK5vUfwibLl8gS4G/8LOnmP5mLTvS8CNI8m3at8HgbtbKd+hcgY2A+8A9nOywDQ9Z98iq63WY2Y6Gp1E6p5eBmwHLoyIgwDp9YJ02FC5dqT16vigNhFxAjgKnH8GqX4e+Bjw81KslfO9CBgA/krFbb0vS3p9q+YcET8G/gx4FjgIHI2Ib7dqvlUakWOuf6+/TfHXfUvnK+la4McR8UTVrqbn7AJT2ykfM5M9AekNwF8DH42IF4Y7tEYshokP1+a0SboGOBQRj9bbZIhrNyTfpI3iNsMdEXEZ8BOK2zdDafbPeDLFg1pnAG8GXi/pQ8M1GeLajfwZn8po5jjquUu6BTgB3H0G186er6RzgFuAT9baPYLrj2rOLjC1NfUxM5JeS1Fc7o6Ir6fw85Kmpf3TgEOnyLU/rVfHB7WR1AacCxweYbrvAa6VtB+4F3ifpK+0cL6V8/VHxPa0vZmi4LRqzr8G7IuIgYj4GfB14JdbON+yRuQ4qv9e0wD2NcBvRbof1ML5voXiD48n0r/BTuAHkt7UEjmP5J7weF8o/sJ9Jv2HqwzyX9KgawtYD3y+Kv5ZBg+W/mlav4TBA3nPcHIg7xFgLicH8q5K8aUMHsjbNEq5X8HJMZiWzhf4P8Bb0/qnUr4tmTPFU8B3A+ek66wD/qAV8+WVYzDZc6QYm9pHMfg8Oa1PGWG+Cyi+0qO96riWyLdWzlX79nNyDKbpOWf/hTlWF+Aqihlc/wDc0sDrvpei6/lD4PG0XEVxH3QbsDe9Tim1uSXluYc0GyTFu4Fdad9fcPLJDWcBXwP6KGaTXDRKuV/ByQLT0vkC/wboTT/nb6Z/NC2bM/DfgKfTtTakXxotlS9wD8UY0c8o/uJd3KgcKcZL+tLy4TPIt49irOHxtHyxVfIdKueq/ftJBaYVcvajYszMLAuPwZiZWRYuMGZmloULjJmZZeECY2ZmWbjAmJlZFi4wZmaWhQuMmZll8f8Bis5vTEC2MfMAAAAASUVORK5CYII=\n",
|
||
"text/plain": [
|
||
"<Figure size 432x288 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {
|
||
"needs_background": "light"
|
||
},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"df['SalePrice'].plot.hist();"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"id": "a6d54b7d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df = pd.read_csv('./data/buldozer/TrainAndValid.csv',\n",
|
||
" low_memory=False,\n",
|
||
" parse_dates=['saledate'])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 23,
|
||
"id": "8796321f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"dtype('<M8[ns]')"
|
||
]
|
||
},
|
||
"execution_count": 23,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.saledate.dtype"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"id": "92d8711b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"0 2006-11-16\n",
|
||
"1 2004-03-26\n",
|
||
"2 2004-02-26\n",
|
||
"3 2011-05-19\n",
|
||
"4 2009-07-23\n",
|
||
"Name: saledate, dtype: datetime64[ns]"
|
||
]
|
||
},
|
||
"execution_count": 24,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.saledate.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 27,
|
||
"id": "a29e2ad0",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"205615 1989-01-17\n",
|
||
"233186 1989-01-31\n",
|
||
"142491 1989-01-31\n",
|
||
"115536 1989-01-31\n",
|
||
"92301 1989-01-31\n",
|
||
"Name: saledate, dtype: datetime64[ns]"
|
||
]
|
||
},
|
||
"execution_count": 27,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.sort_values(by=['saledate'],inplace=True,ascending=True)\n",
|
||
"df.saledate.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 28,
|
||
"id": "0a2a301d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df_tmp = df.copy()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 29,
|
||
"id": "34c4362d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>SalesID</th>\n",
|
||
" <th>SalePrice</th>\n",
|
||
" <th>MachineID</th>\n",
|
||
" <th>ModelID</th>\n",
|
||
" <th>datasource</th>\n",
|
||
" <th>auctioneerID</th>\n",
|
||
" <th>YearMade</th>\n",
|
||
" <th>MachineHoursCurrentMeter</th>\n",
|
||
" <th>UsageBand</th>\n",
|
||
" <th>saledate</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>Undercarriage_Pad_Width</th>\n",
|
||
" <th>Stick_Length</th>\n",
|
||
" <th>Thumb</th>\n",
|
||
" <th>Pattern_Changer</th>\n",
|
||
" <th>Grouser_Type</th>\n",
|
||
" <th>Backhoe_Mounting</th>\n",
|
||
" <th>Blade_Type</th>\n",
|
||
" <th>Travel_Controls</th>\n",
|
||
" <th>Differential_Type</th>\n",
|
||
" <th>Steering_Controls</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>205615</th>\n",
|
||
" <td>1646770</td>\n",
|
||
" <td>9500.0</td>\n",
|
||
" <td>1126363</td>\n",
|
||
" <td>8434</td>\n",
|
||
" <td>132</td>\n",
|
||
" <td>18.0</td>\n",
|
||
" <td>1974</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1989-01-17</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>None or Unspecified</td>\n",
|
||
" <td>Straight</td>\n",
|
||
" <td>None or Unspecified</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>233186</th>\n",
|
||
" <td>1728883</td>\n",
|
||
" <td>30000.0</td>\n",
|
||
" <td>1523610</td>\n",
|
||
" <td>9105</td>\n",
|
||
" <td>132</td>\n",
|
||
" <td>99.0</td>\n",
|
||
" <td>1986</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1989-01-31</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>142491</th>\n",
|
||
" <td>1508502</td>\n",
|
||
" <td>21000.0</td>\n",
|
||
" <td>1153157</td>\n",
|
||
" <td>4138</td>\n",
|
||
" <td>132</td>\n",
|
||
" <td>99.0</td>\n",
|
||
" <td>1972</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1989-01-31</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>None or Unspecified</td>\n",
|
||
" <td>Angle</td>\n",
|
||
" <td>None or Unspecified</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>115536</th>\n",
|
||
" <td>1452578</td>\n",
|
||
" <td>33000.0</td>\n",
|
||
" <td>1544443</td>\n",
|
||
" <td>3854</td>\n",
|
||
" <td>132</td>\n",
|
||
" <td>99.0</td>\n",
|
||
" <td>1974</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1989-01-31</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Standard</td>\n",
|
||
" <td>Conventional</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>92301</th>\n",
|
||
" <td>1403418</td>\n",
|
||
" <td>24000.0</td>\n",
|
||
" <td>1390168</td>\n",
|
||
" <td>7110</td>\n",
|
||
" <td>132</td>\n",
|
||
" <td>99.0</td>\n",
|
||
" <td>1986</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1989-01-31</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 53 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" SalesID SalePrice MachineID ModelID datasource auctioneerID \\\n",
|
||
"205615 1646770 9500.0 1126363 8434 132 18.0 \n",
|
||
"233186 1728883 30000.0 1523610 9105 132 99.0 \n",
|
||
"142491 1508502 21000.0 1153157 4138 132 99.0 \n",
|
||
"115536 1452578 33000.0 1544443 3854 132 99.0 \n",
|
||
"92301 1403418 24000.0 1390168 7110 132 99.0 \n",
|
||
"\n",
|
||
" YearMade MachineHoursCurrentMeter UsageBand saledate ... \\\n",
|
||
"205615 1974 NaN NaN 1989-01-17 ... \n",
|
||
"233186 1986 NaN NaN 1989-01-31 ... \n",
|
||
"142491 1972 NaN NaN 1989-01-31 ... \n",
|
||
"115536 1974 NaN NaN 1989-01-31 ... \n",
|
||
"92301 1986 NaN NaN 1989-01-31 ... \n",
|
||
"\n",
|
||
" Undercarriage_Pad_Width Stick_Length Thumb Pattern_Changer \\\n",
|
||
"205615 NaN NaN NaN NaN \n",
|
||
"233186 NaN NaN NaN NaN \n",
|
||
"142491 NaN NaN NaN NaN \n",
|
||
"115536 NaN NaN NaN NaN \n",
|
||
"92301 NaN NaN NaN NaN \n",
|
||
"\n",
|
||
" Grouser_Type Backhoe_Mounting Blade_Type Travel_Controls \\\n",
|
||
"205615 NaN None or Unspecified Straight None or Unspecified \n",
|
||
"233186 NaN NaN NaN NaN \n",
|
||
"142491 NaN None or Unspecified Angle None or Unspecified \n",
|
||
"115536 NaN NaN NaN NaN \n",
|
||
"92301 NaN NaN NaN NaN \n",
|
||
"\n",
|
||
" Differential_Type Steering_Controls \n",
|
||
"205615 NaN NaN \n",
|
||
"233186 NaN NaN \n",
|
||
"142491 NaN NaN \n",
|
||
"115536 Standard Conventional \n",
|
||
"92301 NaN NaN \n",
|
||
"\n",
|
||
"[5 rows x 53 columns]"
|
||
]
|
||
},
|
||
"execution_count": 29,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_tmp.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 31,
|
||
"id": "5c5497e4",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Expand saledate into calendar feature columns for every row.\n",
|
||
"# The full column is used on purpose: assigning from a row slice such as\n",
|
||
"# df_tmp[:1] aligns on the index and leaves every other row as NaN.\n",
|
||
"# Relies on saledate already having a datetime dtype (required by the .dt accessor).\n",
|
||
"df_tmp['saleYear'] = df_tmp.saledate.dt.year\n",
|
||
"df_tmp['saleMonth'] = df_tmp.saledate.dt.month\n",
|
||
"df_tmp['saleDay'] = df_tmp.saledate.dt.day\n",
|
||
"df_tmp['saleDayOfWeek'] = df_tmp.saledate.dt.dayofweek\n",
|
||
"# NOTE(review): 'saleDayOYear' looks like a typo for 'saleDayOfYear'; the name is kept so existing references still resolve.\n",
|
||
"df_tmp['saleDayOYear'] = df_tmp.saledate.dt.dayofyear"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 32,
|
||
"id": "6f16033a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>SalesID</th>\n",
|
||
" <th>SalePrice</th>\n",
|
||
" <th>MachineID</th>\n",
|
||
" <th>ModelID</th>\n",
|
||
" <th>datasource</th>\n",
|
||
" <th>auctioneerID</th>\n",
|
||
" <th>YearMade</th>\n",
|
||
" <th>MachineHoursCurrentMeter</th>\n",
|
||
" <th>UsageBand</th>\n",
|
||
" <th>saledate</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>Backhoe_Mounting</th>\n",
|
||
" <th>Blade_Type</th>\n",
|
||
" <th>Travel_Controls</th>\n",
|
||
" <th>Differential_Type</th>\n",
|
||
" <th>Steering_Controls</th>\n",
|
||
" <th>saleYear</th>\n",
|
||
" <th>saleMonth</th>\n",
|
||
" <th>saleDay</th>\n",
|
||
" <th>saleDayOfWeek</th>\n",
|
||
" <th>saleDayOYear</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>205615</th>\n",
|
||
" <td>1646770</td>\n",
|
||
" <td>9500.0</td>\n",
|
||
" <td>1126363</td>\n",
|
||
" <td>8434</td>\n",
|
||
" <td>132</td>\n",
|
||
" <td>18.0</td>\n",
|
||
" <td>1974</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1989-01-17</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>None or Unspecified</td>\n",
|
||
" <td>Straight</td>\n",
|
||
" <td>None or Unspecified</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1989.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>17.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>17.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>233186</th>\n",
|
||
" <td>1728883</td>\n",
|
||
" <td>30000.0</td>\n",
|
||
" <td>1523610</td>\n",
|
||
" <td>9105</td>\n",
|
||
" <td>132</td>\n",
|
||
" <td>99.0</td>\n",
|
||
" <td>1986</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1989-01-31</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>142491</th>\n",
|
||
" <td>1508502</td>\n",
|
||
" <td>21000.0</td>\n",
|
||
" <td>1153157</td>\n",
|
||
" <td>4138</td>\n",
|
||
" <td>132</td>\n",
|
||
" <td>99.0</td>\n",
|
||
" <td>1972</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1989-01-31</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>None or Unspecified</td>\n",
|
||
" <td>Angle</td>\n",
|
||
" <td>None or Unspecified</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>115536</th>\n",
|
||
" <td>1452578</td>\n",
|
||
" <td>33000.0</td>\n",
|
||
" <td>1544443</td>\n",
|
||
" <td>3854</td>\n",
|
||
" <td>132</td>\n",
|
||
" <td>99.0</td>\n",
|
||
" <td>1974</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1989-01-31</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Standard</td>\n",
|
||
" <td>Conventional</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>92301</th>\n",
|
||
" <td>1403418</td>\n",
|
||
" <td>24000.0</td>\n",
|
||
" <td>1390168</td>\n",
|
||
" <td>7110</td>\n",
|
||
" <td>132</td>\n",
|
||
" <td>99.0</td>\n",
|
||
" <td>1986</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1989-01-31</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 58 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" SalesID SalePrice MachineID ModelID datasource auctioneerID \\\n",
|
||
"205615 1646770 9500.0 1126363 8434 132 18.0 \n",
|
||
"233186 1728883 30000.0 1523610 9105 132 99.0 \n",
|
||
"142491 1508502 21000.0 1153157 4138 132 99.0 \n",
|
||
"115536 1452578 33000.0 1544443 3854 132 99.0 \n",
|
||
"92301 1403418 24000.0 1390168 7110 132 99.0 \n",
|
||
"\n",
|
||
" YearMade MachineHoursCurrentMeter UsageBand saledate ... \\\n",
|
||
"205615 1974 NaN NaN 1989-01-17 ... \n",
|
||
"233186 1986 NaN NaN 1989-01-31 ... \n",
|
||
"142491 1972 NaN NaN 1989-01-31 ... \n",
|
||
"115536 1974 NaN NaN 1989-01-31 ... \n",
|
||
"92301 1986 NaN NaN 1989-01-31 ... \n",
|
||
"\n",
|
||
" Backhoe_Mounting Blade_Type Travel_Controls Differential_Type \\\n",
|
||
"205615 None or Unspecified Straight None or Unspecified NaN \n",
|
||
"233186 NaN NaN NaN NaN \n",
|
||
"142491 None or Unspecified Angle None or Unspecified NaN \n",
|
||
"115536 NaN NaN NaN Standard \n",
|
||
"92301 NaN NaN NaN NaN \n",
|
||
"\n",
|
||
" Steering_Controls saleYear saleMonth saleDay saleDayOfWeek saleDayOYear \n",
|
||
"205615 NaN 1989.0 1.0 17.0 1.0 17.0 \n",
|
||
"233186 NaN NaN NaN NaN NaN NaN \n",
|
||
"142491 NaN NaN NaN NaN NaN NaN \n",
|
||
"115536 Conventional NaN NaN NaN NaN NaN \n",
|
||
"92301 NaN NaN NaN NaN NaN NaN \n",
|
||
"\n",
|
||
"[5 rows x 58 columns]"
|
||
]
|
||
},
|
||
"execution_count": 32,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_tmp.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 35,
|
||
"id": "c02bc1ce",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Remove the raw saledate column in place; the sale* columns derived from it are kept.\n",
|
||
"# Not idempotent: running this cell a second time raises KeyError because the column is already gone.\n",
|
||
"df_tmp.drop(columns=['saledate'], inplace=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "e8765a53",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn.ensemble import RandomForestRegressor\n",
|
||
"\n",
|
||
"# Seeded random forest regressor; n_jobs=-1 asks scikit-learn to use all processors.\n",
|
||
"model = RandomForestRegressor(random_state=42, n_jobs=-1)\n",
|
||
"\n",
|
||
"# NOTE(review): this only references the bound method, so nothing is trained here.\n",
|
||
"# Fitting still needs an explicit fit(X, y) call with features and target.\n",
|
||
"model.fit"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.13.5"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|