{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "e003c92d", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "import sklearn" ] }, { "cell_type": "code", "execution_count": 2, "id": "0a095019", "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv('./data/buldozer/TrainAndValid.csv',\n", " low_memory=False)" ] }, { "cell_type": "code", "execution_count": 4, "id": "9c376485", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 412698 entries, 0 to 412697\n", "Data columns (total 53 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 SalesID 412698 non-null int64 \n", " 1 SalePrice 412698 non-null float64\n", " 2 MachineID 412698 non-null int64 \n", " 3 ModelID 412698 non-null int64 \n", " 4 datasource 412698 non-null int64 \n", " 5 auctioneerID 392562 non-null float64\n", " 6 YearMade 412698 non-null int64 \n", " 7 MachineHoursCurrentMeter 147504 non-null float64\n", " 8 UsageBand 73670 non-null object \n", " 9 saledate 412698 non-null object \n", " 10 fiModelDesc 412698 non-null object \n", " 11 fiBaseModel 412698 non-null object \n", " 12 fiSecondaryDesc 271971 non-null object \n", " 13 fiModelSeries 58667 non-null object \n", " 14 fiModelDescriptor 74816 non-null object \n", " 15 ProductSize 196093 non-null object \n", " 16 fiProductClassDesc 412698 non-null object \n", " 17 state 412698 non-null object \n", " 18 ProductGroup 412698 non-null object \n", " 19 ProductGroupDesc 412698 non-null object \n", " 20 Drive_System 107087 non-null object \n", " 21 Enclosure 412364 non-null object \n", " 22 Forks 197715 non-null object \n", " 23 Pad_Type 81096 non-null object \n", " 24 Ride_Control 152728 non-null object \n", " 25 Stick 81096 non-null object \n", " 26 Transmission 188007 non-null object \n", " 27 Turbocharged 81096 non-null object \n", " 28 
Blade_Extension 25983 non-null object \n", " 29 Blade_Width 25983 non-null object \n", " 30 Enclosure_Type 25983 non-null object \n", " 31 Engine_Horsepower 25983 non-null object \n", " 32 Hydraulics 330133 non-null object \n", " 33 Pushblock 25983 non-null object \n", " 34 Ripper 106945 non-null object \n", " 35 Scarifier 25994 non-null object \n", " 36 Tip_Control 25983 non-null object \n", " 37 Tire_Size 97638 non-null object \n", " 38 Coupler 220679 non-null object \n", " 39 Coupler_System 44974 non-null object \n", " 40 Grouser_Tracks 44875 non-null object \n", " 41 Hydraulics_Flow 44875 non-null object \n", " 42 Track_Type 102193 non-null object \n", " 43 Undercarriage_Pad_Width 102916 non-null object \n", " 44 Stick_Length 102261 non-null object \n", " 45 Thumb 102332 non-null object \n", " 46 Pattern_Changer 102261 non-null object \n", " 47 Grouser_Type 102193 non-null object \n", " 48 Backhoe_Mounting 80712 non-null object \n", " 49 Blade_Type 81875 non-null object \n", " 50 Travel_Controls 81877 non-null object \n", " 51 Differential_Type 71564 non-null object \n", " 52 Steering_Controls 71522 non-null object \n", "dtypes: float64(3), int64(5), object(45)\n", "memory usage: 166.9+ MB\n" ] } ], "source": [ "df.info()" ] }, { "cell_type": "code", "execution_count": 16, "id": "e4a4e252", "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAZgAAAD4CAYAAADRuPC7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAkMElEQVR4nO3df5xW5Xnn8c8lEDLGAv4AKzMSNBJaTVrRKTG1zdqYOGjTMHW1pV0rTdhl19i++pOudLvbJG3XWLa1tammbsyKplGJJUiSKhrQzY9VyFCIBHXC+JMBIhgcgnEkgNf+cV2HOfMwzC84zAx836/X85pn7ufc59znfu5zrvu+z5k55u6IiIgcaScMdQFEROTYpAAjIiKVUIAREZFKKMCIiEglFGBERKQSo4e6AIN12mmn+dSpU4e6GCIiI8ratWtfcfeJR2NbIzbATJ06lZaWlqEuhojIiGJmLx6tbWmKTEREKqEAIyIilVCAERGRSijAiIhIJRRgRESkEiP2LrLBWLZuC4tWtLK1o5PJE+pY0DSd5hn1Q10sEZFj0nETYJat28LCpRvo3LsfgC0dnSxcugFAQUZEpALHzRTZohWtB4JLoXPvfhataB2iEomIHNuOmwCztaNzQOkiInJ4jpspsskT6tjSQzCZPKFuCEojIjJwI+068nEzglnQNJ26MaO6pdWNGcWCpulDVCIRkf4rriNv6ejE6bqOvGzdlqEu2iEdNwGmeUY9N175buon1GFA/YQ6brzy3cM6+ouIFEbideTjZooMIsgooIjISDQSryMfNyMYEZGR7FDXi4fzdWQFGBGREWAkXkc+rqbIRERGqmJ6fyTdRaYAIyIyQoy068iaIhMRkUoowIiISCUUYEREpBIKMCIiUgkFGBERqYQCjIiIVEIBRkREKqEAIyIilVCAERGRSvQrwJjZBDO738yeMbOnzey9ZnaKmT1iZpvy58ml5ReaWZuZtZpZUyn9QjPbkJ/dYmaW6WPN7L5MX21mU4/4noqIyFHV3xHM3wMPuftPAT8LPA3cAKx092nAyvwdMzsXmAOcB8wCbjWz4j+03QbMB6bla1amzwNedfdzgJuBmw5zv0REZIj1GWDMbBzwPuAOAHf/sbt3ALOBxbnYYqA5388G7nX3Pe7+PNAGzDSzM4Bx7v64uztwV02eYl33A5cWoxsRERmZ+jOCORvYAfwfM1tnZp81s7cBp7v7NoD8OSmXrwc2l/K3Z1p9vq9N75bH3fcBu4BTawtiZvPNrMXMWnbs2NHPXRQRkaHQnwAzGrgAuM3dZwA/IqfDDqGnkYf3kt5bnu4J7re7e6O7N06cOLH3UouIyJDqT4BpB9rdfXX+fj8RcF7OaS/y5/bS8meW8jcAWzO9oYf0bnnMbDQwHtg50J0REZHho88A4+7fBzabWfHYtEuBp4DlwNxMmws8kO+XA3PyzrCziIv5a3IabbeZXZTXV66tyVOs6ypgVV6nERGREaq/Dxz7XeCfzewtwHPAR4jgtMTM5gEvAVcDuPtGM1tCBKF9wPXuvj/Xcx1wJ1AHPJgviBsI7jazNmLkMucw90tERIaYjdSBQmNjo7e0tAx1MURERhQzW+vujUdjW/pLfhERqYQCjIiIVKK/12DkCFq2bguLVrSytaOTyRPqWNA0neYZ9X1nFBEZQRRgjrJl67awcOkGOvfGfQ9bOjpZuHQDgIKMiBxTNEV2lC1a0XoguBQ69+5n0YrWISqRiEg1FGCOsq0dnQNKFxEZqRRgjrLJE+oGlC4iMlIpwBxlC5qmUzdmVLe0ujGjWNA0/RA5RERGJl3kP8qKC/m6i0xEjnUKMEOgeUa9AoqIHPM0RSYiIpVQgBERkUoowIiISCUUYEREpBIKMCIiUgkFGBERqYQCjIiIVEIBRkREKqEAIyIilVCAERGRSijAiIhIJRRgRESkEgowIiJSCQUYERGphAKMiIhUQgFGREQqoQAjIiKV6FeAMbMXzGyDma03s5Z
MO8XMHjGzTfnz5NLyC82szcxazayplH5hrqfNzG4xM8v0sWZ2X6avNrOpR3g/RUTkKBvICOaX3P18d2/M328AVrr7NGBl/o6ZnQvMAc4DZgG3mtmozHMbMB+Ylq9ZmT4PeNXdzwFuBm4a/C6JiMhwcDhTZLOBxfl+MdBcSr/X3fe4+/NAGzDTzM4Axrn74+7uwF01eYp13Q9cWoxuRERkZOpvgHHgYTNba2bzM+10d98GkD8nZXo9sLmUtz3T6vN9bXq3PO6+D9gFnFpbCDObb2YtZtayY8eOfhZdRESGwuh+Lnexu281s0nAI2b2TC/L9jTy8F7Se8vTPcH9duB2gMbGxoM+FxGR4aNfIxh335o/twNfAmYCL+e0F/lzey7eDpxZyt4AbM30hh7Su+Uxs9HAeGDnwHdHRESGiz4DjJm9zcx+ongPXAZ8F1gOzM3F5gIP5PvlwJy8M+ws4mL+mpxG221mF+X1lWtr8hTrugpYlddpRERkhOrPFNnpwJfymvto4Avu/pCZfRtYYmbzgJeAqwHcfaOZLQGeAvYB17v7/lzXdcCdQB3wYL4A7gDuNrM2YuQy5wjsm4iIDCEbqQOFxsZGb2lpGepiiIiMKGa2tvTnJpXSX/KLiEglFGBERKQSCjAiIlIJBRgREamEAoyIiFRCAUZERCqhACMiIpVQgBERkUoowIiISCUUYEREpBIKMCIiUgkFGBERqYQCjIiIVEIBRkREKqEAIyIilVCAERGRSijAiIhIJRRgRESkEgowIiJSCQUYERGphAKMiIhUQgFGREQqoQAjIiKVUIAREZFKKMCIiEglFGBERKQS/Q4wZjbKzNaZ2Vfy91PM7BEz25Q/Ty4tu9DM2sys1cyaSukXmtmG/OwWM7NMH2tm92X6ajObegT3UUREhsBARjC/Bzxd+v0GYKW7TwNW5u+Y2bnAHOA8YBZwq5mNyjy3AfOBafmalenzgFfd/RzgZuCmQe2NiIgMG/0KMGbWAPwy8NlS8mxgcb5fDDSX0u919z3u/jzQBsw0szOAce7+uLs7cFdNnmJd9wOXFqMbEREZmfo7gvk74E+AN0tpp7v7NoD8OSnT64HNpeXaM60+39emd8vj7vuAXcCptYUws/lm1mJmLTt27Ohn0UVEZCj0GWDM7EPAdndf28919jTy8F7Se8vTPcH9dndvdPfGiRMn9rM4IiIyFEb3Y5mLgQ+b2RXAW4FxZvZ54GUzO8Pdt+X01/Zcvh04s5S/Adia6Q09pJfztJvZaGA8sHOQ+yQjzLJ1W1i0opWtHZ1MnlDHgqbpNM+o7zujiAxrfY5g3H2huze4+1Ti4v0qd78GWA7MzcXmAg/k++XAnLwz7CziYv6anEbbbWYX5fWVa2vyFOu6Krdx0AhGjj3L1m1h4dINbOnoxIEtHZ0sXLqBZeu2DHXRROQwHc7fwXwK+KCZbQI+mL/j7huBJcBTwEPA9e6+P/NcR9wo0AY8CzyY6XcAp5pZG/CH5B1pcuxbtKKVzr37u6V17t3PohWtQ1QiETlS+jNFdoC7PwY8lu9/AFx6iOX+CvirHtJbgHf1kP4GcPVAyiLHhq0dnQNKF5GRQ3/JL0Nq8oS6AaWLyMihACNDakHTdOrGjOqWVjdmFAuapg9RiUTkSBnQFJnIkVbcLaa7yESOPQowMuSaZ9QroIgcgzRFJiIilVCAERGRSijAiIhIJRRgRESkEgowIiJSCQUYERGphAKMiIhUQgFGREQqoQAjIiKVUIAREZFKKMCIiEglFGBERKQSCjAiIlIJBRgREamEAoyIiFRCAUZERCqhACMiIpVQgBERkUoowIiISCUUYEREpBIKMCIiUgkFGBERqUSfAcbM3mpma8zsO2a20cw+kemnmNkjZrYpf55cyrPQzNrMrNXMmkrpF5rZhvzsFjOzTB9rZvdl+mozm1rBvoqIyFHUnxHMHuD97v6zwPnALDO7CLgBWOnu04CV+Ttmdi4wBzgPmAXcama
jcl23AfOBafmalenzgFfd/RzgZuCmw981EREZSn0GGA+v5a9j8uXAbGBxpi8GmvP9bOBed9/j7s8DbcBMMzsDGOfuj7u7A3fV5CnWdT9waTG6ERGRkalf12DMbJSZrQe2A4+4+2rgdHffBpA/J+Xi9cDmUvb2TKvP97Xp3fK4+z5gF3BqD+WYb2YtZtayY8eOfu2giIgMjX4FGHff7+7nAw3EaORdvSze08jDe0nvLU9tOW5390Z3b5w4cWIfpRYRkaE0oLvI3L0DeIy4dvJyTnuRP7fnYu3AmaVsDcDWTG/oIb1bHjMbDYwHdg6kbCIiMrz05y6yiWY2Id/XAR8AngGWA3NzsbnAA/l+OTAn7ww7i7iYvyan0Xab2UV5feXamjzFuq4CVuV1GhERGaFG92OZM4DFeSfYCcASd/+KmT0OLDGzecBLwNUA7r7RzJYATwH7gOvdfX+u6zrgTqAOeDBfAHcAd5tZGzFymXMkdk5ERIaOjdSBQmNjo7e0tAx1MURERhQzW+vujUdjW/pLfhERqYQCjIiIVKI/12BERIbEsnVbWLSila0dnUyeUMeCpuk0z6jvO6MMCwowIjIsLVu3hYVLN9C5N+4R2tLRycKlGwAUZEYIBRiRYUQ99i6LVrQeCC6Fzr37WbSi9bitk5FGAUZkmFCPvbutHZ0DSpfhRxf5RYaJ3nrsx6PJE+oGlC7DjwKMyDChHnt3C5qmUzdmVLe0ujGjWNA0fYhKJAOlACMyTKjH3l3zjHpuvPLd1E+ow4D6CXXceOW7j8vpwpFK12BEhokFTdO7XYMB9dibZ9QroIxgCjAiw0RxItVdZHKsUIARGUbUY5djia7BiIhIJRRgRESkEgowIiJSCQUYERGphAKMiIhUQgFGREQqoQAjIiKVUIAREZFKKMCIiEgl9Jf8IiJ90IPgBkcBRkSkF3oQ3OApwIjIQdRj76JHNw+eAoyIdKMee3d6ENzg6SK/iHSjRzd3pwfBDV6fAcbMzjSzR83saTPbaGa/l+mnmNkjZrYpf55cyrPQzNrMrNXMmkrpF5rZhvzsFjOzTB9rZvdl+mozm1rBvopIP6jH3p0e3Tx4/RnB7AP+yN1/GrgIuN7MzgVuAFa6+zRgZf5OfjYHOA+YBdxqZsW3cxswH5iWr1mZPg941d3PAW4GbjoC+yYig6Aee3d6dPPg9XkNxt23Advy/W4zexqoB2YDl+Rii4HHgP+a6fe6+x7geTNrA2aa2QvAOHd/HMDM7gKagQczz8dzXfcDnzYzc3c/7D0UkQHRo5sPpgfBDc6ALvLn1NUMYDVwegYf3H2bmU3KxeqBJ0rZ2jNtb76vTS/ybM517TOzXcCpwCsDKZ+IHD49ulmOlH4HGDM7CfgX4Pfd/Yd5+aTHRXtI817Se8tTW4b5xBQbU6ZM6avIIjJI6rHLkdCvu8jMbAwRXP7Z3Zdm8stmdkZ+fgawPdPbgTNL2RuArZne0EN6tzxmNhoYD+ysLYe73+7uje7eOHHixP4UXUREhkh/7iIz4A7gaXf/29JHy4G5+X4u8EApfU7eGXYWcTF/TU6n7Tazi3Kd19bkKdZ1FbBK119EREa2/kyRXQz8FrDBzNZn2p8CnwKWmNk84CXgagB332hmS4CniDvQrnf34mrhdcCdQB1xcf/BTL8DuDtvCNhJ3IUmIiIjmI3UgUJjY6O3tLQMdTFEREYUM1vr7o1HY1v6S34REamEAoyIiFRCAUZERCqhACMiIpVQgBERkUoowIiISCUUYEREpBIKMCIiUgkFGBERqYQCjIiIVEIBRkREKqEAIyIilVCAERGRSijAiIhIJRRgRESkEgowIiJSCQUYERGphAKMiIhUQgFGREQqoQAjIiKVUIAREZFKKMCIiEglFGBERKQSo4e6ACLDxbJ1W1i0opWtHZ1MnlDHgqbpNM+oH+piiYxYCjAiRHBZuHQDnXv3A7Clo5OFSzcAKMiIDJKmyESARStaDwSXQufe/Sx
a0TpEJRIZ+RRgRICtHZ0DSheRvvUZYMzsc2a23cy+W0o7xcweMbNN+fPk0mcLzazNzFrNrKmUfqGZbcjPbjEzy/SxZnZfpq82s6lHeB/lEJat28LFn1rFWTd8lYs/tYpl67YMdZGGzOQJdQNKF5G+9WcEcycwqybtBmClu08DVubvmNm5wBzgvMxzq5mNyjy3AfOBafkq1jkPeNXdzwFuBm4a7M5I/xXXHLZ0dOJ0XXM4XoPMgqbp1I0Z1S2tbswoFjRNH6ISiYx8fQYYd/86sLMmeTawON8vBppL6fe6+x53fx5oA2aa2RnAOHd/3N0duKsmT7Gu+4FLi9GNVEfXHLprnlHPjVe+m/oJdRhQP6GOG698ty7wixyGwd5Fdrq7bwNw921mNinT64EnSsu1Z9refF+bXuTZnOvaZ2a7gFOBV2o3ambziVEQU6ZMGWTRBXTNoSfNM+oVUESOoCN9kb+nkYf3kt5bnoMT3W9390Z3b5w4ceIgiyigaw4iUr3BBpiXc9qL/Lk909uBM0vLNQBbM72hh/RuecxsNDCeg6fk5AjTNQcRqdpgA8xyYG6+nws8UEqfk3eGnUVczF+T02m7zeyivL5ybU2eYl1XAavyOo1USNccRKRqfV6DMbN7gEuA08ysHfhz4FPAEjObB7wEXA3g7hvNbAnwFLAPuN7diyvJ1xF3pNUBD+YL4A7gbjNrI0Yuc47InkmfdM1BRKpkI3Ww0NjY6C0tLUNdDBGREcXM1rp749HYlv6SX0REKqEAIyIilVCAERGRSijAiIhIJUbsRX4z2wG8OMjsp9HDfwoYAipHdyrH8CoDqBy1joVyvN3dj8pfqo/YAHM4zKzlaN1FoXKoHCO1DCqHynG4NEUmIiKVUIAREZFKHK8B5vahLkBSObpTOboMhzKAylFL5RiA4/IajIiIVO94HcGIiEjFFGBERKQa7n7EX8DniGfEfLcm/WpgI/Am0NhDvrXAW4C/Ip5y+VoPy/wa8d+aNwJfqPnsIeIJmf8MtALfzbKcBKwBvgP8gPivzU8CF2S+9xL/3Xkz8Abw/SzL+/Pz38317czXM8C/L233DGAD8Xc5ncQzbp4Efr20zM8DPwT25+tFYCrxwLVbcr37s96ez/WtBT4ArAO+knVzO/C9ogzAWOC+zNMBXAE8nvVzoAzAhZnnjaz/jqyPtZm/Iz97AVhKPFX0UeA14B+LMuS6fj3XvRH461IZ2oB/A74OnF9bjizDhtzGDmAP8Gp+vjfrYHem/wPwv4EPZhl35jLP13zf38ltfIb4T939LUdR33uBTZnvpKyjH2f6j4g2dEFpm5tzXTuy7K/VtMEpwGP52Z5c39RDtNEXss5/CHwa+INc9w9y+3uyvqbSSxvNfXwi62IH8R/O+91G6TpeW4njZHe+NhFt7U3gXuDlLNNLxCM2iuP1f2ad7aupi5uB9Vmu3Vnfq4v64ODjtfiP6m353RV1sZton5sz/6/2UhcfJ5411ZmvZ4m/F/m7AR6vZ9XUxRrgk6UyvVFTF+8F/l9+/mqW4TvAJTV18mdEe3wFeB14GlhJ/G1KsczcXO+erPNbiPNET2WaCizIel5PtNf9wCk9nDuL46+tWOchzt8Lc5lWoGmg+butq6IA8z7gAg4OMD8NTCcOwMaaz6YCy/P9RdkIag/eacSJ7uT8fVLpszri2TMQJ1nL1z3EowJOyvSHspHOA1bn8p/MRvoh4O3ZMD4MbAF+Cfga8Jf5mkSM/E4rbft3stH9XH4Jz+W+bgMm5DLLgWXEifAOYBdxkFyR63+Vrgb8InAy8K5c7gtEgPkE8Je5vhOIP7b6WK7zI8DdwFeBabnM5KIM2RhXEo9DeB14OOvlY8C/ZlnmZNm/B7wN+AXgvwDfLJXhVKLxT8xtLAb+HvhM/n4b0dDf2UM51hIH4pJc5vIs+71ZpguAPyU6EA8TAXRG5n8f0UHZW6r3cfnTgH/Jeu1vOa7LbXSWynFP7uOHMu/TWad
FO7mMCAYnAhcTJ8Ef1bTR23M9nwHOJU749x2ija7J+riHCAjfB5qJNvAs8Bd0Bb/e2ujDuQ+fyHU9xsDa6BVZ97uKOszX/UTbeJI4Zp7LOmgh2sCDue6/Jzol3QJMadsfI9rQ54g2dl8PdWFZHw8DN2R7+ATRLtfmNldn/o291MXHgT/uoeP6vgEer0tq6uILRGermTheXwMWZbmey2VfAn6feCzJ14D35LZPKG27NT//JeK5WPcRbbFoI6fk+tZmXT8HPJLfb22ZPlvkK63/V4hnavX0PRTtzbJeL+9hmXOzPscSAe1ZYFR/89e+Kpkic/ev08NTKd39aXdvPUS2y4mTP+7+hMdDymr9J+Af3f3VXG576bNLiAMLd/9XT0SlNLj7a8Bs4gAcQ0T6CflEztnARnf/iru/SJzwzgPeClxPPP9mLnCju2939zfdvfxXtP8B+Jq7f9vd1xIN4nyiVzgxH7L288SJaTHRMPZk2mziRPYG8DfEM3q+Dswiem1FHoCPAjfmPhZlmJ2fzyJGE+8hehi4+9Ysw7nAOOBniZPGj4gecnPm3wPclZ+dSTSgce7+TeJE8PYsM8DZwPfcfUf+/jWiR1mU8dRcfpO7byqVYycR6J4gepqLcvuLie++zd3/LfdhKtGZ+Jq7r3P3rdmmvg2YmY3N9f4wtzma6Ek3DqAcn8nlXymV44PE6PlXsnz1xIm6aCdXAxvc/XV3/xbxPdc+U8mJk9Zi4umsm4BLsw1cQrZRoqM0zt0fJ9ro80QnqJl4ltI24BvE6OED9N5GIb7fjwIrgK0DbKObsl7eRnz/i/M1DphJdGReAB5x94eBn8jybTOzBqIj9GkObXau6x6ijV1KzfEK/GQu8yDRuewkRlefzzLdTXxv3yQ6qYeqi26PaTWzaUSn8Bv9rIvieH1/TV3UE8dCM9EZ+EHm+wngW8Tx9yjZboH/C/w74hhuzLKMy7L8k7s/SgStS4ljonjib1Ou721Z14/k9pp7KNMUutpW4TeynrvJ9jvO3R/P8+Jduc5as4F73X2Puz9PnEtmDiB/N8PpGswsMsD04p3AO83sW2b2hJnNKn12IEAVzGwM8FvAQ2Y2iuj93EYcKKuJofG5RKN8vpS1negxrwPOIQ7wScAzZvaMmX3RzE7PbYwivugNNfkvIk56vwX8JtHQJhPDeoiD5QfEybQO6HT3XZn3NaJBf5GY6tlD14nsL8zs30plqCd6btPdfQPRCz01y/a9LMNeYqjd4e778vdfzH18F3GQbM4yFSe34klkv0kM/d/M39uAnzKzqfmI6+bMtznr4p3Eyaoow3ozm0kEyuczvYPo7dXn/r6N6J2R5dsTb30X3V0OvOHue4oEM1tBnBh2E9/jQMrxw6yLohwnZT3UE6PIUcSJqT3TzgYmm9mpZnYiXT3vso8T7WEp0fv/3dJ3cjlwZS5XD7SX2uhXiJP4NfmdbM8TTHvWx1s4dBv9PeB/ESfmfwB+ZhBtdDYxtVK00fZ8v4uuILa5lHcSMYr4O+BP6GofPZkKnE70rPflOn+VOC7Xl+pjS5blfiKwvJMIXG8hRqjtRHDZT9cj12vrYj/wO2b2pJl9jhjZ3+fFUHdwx2s78f29Snw/84hjqfh+9uV230scH1cSbeM8orNRPEb+A0R72wwH2vouohP7YNZFPdGW20tlOyHrsLZMxfdTtPETifPovxz8FRxo4+V9rs98HzazT5aW29zDcofM35thEWDM7C3EKOO5PhYdTfQQLiEi9WfNbEJ+djHRuym7Ffi6u3/D48ma3yS+/Jlm9q5c5iJiSFhWT4wu/nNu8xSi8X2aGDo+ThzQECOGzTX5T8ryfcTd/wcx7Ic4GZ0I/BNxEvdMm0KOOkouIoLfb5byNgDfcvcLSmUwYhppdSmvZ4/DiQOs1sdy+eJEe3KmX0ZMTXiu40PESbgYrZCjx+uIof03iJNi4T2lchT3v19O9D6LBlw+ITsxmhxFnGALY4ih+QF
mdh4xdVI+seDuTcSJdSxR74MtR7fNEdMXTgR5sowTiGnSR+i6/lPrN4gTwc8RJ5m7S9u4mPhOy9u/lRixPkf04h8lgu+JZnZNLjOWmBYqK7fR64D/nmW8nTjxD7SNFj38Q9WL1bw/iQgy23MU0JvxwFe96+m2ECOjb7r7+aV1/hRRF98lpmeL0eUJRECCOC5+XLP+cl3cBryDGJFsI+qm3KMf6PFa3ucTiXq6ExhT+n4aiIB1ExGgZhLnqV8gOmf7crlZROexdtvnA4uyLoqp/bJDfQ9e8/NXiPPDQbNHPazzQD53X5773dtyh8zfm2ERYIjedG1w6Ek78IC7783hWyswzczOBja7+4GGZ2Z/DkwE/rAm/8nE0HwW0TBmEA3qzMzXAPwRcI+7P5t5vkCceBcRPbWVxJw1xInrW6X8xVTFF9z9iVzmFeLktJUYWj6aZR9PnOSnECdyskyTsnx7clv3Zh3tB76Uy30xy9BO9D4fyhHFeKKX9FXgz7IM7UQPckIuU0c09KIn9JYs/xVEm/jJLOvFWT/XZhneb2afd/cvu/t73P29uR8/yPyXZ12OB3ZmXXyVuLD5UO5bURdTchtXZXnrsv5GEz3mAz3M/E6+RHyXtScX3P0N4hrXCQMsxzgimG3NtNcy/4nEHPkbxCioIcv7TXe/w90vcPf35We1Pfd5WSdn5vTXW3N/x9O9jbYTU5ZFG23KMj1HtIU1xEmzgTgJfZlDt9G5xJTJ68QF35l0tQ/oXxvdSQSoLblcA3GCHp/14HT1xKdlWScCHzazF4j2McrMPl/7/WR9PpHbHk0cgy+Uj1fi5p1xWRcfyDp4heilv0h0KhtK+zSpp7pw95fdfb+7v0mcU06sCYADOV7LdbGPaF/fy/ffp+v7eQfwjVLbaCAC/d/kejblemfm+2LblxEdvCtKo/JiJF1MmTVk3b/QQ5mK76cIKHPoYXqstN6G0u8N1HTWSsud2cNy/c3fzXAJMLOIude+LCMOfMzsNGII/Rw102Nm9h+JA/Y33P1NM5uYI53lwG8TDdiJ4eU04i6WaWb2M8S0xo+IqYZim+8nDvBriJPxDOJCNMQc6i3AZWY2KZc7ga6eMjk8f5S4aHYi0SPdDKzKMtUDF5rZB4m55w8B17v7JHefSjScVcTUwSWl7T6V+a8gAtFVRPD8EnCXu38xt7+NGHZ/h5h//kiW4cEsaycRRH6RuLi4y923uftC4oC/qyiDu1+T+4mZnUyMhu4iTnKXEieJVcSJ+0A5SmV4T9bFAuABYD4xtTE39+sq4gDemtuYQASHhVk2Mv2kHKUVJ63i7rmBlONx4vrCA5nva0QQegdxUl2Vy+0iTg4PlvZ9CjEaLnqnhZeIE+JcM/tp4qSwkoOncH+ZCD43E21xJhFkVxCj5l/P72VU/uytjW4l5vu/TNyJtomu9gH9aKPpR8SJZG6+ijb6CjFFc1m20TFEkPiIuzeU2uh+d7+mvEIzm050IIrAcBXRqao9Xi8mAvPMrMNGYprx2izXO4jvoTHz9lgXRZtIf0TXyb0wkOO1XBcvEh2+FcQNAT9HdC6K6ezbzWySmZ2Y+3wlEQD2uftTOQJ/hjhe55rZDGJ0+7B3v5a8ghj5vJ51fVlu74EeyrSZOCbdzMYTbeABelC0ezO7KK/ZXHuIZZcDc8xsrJmdRZwf1wwg/0EbruIusnuycvdmhczL9F+la075ZWBFpn8bqCvl/+tc7s38+fFiChX4W+LA2QDMyfQv0/1W0H3EFMv6fBW32T5J123KG4jh7J3edSfLK7nNbaW89cR8cCtxYnqWOGFMIXpwqzL/R4lejRMHSJH/n4g7XH4tPytuU3498zQSF/m3Z73syp/rS69mYgrp7cQUwpOlMjRk3jai1/sHWe/riRPTemII3pj7sCe335H1+OdEb3d3pj9D3uFH9Jp2EgfRDuCx0vf7VL7mECfKB3Kf1hDXKq7JchRlWE9M932XOFhfzrrck/vwReL
EszdfHcR3/xfECeTVTHfihPoHRLspbpf+B+JaTn/L0UG0E8+f3yaC0mulMvyYaC+N+XkdMUXyVNb5Dg5uo+cS0yIddN2mfDbZRoH1pTbanuXaQ0zpfSKXL27J3kN8r/f30UY/RATfp4ie/yYG1kbbStv8MRFcX8v1bMtyvJ4/i9f8muN1R673QF3kZx8nRv5fpKuNrqTrVuX1dB2vrVkfO3O54mL6buK4+H5u5+xe6mIJcWw/SbSbXyyVZSDH69nEd/5avr5N3C1XlKm4Tfn7xMgWom1sys9eJDosb8/P/pjo3L416+L1rO+ncrvL6WobHyWCxx6inX2aOPf1VKazM89vExfnezsvNxLH37PFOjP9w8AnS8v9t9L3cXlf+XvdZhUBZoDBqIG83XGQ+ccCLYPM+2dkkBpk/muAGw5z/4e8DCpHn+s8rtuo6uLwy0RctzvjSG57JLz0v8hERKQSw+UajIiIHGMUYEREpBIKMCIiUgkFGBERqYQCjIiIVEIBRkREKvH/AQqaVwTx6jxHAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig, ax = plt.subplots()\n", "ax.scatter(df['saledate'][:1000],df['SalePrice'][:1000]);" ] }, { "cell_type": "code", "execution_count": 13, "id": "ed600fdb", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZgAAAD4CAYAAADRuPC7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAaoklEQVR4nO3dcZSV9X3n8fcnTIKaBAUZDZmhHYycpOhJVpkQ0mS7NjRC1Irp0e54mpVNadlatpu025NA7InZ5HAONG1M2K4mNFKBGIXQJLJxXUOwabbnGHCMGkClTIXoBCrThUWaRgzmu388v1ueud4ZLsP87r0zfl7nPOc+z/d5fs/zvaPMd37P73efq4jAzMxstL2m2QmYmdn45AJjZmZZuMCYmVkWLjBmZpaFC4yZmWXR1uwEWsXUqVOjq6ur2WmYmY0pjz766D9FRHutfS4wSVdXF729vc1Ow8xsTJH0o6H2+RaZmZll4QJjZmZZuMCYmVkWLjBmZpaFC4yZmWXhAmNmZlm4wJiZWRYuMGZmloULjJmZZeFP8o+SrmX3N+W6+1de3ZTrmpmdinswZmaWhQuMmZll4QJjZmZZuMCYmVkWLjBmZpaFC4yZmWXhAmNmZlm4wJiZWRYuMGZmloULjJmZZeECY2ZmWWQrMJLWSjokaVeNfX8sKSRNLcWWS+qTtEfS/FJ8tqSdad9qSUrxiZI2pvh2SV2lNosk7U3Lolzv0czMhpazB3MXsKA6KGk68H7g2VJsFtADXJLa3C5pQtp9B7AEmJmWyjkXA0ci4mLgNmBVOtcU4FbgXcAc4FZJk0f5vZmZ2SlkKzAR8T3gcI1dtwEfA6IUWwjcGxHHI2If0AfMkTQNmBQRD0dEAOuB60pt1qX1zcC81LuZD2yNiMMRcQTYSo1CZ2ZmeTV0DEbStcCPI+KJql0dwHOl7f4U60jr1fFBbSLiBHAUOH+Yc9XKZ4mkXkm9AwMDI3pPZmZWW8MKjKRzgFuAT9baXSMWw8RH2mZwMGJNRHRHRHd7e3utQ8zMbIQa2YN5CzADeELSfqAT+IGkN1H0MqaXju0EDqR4Z4045TaS2oBzKW7JDXUuMzNroIYVmIjYGREXRERXRHRRFILLI+IfgS1AT5oZNoNiMH9HRBwEjkmam8ZXbgLuS6fcAlRmiF0PPJTGaR4ErpQ0OQ3uX5liZmbWQNm+MlnSPcAVwFRJ/cCtEXFnrWMjYrekTcCTwAlgaUS8nHbfTDEj7WzggbQA3AlskNRH0XPpSec6LOkzwCPpuE9HRK3JBmZmllG2AhMRN55if1fV9gpgRY3jeoFLa8RfBG4Y4txrgbWnka6ZmY0yf5LfzMyycIExM7MsXGDMzCwLFxgzM8vCBcbMzLJwgTEzsyxcYMzMLAsXGDMzy8IFxszMsnCBMTOzLFxgzMwsCxcYMzPLwgXGzMyycIExM7MsXGDMzCwLFxgzM8vCBcbMzLJwgTEzsyyyFRhJayUdkrSrFPuspKcl/VDSNySdV9q3XFKfpD2S5pfisyXtTPtWS1KKT5S0McW3S+oqtVkkaW9aFuV6j2ZmNrScPZi7gAVVsa3ApRHxduDvgeUAkmYBPcAlqc3tkiakNncAS4CZaamcczFwJCIuBm4DVqVzTQFuBd4FzAFulTQ5w/szM7NhZCswEfE94HBV7NsRcSJtfh/oTOs
LgXsj4nhE7AP6gDmSpgGTIuLhiAhgPXBdqc26tL4ZmJd6N/OBrRFxOCKOUBS16kJnZmaZNXMM5reBB9J6B/BcaV9/inWk9er4oDapaB0Fzh/mXK8gaYmkXkm9AwMDZ/RmzMxssKYUGEm3ACeAuyuhGofFMPGRthkcjFgTEd0R0d3e3j580mZmdlraGn3BNOh+DTAv3faCopcxvXRYJ3AgxTtrxMtt+iW1AedS3JLrB66oavPdUX0TLaRr2f1Nu/b+lVc37dpm1voa2oORtAD4OHBtRPxLadcWoCfNDJtBMZi/IyIOAsckzU3jKzcB95XaVGaIXQ88lArWg8CVkianwf0rU8zMzBooWw9G0j0UPYmpkvopZnYtByYCW9Ns4+9HxO9FxG5Jm4AnKW6dLY2Il9OpbqaYkXY2xZhNZdzmTmCDpD6KnksPQEQclvQZ4JF03KcjYtBkAzMzyy9bgYmIG2uE7xzm+BXAihrxXuDSGvEXgRuGONdaYG3dyZqZ2ajzJ/nNzCwLFxgzM8vCBcbMzLJwgTEzsyxcYMzMLAsXGDMzy8IFxszMsnCBMTOzLFxgzMwsCxcYMzPLwgXGzMyycIExM7MsXGDMzCwLFxgzM8vCBcbMzLJwgTEzsyxcYMzMLAsXGDMzy8IFxszMsshWYCStlXRI0q5SbIqkrZL2ptfJpX3LJfVJ2iNpfik+W9LOtG+1JKX4REkbU3y7pK5Sm0XpGnslLcr1Hs3MbGg5ezB3AQuqYsuAbRExE9iWtpE0C+gBLkltbpc0IbW5A1gCzExL5ZyLgSMRcTFwG7AqnWsKcCvwLmAOcGu5kJmZWWNkKzAR8T3gcFV4IbAura8DrivF742I4xGxD+gD5kiaBkyKiIcjIoD1VW0q59oMzEu9m/nA1og4HBFHgK28stCZmVlmjR6DuTAiDgKk1wtSvAN4rnRcf4p1pPXq+KA2EXECOAqcP8y5XkHSEkm9knoHBgbO4G2ZmVm1VhnkV41YDBMfaZvBwYg1EdEdEd3t7e11JWpmZvVpdIF5Pt32Ir0eSvF+YHrpuE7gQIp31ogPaiOpDTiX4pbcUOcyM7MGanSB2QJUZnUtAu4rxXvSzLAZFIP5O9JttGOS5qbxlZuq2lTOdT3wUBqneRC4UtLkNLh/ZYqZmVkDteU6saR7gCuAqZL6KWZ2rQQ2SVoMPAvcABARuyVtAp4ETgBLI+LldKqbKWaknQ08kBaAO4ENkvooei496VyHJX0GeCQd9+mIqJ5sYGZmmdVVYCRdGhG7Tn3kSRFx4xC75g1x/ApgRY14L3BpjfiLpAJVY99aYG3dyZqZ2air9xbZFyXtkPT7ks7LmZCZmY0PdRWYiHgv8FsUg+e9kr4q6f1ZMzMzszGt7kH+iNgL/AnwceDfAaslPS3pN3IlZ2ZmY1ddBUbS2yXdBjwFvA/49Yj4pbR+W8b8zMxsjKp3FtlfAH8JfCIifloJRsQBSX+SJTMzMxvT6i0wVwE/rUwdlvQa4KyI+JeI2JAtOzMzG7PqHYP5DsXnUCrOSTEzM7Oa6i0wZ0XEP1c20vo5eVIyM7PxoN4C8xNJl1c2JM0GfjrM8WZm9ipX7xjMR4GvSao8NHIa8O+zZGRmZuNCXQUmIh6R9DbgrRSPw386In6WNTMzMxvTTudhl+8EulKbyyQREeuzZGVmZmNevQ+73AC8BXgcqDzluPIVxmZmZq9Qbw+mG5iVvm/FzMzslOqdRbYLeFPORMzMbHyptwczFXhS0g7geCUYEddmycrMzMa8egvMp3ImYWZm40+905T/VtIvAjMj4juSzgEm5E3NzMzGsnof1/+7wGbgSynUAXxzpBeV9IeSdkvaJekeSWdJmiJpq6S96XVy6fjlkvok7ZE0vxSfLWln2rdaklJ8oqSNKb5dUtdIczUzs5Gpd5B/KfAe4AX41y8fu2AkF5TUAfwXoDsiLqXoCfUAy4BtETET2Ja2kTQr7b8EWADcLqnSe7oDWALMTMu
CFF8MHImIiym+r2bVSHI1M7ORq7fAHI+IlyobktooPgczUm3A2ek85wAHgIXAurR/HXBdWl8I3BsRxyNiH9AHzJE0DZgUEQ+n6dPrq9pUzrUZmFfp3ZiZWWPUW2D+VtInKIrC+4GvAf9zJBeMiB8DfwY8CxwEjkbEt4ELI+JgOuYgJ3tIHcBzpVP0p1hHWq+OD2oTESeAo8D51blIWiKpV1LvwMDASN6OmZkNod4CswwYAHYC/wn4X8CIvskyja0sBGYAbwZeL+lDwzWpEYth4sO1GRyIWBMR3RHR3d7ePnziZmZ2WuqdRfZziq9M/stRuOavAfsiYgBA0teBXwaelzQtIg6m21+H0vH9wPRS+06KW2r9ab06Xm7Tn27DnQscHoXczcysTvXOItsn6ZnqZYTXfBaYK+mcNC4yD3gK2AIsSscsAu5L61uAnjQzbAbFYP6OdBvtmKS56Tw3VbWpnOt64CE/5sbMrLFO51lkFWcBNwBTRnLBiNguaTPwA+AE8BiwBngDsEnSYooidEM6frekTcCT6filEVF54ObNwF0UX+f8QFoA7gQ2SOqj6Ln0jCRXMzMbOY30D3tJfxcR7x3lfJqmu7s7ent7R9y+a9n9o5jN2LB/5dXNTsHMmkzSoxHRXWtfvY/rv7y0+RqKHs0bRyE3MzMbp+q9RfbnpfUTwH7gN0c9GzMzGzfqnUX2q7kTMTOz8aXeW2R/NNz+iPjc6KRjZmbjxenMInsnxfRfgF8HvsfgT9ibmZn9q9P5wrHLI+IYgKRPAV+LiN/JlZiZmY1t9T4q5heAl0rbLwFdo56NmZmNG/X2YDYAOyR9g+KZXh+keHqxmZlZTfXOIlsh6QHg36bQhyPisXxpmZnZWFfvLTIovrflhYj4AsVDJGdkysnMzMaBeh92eSvwcWB5Cr0W+EqupMzMbOyrtwfzQeBa4CcAEXEAPyrGzMyGUW+BeSk97j4AJL0+X0pmZjYe1FtgNkn6EnCepN8FvsPofPmYmZmNU6ecRZa+zGsj8DbgBeCtwCcjYmvm3MzMbAw7ZYGJiJD0zYiYDbiomJlZXeq9RfZ9Se/MmomZmY0r9X6S/1eB35O0n2ImmSg6N2/PlZiZmY1tw/ZgJP1CWv0AcBHwPoonKV+TXkdE0nmSNkt6WtJTkt4taYqkrZL2ptfJpeOXS+qTtEfS/FJ8tqSdad/qNF6EpImSNqb4dkldI83VzMxG5lS3yL4JEBE/Aj4XET8qL2dw3S8A/zsi3ga8A3gKWAZsi4iZwLa0jaRZQA9wCbAAuF3ShHSeO4AlwMy0LEjxxcCRiLgYuA1YdQa5mpnZCJyqwKi0ftFoXFDSJOBXgDsBIuKliPh/wEJgXTpsHXBdWl8I3BsRxyNiH9AHzJE0DZgUEQ+nz+isr2pTOddmYF6ld2NmZo1xqgITQ6yfiYuAAeCvJD0m6cvpg5sXRsRBgPR6QTq+g8FfbNafYh1pvTo+qE1EnACOAuePUv5mZlaHUxWYd0h6QdIx4O1p/QVJxyS9MMJrtgGXA3dExGUUkwaWDXN8rZ5HDBMfrs3gE0tLJPVK6h0YGBg+azMzOy3DFpiImBARkyLijRHRltYr25NGeM1+oD8itqftzRQF5/l024v0eqh0/PRS+07gQIp31ogPaiOpDTgXOFzj/a2JiO6I6G5vbx/h2zEzs1rqnaY8aiLiHyU9J+mtEbEHmAc8mZZFwMr0el9qsgX4qqTPAW+mGMzfEREvp57UXGA7cBPw30ttFgEPA9cDD6VxGhtFXcvub8p196+8uinXNbPT0/ACk/wBcLek1wHPAB+m6E1tkrQYeBa4ASAidkvaRFGATgBLI+LldJ6bgbuAs4EH0gLFBIINkvooei49jXhTZmZ2UlMKTEQ8DnTX2DVviONXACtqxHuBS2vEXyQVKDMza47T+UZLMzOzurnAmJlZFi4wZmaWhQuMmZll4QJjZmZZuMCYmVkWLjBmZpaFC4yZmWXhAmNmZlm
4wJiZWRYuMGZmloULjJmZZeECY2ZmWbjAmJlZFi4wZmaWhQuMmZll4QJjZmZZuMCYmVkWLjBmZpZF0wqMpAmSHpP0rbQ9RdJWSXvT6+TSscsl9UnaI2l+KT5b0s60b7UkpfhESRtTfLukroa/QTOzV7lm9mA+AjxV2l4GbIuImcC2tI2kWUAPcAmwALhd0oTU5g5gCTAzLQtSfDFwJCIuBm4DVuV9K2ZmVq0pBUZSJ3A18OVSeCGwLq2vA64rxe+NiOMRsQ/oA+ZImgZMioiHIyKA9VVtKufaDMyr9G7MzKwxmtWD+TzwMeDnpdiFEXEQIL1ekOIdwHOl4/pTrCOtV8cHtYmIE8BR4PzqJCQtkdQrqXdgYOAM35KZmZU1vMBIugY4FBGP1tukRiyGiQ/XZnAgYk1EdEdEd3t7e53pmJlZPdqacM33ANdKugo4C5gk6SvA85KmRcTBdPvrUDq+H5heat8JHEjxzhrxcpt+SW3AucDhXG/IzMxeqeE9mIhYHhGdEdFFMXj/UER8CNgCLEqHLQLuS+tbgJ40M2wGxWD+jnQb7ZikuWl85aaqNpVzXZ+u8YoejJmZ5dOMHsxQVgKbJC0GngVuAIiI3ZI2AU8CJ4ClEfFyanMzcBdwNvBAWgDuBDZI6qPoufQ06k2YmVmhqQUmIr4LfDet/19g3hDHrQBW1Ij3ApfWiL9IKlBmZtYc/iS/mZll4QJjZmZZuMCYmVkWLjBmZpZFK80iM6tL17L7m3Ld/Suvbsp1zcYq92DMzCwLFxgzM8vCBcbMzLJwgTEzsyxcYMzMLAsXGDMzy8IFxszMsnCBMTOzLFxgzMwsCxcYMzPLwgXGzMyycIExM7Ms/LBLszo16yGb4Adt2tjU8B6MpOmS/kbSU5J2S/pIik+RtFXS3vQ6udRmuaQ+SXskzS/FZ0vamfatlqQUnyhpY4pvl9TV6PdpZvZq14xbZCeA/xoRvwTMBZZKmgUsA7ZFxExgW9om7esBLgEWALdLmpDOdQewBJiZlgUpvhg4EhEXA7cBqxrxxszM7KSGF5iIOBgRP0jrx4CngA5gIbAuHbYOuC6tLwTujYjjEbEP6APmSJoGTIqIhyMigPVVbSrn2gzMq/RuzMysMZo6yJ9uXV0GbAcujIiDUBQh4IJ0WAfwXKlZf4p1pPXq+KA2EXECOAqcX+P6SyT1SuodGBgYpXdlZmbQxAIj6Q3AXwMfjYgXhju0RiyGiQ/XZnAgYk1EdEdEd3t7+6lSNjOz09CUAiPptRTF5e6I+HoKP59ue5FeD6V4PzC91LwTOJDinTXig9pIagPOBQ6P/jsxM7OhNGMWmYA7gaci4nOlXVuARWl9EXBfKd6TZobNoBjM35Fuox2TNDed86aqNpVzXQ88lMZpzMysQZrxOZj3AP8B2Cnp8RT7BLAS2CRpMfAscANAROyWtAl4kmIG2tKIeDm1uxm4CzgbeCAtUBSwDZL6KHouPZnfk5mZVWl4gYmIv6P2GAnAvCHarABW1Ij3ApfWiL9IKlBmZtYcflSMmZll4QJjZmZZuMCYmVkWftil2RjQrAdt+iGbdibcgzEzsyxcYMzMLAsXGDMzy8IFxszMsnCBMTOzLDyLzMyG5K+JtjPhHoyZmWXhAmNmZlm4wJiZWRYuMGZmloULjJmZZeECY2ZmWXiaspm1JD/gc+xzD8bMzLJwgTEzsyzG9S0ySQuALwATgC9HxMomp2RmLc5PLxg947YHI2kC8D+ADwCzgBslzWpuVmZmrx7juQczB+iLiGcAJN0LLASebGpWZmZDGG8TG8ZzgekAnitt9wPvKh8gaQmwJG3+s6Q9VeeYCvxTtgxH31jLF8ZezmMtXxh7OTvf/AblrFVndK5fHGrHeC4wqhGLQRsRa4A1Q55A6o2I7tFOLJexli+MvZzHWr4w9nJ2vvk1KudxOwZD0WOZXtruBA40KRczs1ed8VxgHgFmSpoh6XVAD7ClyTmZmb1qjNtbZBFxQtJ
/Bh6kmKa8NiJ2n+Zphrx91qLGWr4w9nIea/nC2MvZ+ebXkJwVEac+yszM7DSN51tkZmbWRC4wZmaWhQvMECQtkLRHUp+kZQ287nRJfyPpKUm7JX0kxadI2ippb3qdXGqzPOW5R9L8Uny2pJ1p32pJSvGJkjam+HZJXaOQ9wRJj0n61hjJ9zxJmyU9nX7W727lnCX9Yfr/YZekeySd1Wr5Slor6ZCkXaVYQ3KUtChdY6+kRWeQ72fT/xM/lPQNSee1Sr5D5Vza98eSQtLUlsk5IrxULRSTAv4BuAh4HfAEMKtB154GXJ7W3wj8PcWjbv4UWJbiy4BVaX1Wym8iMCPlPSHt2wG8m+IzQQ8AH0jx3we+mNZ7gI2jkPcfAV8FvpW2Wz3fdcDvpPXXAee1as4UHxreB5ydtjcB/7HV8gV+Bbgc2FWKZc8RmAI8k14np/XJI8z3SqAtra9qpXyHyjnFp1NMaPoRMLVVcs7+C3MsLukH/2BpezmwvEm53Ae8H9gDTEuxacCeWrml/8nenY55uhS/EfhS+Zi03kbxiV6dQY6dwDbgfZwsMK2c7ySKX9iqirdkzpx8KsWUdK5vUfwibLl8gS4G/8LOnmP5mLTvS8CNI8m3at8HgbtbKd+hcgY2A+8A9nOywDQ9Z98iq63WY2Y6Gp1E6p5eBmwHLoyIgwDp9YJ02FC5dqT16vigNhFxAjgKnH8GqX4e+Bjw81KslfO9CBgA/krFbb0vS3p9q+YcET8G/gx4FjgIHI2Ib7dqvlUakWOuf6+/TfHXfUvnK+la4McR8UTVrqbn7AJT2ykfM5M9AekNwF8DH42IF4Y7tEYshokP1+a0SboGOBQRj9bbZIhrNyTfpI3iNsMdEXEZ8BOK2zdDafbPeDLFg1pnAG8GXi/pQ8M1GeLajfwZn8po5jjquUu6BTgB3H0G186er6RzgFuAT9baPYLrj2rOLjC1NfUxM5JeS1Fc7o6Ir6fw85Kmpf3TgEOnyLU/rVfHB7WR1AacCxweYbrvAa6VtB+4F3ifpK+0cL6V8/VHxPa0vZmi4LRqzr8G7IuIgYj4GfB14JdbON+yRuQ4qv9e0wD2NcBvRbof1ML5voXiD48n0r/BTuAHkt7UEjmP5J7weF8o/sJ9Jv2HqwzyX9KgawtYD3y+Kv5ZBg+W/mlav4TBA3nPcHIg7xFgLicH8q5K8aUMHsjbNEq5X8HJMZiWzhf4P8Bb0/qnUr4tmTPFU8B3A+ek66wD/qAV8+WVYzDZc6QYm9pHMfg8Oa1PGWG+Cyi+0qO96riWyLdWzlX79nNyDKbpOWf/hTlWF+Aqihlc/wDc0sDrvpei6/lD4PG0XEVxH3QbsDe9Tim1uSXluYc0GyTFu4Fdad9fcPLJDWcBXwP6KGaTXDRKuV/ByQLT0vkC/wboTT/nb6Z/NC2bM/DfgKfTtTakXxotlS9wD8UY0c8o/uJd3KgcKcZL+tLy4TPIt49irOHxtHyxVfIdKueq/ftJBaYVcvajYszMLAuPwZiZWRYuMGZmloULjJmZZeECY2ZmWbjAmJlZFi4wZmaWhQuMmZll8f8Bis5vTEC2MfMAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "df['SalePrice'].plot.hist();" ] }, { "cell_type": "code", "execution_count": 22, "id": "a6d54b7d", "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv('./data/buldozer/TrainAndValid.csv',\n", " low_memory=False,\n", " parse_dates=['saledate'])" ] }, { "cell_type": "code", "execution_count": 23, "id": "8796321f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "dtype('\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SalesIDSalePriceMachineIDModelIDdatasourceauctioneerIDYearMadeMachineHoursCurrentMeterUsageBandsaledate...Undercarriage_Pad_WidthStick_LengthThumbPattern_ChangerGrouser_TypeBackhoe_MountingBlade_TypeTravel_ControlsDifferential_TypeSteering_Controls
20561516467709500.01126363843413218.01974NaNNaN1989-01-17...NaNNaNNaNNaNNaNNone or UnspecifiedStraightNone or UnspecifiedNaNNaN
233186172888330000.01523610910513299.01986NaNNaN1989-01-31...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
142491150850221000.01153157413813299.01972NaNNaN1989-01-31...NaNNaNNaNNaNNaNNone or UnspecifiedAngleNone or UnspecifiedNaNNaN
115536145257833000.01544443385413299.01974NaNNaN1989-01-31...NaNNaNNaNNaNNaNNaNNaNNaNStandardConventional
92301140341824000.01390168711013299.01986NaNNaN1989-01-31...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "

5 rows × 53 columns

\n", "" ], "text/plain": [ " SalesID SalePrice MachineID ModelID datasource auctioneerID \\\n", "205615 1646770 9500.0 1126363 8434 132 18.0 \n", "233186 1728883 30000.0 1523610 9105 132 99.0 \n", "142491 1508502 21000.0 1153157 4138 132 99.0 \n", "115536 1452578 33000.0 1544443 3854 132 99.0 \n", "92301 1403418 24000.0 1390168 7110 132 99.0 \n", "\n", " YearMade MachineHoursCurrentMeter UsageBand saledate ... \\\n", "205615 1974 NaN NaN 1989-01-17 ... \n", "233186 1986 NaN NaN 1989-01-31 ... \n", "142491 1972 NaN NaN 1989-01-31 ... \n", "115536 1974 NaN NaN 1989-01-31 ... \n", "92301 1986 NaN NaN 1989-01-31 ... \n", "\n", " Undercarriage_Pad_Width Stick_Length Thumb Pattern_Changer \\\n", "205615 NaN NaN NaN NaN \n", "233186 NaN NaN NaN NaN \n", "142491 NaN NaN NaN NaN \n", "115536 NaN NaN NaN NaN \n", "92301 NaN NaN NaN NaN \n", "\n", " Grouser_Type Backhoe_Mounting Blade_Type Travel_Controls \\\n", "205615 NaN None or Unspecified Straight None or Unspecified \n", "233186 NaN NaN NaN NaN \n", "142491 NaN None or Unspecified Angle None or Unspecified \n", "115536 NaN NaN NaN NaN \n", "92301 NaN NaN NaN NaN \n", "\n", " Differential_Type Steering_Controls \n", "205615 NaN NaN \n", "233186 NaN NaN \n", "142491 NaN NaN \n", "115536 Standard Conventional \n", "92301 NaN NaN \n", "\n", "[5 rows x 53 columns]" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_tmp.head()" ] }, { "cell_type": "code", "execution_count": 31, "id": "5c5497e4", "metadata": {}, "outputs": [], "source": [ "# Derive calendar features from the parsed `saledate` column for EVERY row.\n", "# Use the full column: assigning from a slice such as df_tmp[:1] aligns on the\n", "# index and leaves all rows outside the slice as NaN.\n", "df_tmp['saleYear'] = df_tmp.saledate.dt.year\n", "df_tmp['saleMonth'] = df_tmp.saledate.dt.month\n", "df_tmp['saleDay'] = df_tmp.saledate.dt.day\n", "df_tmp['saleDayOfWeek'] = df_tmp.saledate.dt.dayofweek\n", "df_tmp['saleDayOfYear'] = df_tmp.saledate.dt.dayofyear" ] }, { "cell_type": "code", "execution_count": 32, "id": "6f16033a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SalesIDSalePriceMachineIDModelIDdatasourceauctioneerIDYearMadeMachineHoursCurrentMeterUsageBandsaledate...Backhoe_MountingBlade_TypeTravel_ControlsDifferential_TypeSteering_ControlssaleYearsaleMonthsaleDaysaleDayOfWeeksaleDayOYear
20561516467709500.01126363843413218.01974NaNNaN1989-01-17...None or UnspecifiedStraightNone or UnspecifiedNaNNaN1989.01.017.01.017.0
233186172888330000.01523610910513299.01986NaNNaN1989-01-31...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
142491150850221000.01153157413813299.01972NaNNaN1989-01-31...None or UnspecifiedAngleNone or UnspecifiedNaNNaNNaNNaNNaNNaNNaN
115536145257833000.01544443385413299.01974NaNNaN1989-01-31...NaNNaNNaNStandardConventionalNaNNaNNaNNaNNaN
92301140341824000.01390168711013299.01986NaNNaN1989-01-31...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "

5 rows × 58 columns

\n", "
" ], "text/plain": [ " SalesID SalePrice MachineID ModelID datasource auctioneerID \\\n", "205615 1646770 9500.0 1126363 8434 132 18.0 \n", "233186 1728883 30000.0 1523610 9105 132 99.0 \n", "142491 1508502 21000.0 1153157 4138 132 99.0 \n", "115536 1452578 33000.0 1544443 3854 132 99.0 \n", "92301 1403418 24000.0 1390168 7110 132 99.0 \n", "\n", " YearMade MachineHoursCurrentMeter UsageBand saledate ... \\\n", "205615 1974 NaN NaN 1989-01-17 ... \n", "233186 1986 NaN NaN 1989-01-31 ... \n", "142491 1972 NaN NaN 1989-01-31 ... \n", "115536 1974 NaN NaN 1989-01-31 ... \n", "92301 1986 NaN NaN 1989-01-31 ... \n", "\n", " Backhoe_Mounting Blade_Type Travel_Controls Differential_Type \\\n", "205615 None or Unspecified Straight None or Unspecified NaN \n", "233186 NaN NaN NaN NaN \n", "142491 None or Unspecified Angle None or Unspecified NaN \n", "115536 NaN NaN NaN Standard \n", "92301 NaN NaN NaN NaN \n", "\n", " Steering_Controls saleYear saleMonth saleDay saleDayOfWeek saleDayOYear \n", "205615 NaN 1989.0 1.0 17.0 1.0 17.0 \n", "233186 NaN NaN NaN NaN NaN NaN \n", "142491 NaN NaN NaN NaN NaN NaN \n", "115536 Conventional NaN NaN NaN NaN NaN \n", "92301 NaN NaN NaN NaN NaN NaN \n", "\n", "[5 rows x 58 columns]" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_tmp.head()" ] }, { "cell_type": "code", "execution_count": 35, "id": "c02bc1ce", "metadata": {}, "outputs": [], "source": [ "# Drop the raw date column now that its parts have been extracted.\n", "# Rebinding (instead of inplace=True) with errors='ignore' keeps this cell\n", "# idempotent: re-running it is a no-op rather than a KeyError.\n", "df_tmp = df_tmp.drop(columns='saledate', errors='ignore')" ] }, { "cell_type": "code", "execution_count": null, "id": "e8765a53", "metadata": {}, "outputs": [], "source": [ "from sklearn.ensemble import RandomForestRegressor\n", "model = RandomForestRegressor(n_jobs=-1,\n", " random_state=42)\n", "# NOTE(review): the line below only references the bound method; nothing is\n", "# trained. Calling fit needs X/y arguments, and df_tmp still contains\n", "# object-dtype columns and missing values (see the df.info() output) --\n", "# TODO encode/impute those before calling model.fit(X, y).\n", "model.fit" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", 
"name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.5" } }, "nbformat": 4, "nbformat_minor": 5 }