import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier

# Numeric codes used when turning the text columns into numbers.
_SEX_CODES = {'male': 0, 'female': 1}
_SEX_MISSING = 2
_EMBARKED_CODES = {'S': 0, 'C': 1, 'Q': 2}
_EMBARKED_MISSING = 3


def _encode_labels(column, codes, missing_code):
    """Return *column* with known labels replaced by their numeric codes.

    Null entries become ``missing_code``. Values that are neither null nor a
    key of ``codes`` are passed through unchanged.
    """
    # Work on an object-typed copy so numeric codes can sit next to any
    # unrecognised labels, whatever dtype the column started with.
    filled = column.astype(object).where(column.notnull(), missing_code)
    return filled.map(lambda value: codes.get(value, value))


def df_cleaner(df):
    """
    Clean up a few variables in the training/test sets.

    Missing ``Age`` and ``Fare`` values are replaced by the mean of the
    values that are present in the same column. ``Sex`` is recoded as
    male=0, female=1, missing=2 and ``Embarked`` as S=0, C=1, Q=2,
    missing=3.

    The frame is modified in place and also returned.
    """
    # Fill gaps with the column mean. ``Series.mean`` skips nulls, so this is
    # the mean of the observed values, computed once per column.
    for column in ('Age', 'Fare'):
        df[column] = df[column].fillna(df[column].mean())

    # Manually convert values to numeric columns for clarity. Each recoded
    # column is assigned back whole instead of through ``df[col][mask] = x``,
    # because chained assignment may write to a temporary copy and leave the
    # frame untouched.
    df['Sex'] = _encode_labels(df['Sex'], _SEX_CODES, _SEX_MISSING)
    df['Embarked'] = _encode_labels(
        df['Embarked'], _EMBARKED_CODES, _EMBARKED_MISSING
    )

    return df


def main():
    """
    Visualization of random forest accuracy as function of the number of
    trees available in the ensemble.
    """
    # Read…
    # NOTE(review): the remainder of main() is cut off in this copy of the
    # file; restore it from the original article before running.

Link to Full Article: borrowed1

Pin It on Pinterest

Share This

Join Our Newsletter

Sign up to our mailing list to receive the latest news and updates about this site and the Informed.AI Network of AI-related websites, which includes Events.AI, Neurons.AI, Awards.AI, and Vocation.AI.

You have successfully subscribed!