Select Page

Age decision tree

# Titanic data preparation: combine the train and test sets, derive a Title
# feature from the passenger names, and fill in the missing Embarked and Fare
# values before any model is fitted.

# Load both data sets. A placeholder Survived column is added to the test set
# so that both frames have the same columns and can be row-bound.
train <- read.csv("../input/train.csv")
test <- read.csv("../input/test.csv")
test$Survived <- 0
combi <- rbind(train, test)

# strsplit() needs character input, so make sure Name is a character vector.
combi$Name <- as.character(combi$Name)

# Exploration: split the first name on "," or "." and look at the pieces.
# The second piece is what the code below uses as the title.
# NOTE(review): presumably names look like "Surname, Title. Given names" --
# confirm against the data.
strsplit(combi$Name[1], split = "[,.]")
strsplit(combi$Name[1], split = "[,.]")[[1]]
strsplit(combi$Name[1], split = "[,.]")[[1]][2]

# Extract the second piece of every name. vapply() guarantees a character
# result with one value per passenger (sapply() could silently change type).
combi$Title <- vapply(
  combi$Name,
  FUN = function(x) {
    strsplit(x, split = "[,.]")[[1]][2]
  },
  FUN.VALUE = character(1)
)

# Remove the first space of each title (the one left over from the split).
combi$Title <- sub(" ", "", combi$Title)

# Merge rare titles into a few broader groups.
combi$Title[combi$Title %in% c("Mme", "Mlle")] <- "Mlle"
combi$Title[combi$Title %in% c("Capt", "Don", "Major", "Sir")] <- "Sir"
combi$Title[combi$Title %in% c("Dona", "Lady", "the Countess", "Jonkheer")] <- "Lady"
combi$Title <- factor(combi$Title)

# Passenger on row 62 and 830 do not have a value for embarkment.
# Since many passengers embarked at Southampton, we give them the value S.
# We code all embarkment codes as factors.
combi$Embarked[c(62, 830)] <- "S"
combi$Embarked <- factor(combi$Embarked)

# Passenger on row 1044 has an NA Fare value. Let's replace it with the
# median fare value.
combi$Fare[1044] <- median(combi$Fare, na.rm = TRUE)

library("rpart")
library("rpart.plot")
#library("rattle")

# How to fill…