# Random-forest baseline: read the training and test CSVs, build a cleaned
# feature table for each, fit a classifier on `Survived`, and write the
# predictions for the test set to "result.csv".

library(randomForest)
library(ggplot2)

# Fixed seed so the forest is reproducible between runs.
set.seed(1)

my_train <- read.csv("../input/train.csv", stringsAsFactors = FALSE)
my_test <- read.csv("../input/test.csv", stringsAsFactors = FALSE)

#' Build the model feature table from a raw passenger data frame
#'
#' Keeps the columns Pclass, Age, Sex, Parch, SibSp, Fare and Embarked,
#' fills in missing values, and converts the two categorical columns to
#' factors.
#'
#' @param data A data frame containing at least the seven columns above.
#' @return A data frame with those seven columns, cleaned as described.
get_features <- function(data) {
  features <- c("Pclass", "Age", "Sex", "Parch", "SibSp", "Fare", "Embarked")
  char <- data[, features]

  # Missing ages are flagged with the sentinel -1 rather than imputed.
  char$Age[is.na(char$Age)] <- -1

  # Missing fares take the median of the fares present in this table.
  char$Fare[is.na(char$Fare)] <- median(char$Fare, na.rm = TRUE)

  # Blank embarkation codes default to "S".
  char$Embarked[char$Embarked == ""] <- "S"

  # NOTE(review): factor levels are derived separately for each table passed
  # in; presumably train and test contain the same categories -- confirm.
  char$Sex <- as.factor(char$Sex)
  char$Embarked <- as.factor(char$Embarked)

  char
}

# Response is converted to a factor so randomForest fits a classifier.
rf <- randomForest(
  get_features(my_train),
  as.factor(my_train$Survived),
  ntree = 100,
  importance = TRUE
)

result <- data.frame(PassengerId = my_test$PassengerId)
result$Survived <- predict(rf, get_features(my_test))
write.csv(result, file = "result.csv", row.names = FALSE)

# This script has been released under the Apache 2.0 open source license.

Link to Full Article: projectz

Pin It on Pinterest

Share This

Join Our Newsletter

Sign up to our mailing list to receive the latest news and updates about the Informed.AI Network of AI-related websites, which includes Events.AI, Neurons.AI, Awards.AI, and Vocation.AI.

You have Successfully Subscribed!