import numpy as np
import pandas as pd
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn import svm

# Load the training and test CSVs. Age is read as float64, so a missing age
# arrives as a real NaN value rather than as a string.
train = pd.read_csv("../input/train.csv", dtype={"Age": np.float64})
test = pd.read_csv("../input/test.csv", dtype={"Age": np.float64})

# Apply exactly the same cleaning to both frames so they cannot drift apart.
for frame in (train, test):
    # Encode Sex numerically (male -> 1, female -> 0); any other value is
    # left as-is.
    frame["Sex"] = frame["Sex"].replace({"male": 1, "female": 0})
    # Missing ages are NaN, not the text "NaN", so a string replace never
    # matches them; fillna is what actually substitutes the 0 placeholder.
    frame["Age"] = frame["Age"].fillna(0)

# Print to standard output, and see the results in the "log" section below after running your script
# Each header is followed by the table it announces; the leading "\n\n"
# separates the sections in the log.
print("\n\nTop of the training data:")
print(train.head())

print("\n\nSummary statistics of training data")
print(train.describe())

# Any files you save will be available in the output tab below
train.to_csv('copy_of_the_training_data.csv', index=False)

# Columns used as model inputs; "Survived" is the label being predicted.
chosen_features = ["Pclass", "Sex", "Age"]

train_features = train[chosen_features]
train_labels = train["Survived"]
test_features = test[chosen_features]

# Decision tree fitted on the same features. It gets its own names so its
# predictions remain available instead of being overwritten by the SVM below.
tree_clf = tree.DecisionTreeClassifier(min_samples_leaf=50)
tree_clf.fit(train_features, train_labels)
tree_pred = tree_clf.predict(test_features)

# Linear-kernel SVM. `clf` and `pred` stay bound to this model because `pred`
# is what gets written into the submission.
clf = svm.SVC(kernel="linear")
clf.fit(train_features, train_labels)
pred = clf.predict(test_features)

# Build the submission frame explicitly (one row per test passenger) before
# writing it; assigning columns to a name that was never created raises
# NameError.
sub = pd.DataFrame({
    "PassengerId": test["PassengerId"],
    "Survived": pred,
})
sub.to_csv("sub.csv", index=False)


URL: Titanic

Source: Google Alert for ML

Pin It on Pinterest

Share This

Join Our Newsletter

Sign up for our mailing list to receive the latest news and updates about homeAI.info and the Informed.AI Network of AI-related websites, which includes Events.AI, Neurons.AI, Awards.AI, and Vocation.AI.

You have Successfully Subscribed!