# ml004.py

# =========================================================
# Basic pipeline
# From: www.youtube.com/watch?v=84gqSblcBEF
# =========================================================

from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed
# in 0.20; `train_test_split` now lives in `sklearn.model_selection`. The
# fallback only exists so the script still runs on legacy installs.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# --- load the iris dataset: x holds the features, y holds the target labels
iris = datasets.load_iris()

x = iris.data
y = iris.target

# --- create train and test data
# test_size=0.5 holds out half of the samples for evaluation. No random_state
# is passed, so the split (and therefore the output below) can vary per run.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5)

# --- create classifier using train data
# Alternative: use a decision tree classifier instead of nearest neighbors:
#   from sklearn import tree
#   my_classifier = tree.DecisionTreeClassifier()

# --- create nearest neighbor classifier (library default settings)
my_classifier = KNeighborsClassifier()

my_classifier.fit(x_train, y_train)

# --- create test data predictions
predictions = my_classifier.predict(x_test)
print(predictions)

# --- how accurate is the classifier based on the train/test data
print(accuracy_score(y_test, predictions))