# pml001.py

# =========================================================
# Regression
#
# From: Practical Machine Learning with Python
# 1. www.youtube.com/watch?v=jcI5Vnw0b2c
#    (Regression Intro - #2)
# 2. www.youtube.com/watch?v=IN5jesocJjk
#    (regression Features and labels - #3)
#
# =========================================================
# pip install scikit-learn
# pip install pandas
# pip install quandl
# =========================================================

import math
import pandas as pd
import quandl

# -- fetch the 'WIKI/GOOGL' dataset as a data frame
df = quandl.get('WIKI/GOOGL')

# -- keep only the 'Adj.' columns; take an explicit copy so that the column
# -- assignments below write to an independent frame (avoids pandas'
# -- SettingWithCopyWarning on a column-subset selection)
df = df[[
    'Adj. Open',
    'Adj. High',
    'Adj. Low',
    'Adj. Close',
    'Adj. Volume',
]].copy()

# -- percent distance between the high and the close, relative to the close
# -- NOTE(review): despite the "HL" name this uses High and Close, not
# -- High and Low -- confirm this is the intended feature
df['HL_PCT'] = (
    (df['Adj. High'] - df['Adj. Close']) / df['Adj. Close'] * 100.0
)

# -- percent move from the open to the close, relative to the open
df['PCT_change'] = (
    (df['Adj. Close'] - df['Adj. Open']) / df['Adj. Open'] * 100.0
)

# -- reduce to the feature columns; copy again because 'label' is added below
df = df[['Adj. Close', 'HL_PCT', 'PCT_change', 'Adj. Volume']].copy()

# -- column whose future value becomes the label
# -- ('forcast' spelling kept: these are module-level names that code
# -- outside this section may reference)
forcast_col = 'Adj. Close'

# -- replace missing feature values with a sentinel instead of dropping rows
df.fillna(-99999, inplace=True)

# -- forecast horizon: 1% of the number of rows, rounded up
forcast_out = int(math.ceil(0.01 * len(df)))

# -- create a label
# -- the "label" of each row is the 'Adj. Close' value found forcast_out
# -- rows later (shift by a negative offset pulls future rows upward)
df['label'] = df[forcast_col].shift(-forcast_out)

# -- the last forcast_out rows have no future value, so their label is NaN;
# -- drop them, then show current close next to its forecast-horizon label
df.dropna(inplace=True)
print(df.head())