# =========================================================
# Regression
#
# From: Practical Machine Learning with Python
#   1. www.youtube.com/watch?v=jcI5Vnw0b2c
#      (Regression Intro - #2)
#   2. www.youtube.com/watch?v=IN5jesocJjk
#      (Regression Features and Labels - #3)
#
# =========================================================
# Dependencies (Python packages, installed with pip):
#   pip install scikit-learn
#   pip install pandas
#   pip install quandl
# =========================================================
#
# Purpose: download a price history, derive two percentage
# features from it, and attach to every row a "label" column
# holding the adjusted close a fixed number of rows later.
# The first rows of the resulting frame are printed.

import math

import pandas as pd
import quandl

# Fetch the price table through the quandl client (network call).
df = quandl.get('WIKI/GOOGL')

# Keep only the adjusted columns.  The explicit .copy() makes this an
# independent frame, so adding columns below is not a write into a
# selection of the downloaded frame (avoids SettingWithCopyWarning).
df = df[[
    'Adj. Open',
    'Adj. High',
    'Adj. Low',
    'Adj. Close',
    'Adj. Volume',
]].copy()

# Derived features, both expressed in percent:
#   HL_PCT     - distance from the high down to the close, relative to
#                the close
#   PCT_change - move from the open to the close, relative to the open
# NOTE(review): the name HL_PCT suggests (high - low); the code uses
# (high - close).  Left as written - confirm which one is intended.
df['HL_PCT'] = (
    (df['Adj. High'] - df['Adj. Close']) / df['Adj. Close'] * 100.0
)
df['PCT_change'] = (
    (df['Adj. Close'] - df['Adj. Open']) / df['Adj. Open'] * 100.0
)

# Final feature set; copied again for the same reason as above, because
# the 'label' column is added to this frame further down.
df = df[['Adj. Close', 'HL_PCT', 'PCT_change', 'Adj. Volume']].copy()

# Column whose future value becomes the label.
forcast_col = 'Adj. Close'

# Replace missing feature values with a sentinel *before* the label is
# built, so that the dropna() below removes only the rows that have no
# label.
df = df.fillna(-99999)

# Forecast horizon, measured in rows: 1% of the table length, rounded
# up.  int() keeps the value an integer on Python 2, where math.ceil
# returns a float.
forcast_out = int(math.ceil(0.01 * len(df)))

# Label for each row = value of forcast_col `forcast_out` rows later.
# Shifting by a negative amount pulls later values up to the current
# row and leaves NaN in the last `forcast_out` rows.
df['label'] = df[forcast_col].shift(-forcast_out)

# Drop the trailing rows that have no label, then show the current
# close next to the value it is paired with as its forecast target.
df = df.dropna()
print(df.head())