#!/usr/bin/python3
# ===================================================================
# getting started with Pandas sort methods
# www.youtube.com/watch?v=gUDkkF1ox0Y
# ===================================================================
# will need a basic understanding of pandas dataframe:
# realpython.com/courses/pandas-dataframe-working-with-data/
# ===================================================================
# use EPA fuel economy dataset:
# www.fueleconomy.gov/feg/download.shtml
# ===================================================================
import pandas as pd
# ---- subset of the csv data columns to load
column_subset = [
    'id',
    'make',
    'model',
    'year',
    'cylinders',
    'fuelType',
    'trany',
    'mpgData',
    'city08',
    'highway08',
]


def load_vehicles(source='vehicles.csv', nrows=100):
    """Read the first *nrows* rows of the fuel economy data into a dataframe.

    Only the columns named in ``column_subset`` are loaded.

    Args:
        source: anything ``pd.read_csv`` accepts - a local file path, a URL
            (e.g. 'https://www.fueleconomy.gov/feg/epadata/vehicles.csv'),
            or an open file-like object.
        nrows: number of data rows to read from the top of the file.

    Returns:
        The pandas dataframe produced by ``pd.read_csv``.
    """
    return pd.read_csv(source, usecols=column_subset, nrows=nrows)


def _print_banner(title):
    """Print *title* with one blank line before and after it."""
    print()
    print(title)
    print()


def main(source='vehicles.csv'):
    """Load the dataset, then print its first rows and a sorted view.

    Args:
        source: where to read the csv from; passed to ``load_vehicles``.
    """
    # ---- read the first 100 rows of dataset into a pandas dataframe
    _print_banner('---- read first 100 rows ---------------------------')
    df = load_vehicles(source, nrows=100)

    # ---- view first 5 rows of the dataframe
    _print_banner('---- view first 5 rows -----------------------------')
    print(df.head())

    # ---- sort dataframe (ascending, by the highway08 column)
    # the banner names the column actually used for the sort
    _print_banner('---- print sorted by highway08 ---------------------')
    print(df.sort_values(by='highway08'))


if __name__ == '__main__':
    main()