# pandas_demo_01.py

#!/usr/bin/python3
# ===================================================================
# getting started with Pandas sort methods
#    www.youtube.com/watch?v=gUDkkF1ox0Y
# ===================================================================
# will need a basic understanding of pandas dataframe:
#    realpython.com/courses/pandas-dataframe-working-with-data/
# ===================================================================
# use EPA fuel economy dataset:
#    www.fueleconomy.gov/feg/download.shtml
# ===================================================================

import pandas as pd

# ---- subset of the csv data columns
# ---- (passed to read_csv via usecols, so only these columns are loaded)

column_subset = [
    'id',
    'make',
    'model',
    'year',
    'cylinders',
    'fuelType',
    'trany',
    'mpgData',
    'city08',
    'highway08',
]

# ---- load only the first 100 rows of the dataset into a pandas dataframe
# ---- the data can come from a URL or from a local file:
# ----    URL  : https://www.fueleconomy.gov/feg/epadata/vehicles.csv
# ----    local: vehicles.csv   (the one used below)

# one print call emits: blank line, banner, blank line
print('', '---- read first 100 rows ---------------------------', '', sep='\n')

# usecols limits the columns that are loaded, nrows limits the rows read
df = pd.read_csv('vehicles.csv', usecols=column_subset, nrows=100)


# ---- show the first 5 rows of the dataframe

# one print call emits: blank line, banner, blank line
print('', '---- view first 5 rows -----------------------------', '', sep='\n')

print(df.head(n=5))


# ---- sort dataframe
# ---- sort_values() returns a new, sorted dataframe (ascending by default);
# ---- the result is only printed here, df itself is not reassigned

# banner names the column actually used for the sort ('highway08')
# one print call emits: blank line, banner, blank line
print('', '---- print sorted by highway08 ---------------------', '', sep='\n')

print(df.sort_values(by='highway08'))