# btree_a_plot_diff.py

#! /usr/bin/python3
# ===================================================================
# Count and print max path difference between un-balanced
# and balanced trees
#
# Note: input data is the diff max path values from multiple
#       random trees
# ===================================================================
# analysis
#
# The balance algorithm makes the trees slightly better, but in
# a few cases it makes some of them slightly worse.
# ===================================================================
# sample output
#
# -1 -- 36
#  0 -- 542
#  1 -- 357
#  2 -- 64
#  3 -- 1
#
# Negative numbers are levels the balance algorithm adds to the
# trees. Positive numbers are levels removed from the trees.
# ===================================================================
# Questions
#
# 1. Does the sample size make a difference to how effective
#    the balance algorithm is? (Bigger sample, more effective?)
# 2. Does the sample size relative to the population size
#    make a difference to how effective the balance algorithm is?
#    (sample bigger percentage of population, more effective?)
# ===================================================================

import csv

rawdatafile = 'btree_a_plot_stats.csv'


def count_diffs(rows, column=4):
    """Tally how often each integer diff value occurs in *rows*.

    Args:
        rows: iterable of CSV data rows (sequences of strings) with the
            header line already removed.
        column: index of the column holding the diff value.

    Returns:
        dict mapping each int diff value to its number of occurrences.

    Raises:
        IndexError: a non-empty row has no such column.
        ValueError: a diff value cannot be converted to int.
    """
    diffcount = {}
    for row in rows:
        if not row:                # csv.reader yields [] for blank lines
            continue
        key = int(row[column])     # diff csv column value key as int
        diffcount[key] = diffcount.get(key, 0) + 1
    return diffcount


def format_counts(diffcount):
    """Return one 'diff -- count' line per diff value, sorted by diff."""
    return ['{:3} -- {}'.format(k, diffcount[k]) for k in sorted(diffcount)]


def main():
    """Read the raw data file and print the sorted diff counts."""
    # 'with' guarantees the file is closed even if a row fails to parse;
    # newline='' is what the csv module documents for reader input files.
    with open(rawdatafile, 'r', newline='') as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader, None)      # skip csv header line (if any)
        diffcount = count_diffs(csvreader)

    # ---- print key and accumulated diff count, sorted by key
    for line in format_counts(diffcount):
        print(line)


if __name__ == '__main__':
    main()