# btree_9_plot_diff.py

#! /usr/bin/python3
# ===================================================================
# Count and print max path difference between un-balanced
# and balanced trees
#
# Note: input data is the diff max path values from multiple
#       random trees
# ===================================================================
# analysis
#
# As you can see, my algorithm mostly hurts the situation and
# sometimes in big ways.
# ===================================================================
# sample output
#
# -20 -- 1
# -16 -- 1
# -13 -- 5
# -12 -- 6
# -11 -- 5
# -10 -- 3
#  -9 -- 11
#  -8 -- 20
#  -7 -- 31
#  -6 -- 36
#  -5 -- 39
#  -4 -- 67
#  -3 -- 82
#  -2 -- 105
#  -1 -- 108
#   0 -- 134
#   1 -- 88
#   2 -- 34
#   3 -- 4
#   4 -- 5
#
# Negative numbers are levels the balance algorithm adds to the
# trees. Positive numbers are levels removed from the trees.
# ===================================================================

import csv

rawdatafile = 'btree_9_plot_stats.csv'

DIFF_COLUMN = 4                # index of the diff column in each csv row


def count_diffs(rows, column=DIFF_COLUMN):
    """Count how often each diff value occurs.

    rows   -- iterable of csv rows (lists of strings); the first row
              is treated as the header line and skipped
    column -- index of the field holding the integer diff value

    Returns a dict mapping each int diff value to the number of rows
    that carry it. Completely empty rows (blank lines in the csv
    file) are ignored. Raises IndexError for a non-empty row without
    that column and ValueError when the field is not an integer.
    """
    rowiter = iter(rows)
    next(rowiter, None)        # skip csv file header line (if any)

    diffcount = {}             # diff count (dictionary)
    for row in rowiter:
        if not row:            # csv.reader yields [] for a blank line
            continue
        key = int(row[column])  # diff csv column value key as int
        diffcount[key] = diffcount.get(key, 0) + 1
    return diffcount


def format_counts(diffcount):
    """Return one 'diff -- count' line per diff value, sorted by diff."""
    return ['{:3} -- {}'.format(k, diffcount[k]) for k in sorted(diffcount)]


def main():
    """Read the raw data file and print the sorted diff counts."""
    # 'with' closes the file even when a row fails to parse;
    # newline='' is what the csv module asks for with csv.reader
    with open(rawdatafile, 'r', newline='') as csvfile:
        diffcount = count_diffs(csv.reader(csvfile))

    # ---- print key and accumulated diff count, sorted by key
    for line in format_counts(diffcount):
        print(line)


if __name__ == '__main__':
    main()