#! /usr/bin/python3
# ===================================================================
# Count and print max path difference between un-balanced
# and balanced trees
#
# Note: input data is the diff max path values from multiple
#       random trees
# ===================================================================
# analysis
#
# The balance algorithm makes the trees slightly better, but in a
# few cases it makes some of them slightly worse.
# ===================================================================
# sample output
#
#  -1 -- 36
#   0 -- 542
#   1 -- 357
#   2 -- 64
#   3 -- 1
#
# Negative numbers are levels the balance algorithm adds to the
# trees. Positive numbers are levels removed from the trees.
# ===================================================================
# Questions
#
# 1. Does the sample size make a difference to how effective
#    the balance algorithm is? (Bigger sample, more effective?)
# 2. Does the sample size relative to the population size
#    make a difference to how effective the balance algorithm is?
#    (sample bigger percentage of population, more effective?)
# ===================================================================

import csv
from collections import Counter

rawdatafile = 'btree_a_plot_stats.csv'

# Zero-based index of the CSV column holding the max-path diff value.
DIFF_COLUMN = 4


def count_diffs(path, column=DIFF_COLUMN):
    """Count how often each integer diff value occurs in a CSV file.

    The first line of the file is treated as a header and skipped.
    Completely blank lines are ignored; every other row must carry an
    integer in field ``column``.

    Args:
        path: Path of the CSV file to read.
        column: Zero-based index of the field holding the diff value.

    Returns:
        A ``collections.Counter`` mapping each diff value (int) to the
        number of rows in which it appears.

    Raises:
        OSError: The file cannot be opened.
        IndexError: A non-blank row has too few fields.
        ValueError: The diff field of a row is not an integer.
    """
    # newline='' lets the csv module do its own line-ending handling;
    # the with-block closes the file even if a row fails to parse.
    with open(path, 'r', newline='') as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader, None)  # skip csv file header line (if any)
        # csv.reader yields [] for a blank line; skip those rows only,
        # so genuinely malformed rows still raise instead of vanishing.
        return Counter(int(row[column]) for row in csvreader if row)


def print_diff_counts(diffcount):
    """Print one ``diff -- count`` line per diff value, in ascending order."""
    for k in sorted(diffcount):
        print('{:3} -- {}'.format(k, diffcount[k]))


def main():
    """Count the diff values in the raw data file and print the totals."""
    print_diff_counts(count_diffs(rawdatafile))


if __name__ == '__main__':
    main()