Zipf Distribution Examples

Zipf's law (Wikipedia)

image missing

The red line and the blue bars both show the theoretical
Zipf distribution (1, 1/2, 1/3, 1/4, ..., 1/n).

Example of Zipf Distribution

#!/usr/bin/python3
# =============================================================
# Zipf distribution (Zipf's law)
#
# Note: Rank is a position in the hierarchy.
#       (i.e. first, second, third, ...)
# =============================================================

zipf_max = 100.0   # value plotted for rank 1
zipf_size = 20     # number of ranks to plot


def zipf_values(size, max_value):
    """Return the Zipf values for ranks 1..size, scaled to max_value.

    The result is [max_value/1, max_value/2, ..., max_value/size].
    A size of zero or less gives an empty list.
    """
    return [max_value / rank for rank in range(1, size + 1)]


def plot_zipf(values):
    """Show values as blue bars with a red line overlay, one per rank.

    values[0] is drawn at rank 1, values[1] at rank 2, and so on.
    """
    # Imported here rather than at the top so the pure computation
    # above can be imported and reused without matplotlib installed.
    import matplotlib.pyplot as plt

    ranks = list(range(1, len(values) + 1))

    plt.title('Zipf Distribution')
    plt.xlabel('Zipf Rank')
    plt.ylabel('Zipf Rank Value')
    # ---- x-axis ticks at 0, 2, 4, ...
    plt.xticks(list(range(0, len(values) + 1, 2)))
    plt.grid()
    plt.plot(ranks, values, linewidth=2, color='r')
    plt.bar(ranks, values, linewidth=8, color='blue')
    plt.show()


# ---- zipf distribution values: zipf_max * (1, 1/2, 1/3, ..., 1/n)
zipf_list = zipf_values(zipf_size, zipf_max)

# ---- plot zipf distribution (only when run as a script)
if __name__ == '__main__':
    plot_zipf(zipf_list)

Example of Zipf-Mandelbrot Distribution

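This produces a curve with the same general shape as the first example, but the values are computed with the Zipf-Mandelbrot formula, 1/(rank + b)^a, so they fall off more slowly.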

#!/usr/bin/python3
# =============================================================
# Zipf-Mandelbrot law
#
# Note: Rank is a position in the hierarchy.
#       (i.e. first, second, third, ...)
# =============================================================
# From Zipf's Law (Wikipedia)
#
# Zipf-Mandelbrot law:
#    word_frequency = 1/((rank+b)**a)
#      or
#    word_frequency = 1/pow((rank+b),a)
#      with
#         a approximately 1.0
#         b approximately 2.7
# =============================================================

zipf_max = 100.0   # value plotted for rank 1
zipf_size = 20     # number of ranks to plot
a = 1.0            # exponent in the Zipf-Mandelbrot formula
b = 2.7            # rank offset in the Zipf-Mandelbrot formula


def zipf_mandelbrot_frequencies(size, a, b):
    """Return 1/pow(rank + b, a) for ranks 1..size.

    A size of zero or less gives an empty list.
    """
    return [1 / pow(rank + b, a) for rank in range(1, size + 1)]


def scale_to_first(frequencies, max_value):
    """Scale frequencies so the first one maps to max_value.

    Every entry is divided by frequencies[0] and multiplied by
    max_value, so the first result equals max_value and the rest
    keep their ratio to it. An empty input gives an empty list.
    """
    if not frequencies:
        return []
    first = frequencies[0]
    return [max_value * (freq / first) for freq in frequencies]


def plot_zipf_mandelbrot(values):
    """Show values as blue bars with a red line overlay, one per rank.

    values[0] is drawn at rank 1, values[1] at rank 2, and so on.
    """
    # Imported here rather than at the top so the pure computation
    # above can be imported and reused without matplotlib installed.
    import matplotlib.pyplot as plt

    rank = list(range(1, len(values) + 1))

    plt.title('Zipf-Mandelbrot Distribution')
    plt.ylabel('Zipf Rank Value')
    plt.xlabel('Zipf Rank')
    # ---- x-axis ticks at 0, 2, 4, ...
    plt.xticks(list(range(0, len(values) + 1, 2)))
    plt.grid()
    plt.plot(rank, values, linewidth=2, color='r')
    plt.bar(rank, values, linewidth=8, color='blue')
    plt.show()


# ---- Zipf-Mandelbrot frequency values
freq_list = zipf_mandelbrot_frequencies(zipf_size, a, b)

# ---- scale frequency values to 'zipf_max'
# ---- Note: zipf_max is the first element of zipf_list and
# ----       the values decrease from there
zipf_list = scale_to_first(freq_list, zipf_max)

# ---- plot zipf distribution (only when run as a script)
if __name__ == '__main__':
    plot_zipf_mandelbrot(zipf_list)