#!/usr/bin/python3
# ================================================================
# Unicode Anglo-Saxon runes, etc.
# 1. display runes
# 2. user enters a Unicode code point as a string of hex digits;
# display its character info
# ----------------------------------------------------------------
# Project: generalize to any type of integer input
# (bin, oct, dec, hex)
# ================================================================
import unicodedata as ud
import user_interface as ui
# ---- Unicode code points (Anglo-Saxon runes)
# ---- Stored as uppercase hex strings; each code point is listed once.
# ---- The last three entries (16EB-16ED) are the runic punctuation marks.
# ---- NOTE(review): "16BE" replaces an earlier second "16EB" that sat
# ---- between 16BB and 16C1 -- presumably a transposition; confirm.
# ---- NOTE(review): a repeated "16A8" entry was dropped; confirm that
# ---- 16A8 itself is meant to stay in the list.
code_points = [ "16A0", "16A2", "16A6", "16A8", "16A9", "16B1",
                "16B3", "16B7", "16B9", "16BB", "16BE", "16C1",
                "16C4", "16C7", "16C8", "16C9", "16CB", "16CF",
                "16D2", "16D6", "16D7", "16DA", "16DD", "16DE",
                "16DF", "16AA", "16AB", "16A3", "16E0", "16E3",
                "16B8", "16E4", "16E1", "16E2", "16E5", "16EB",
                "16EC", "16ED" ]
# ----------------------------------------------------------------
# ---- Function: string length in bytes
# ----------------------------------------------------------------
def utf8len(s:str) -> int:
    """Return the number of bytes *s* occupies when encoded as UTF-8."""
    encoded = s.encode('utf-8')
    return len(encoded)
# -----------------------------------------------------------------
# ---- Function: convert each byte in a string
# ---- into a string of bits
# -----------------------------------------------------------------
def bit_string(s:str) -> str:
    """Return the UTF-8 bytes of *s* as space-separated bit strings.

    Every byte is rendered as exactly eight binary digits (zero padded).
    An empty string produces an empty result.
    """
    # ---- encode once, then format each byte value as 8 binary digits
    utf8_bytes = s.encode('utf-8')
    return ' '.join(format(octet, '08b') for octet in utf8_bytes)
# ----------------------------------------------------------------
# ---- Function: is the code point a valid Unicode character?
# ---- Cn - not assigned
# ---- Cs - surrogate
# ---- Co - private use
# ---- Note: some valid code points are unprintable characters
# ---- i.e. category Cc (displayed as a ' ' character)
# ----------------------------------------------------------------
def is_valid(code_point:int) -> bool:
    """Return True if *code_point* is an assigned, usable Unicode character.

    A code point is rejected when it lies outside 0..0x10FFFF or when
    its general category is 'Cn' (not assigned), 'Cs' (surrogate) or
    'Co' (private use). Control characters (category 'Cc') count as
    valid.
    """
    # ---- chr() raises ValueError outside 0..0x10FFFF, so both ends
    # ---- must be checked; negative values arrive from input like "-1"
    if not 0 <= code_point <= 0x10FFFF:
        return False
    return ud.category(chr(code_point)) not in ('Cn', 'Cs', 'Co')
# ----------------------------------------------------------------
# ---- Function: convert a string of hex digits to an integer
# ----------------------------------------------------------------
def is_hex_integer(s:str) -> tuple[bool, int]:
    """Try to parse *s* as a base-16 integer.

    Parsing follows the rules of the built-in int(s, 16).

    Returns:
        (True, value) when *s* parses, otherwise (False, 0). On
        failure the offending input and the error message are printed.
    """
    try:
        n = int(s, 16)
    # ---- int() reports malformed text as ValueError and a non-string
    # ---- argument as TypeError; anything else is a real bug and
    # ---- should propagate
    except (TypeError, ValueError) as e:
        print(f's = {s}')
        print(f'e = {e}')
        return (False, 0)
    return (True, n)
# ----------------------------------------------------------------
# ---- Function: display Unicode characters
# ---- (assumes a list of valid hex code point strings)
# ----------------------------------------------------------------
def display_characters(code_points:list[str]) -> None:
    """Print each hex code point string next to the character it encodes.

    Args:
        code_points: hex digit strings such as "16A0" (strings, not
            integers -- each entry is parsed with is_hex_integer).

    Output starts with a blank line. Processing stops at the first
    entry that is not a hex integer string.
    """
    print()
    for s in code_points:
        tf,i = is_hex_integer(s)
        # ---- the list is assumed valid; give up at the first bad entry
        if not tf:
            break
        print(f'{s} is {chr(i)}')
# -----------------------------------------------------------------
# ---- Function: display a character's bytes and bits
# -----------------------------------------------------------------
def display_character_bytes_and_bits(s:str) -> None:
    """Print the length, UTF-8 size and bit pattern of *s*.

    Output is a blank line, a summary line (character count and UTF-8
    byte count), another blank line, then the bit string of the UTF-8
    bytes.
    """
    print()
    # ---- summary line: the text, its length in characters and in bytes
    summary = f'chr="{s}" len={len(s)} (char) ' f'sizeof={utf8len(s)} (bytes)'
    print(summary)
    print()
    bits = bit_string(s)
    print(f'bit string is {bits}')
# ----------------------------------------------------------------
# ---- main
# ----------------------------------------------------------------
##print()
##print(f'{len(code_points)} Anglo-Saxon rune characters')
##display_characters(code_points)
# ---- prompt repeatedly; empty input ends the loop
while True:
    print()
    s = ui.get_user_input('Enter code point (hex): ')
    if not s:
        break
    tf,i = is_hex_integer(s)
    if not tf:
        # ---- not parseable as base-16
        print()
        print(f'input ({s}) is not a hex integer string')
    elif not is_valid(i):
        # ---- parseable, but rejected by is_valid
        print()
        print(f'input ({s}) is not a valid code point')
    else:
        display_character_bytes_and_bits(chr(i))