# bstring_test_01.py

# =========================================================
# Run under both Python 2 and Python 3 and compare the output.
#
# Are Python strings UTF-8 or ASCII? What about Python 2?
# =========================================================

def utf8len(s):
    """Return the number of bytes in the UTF-8 encoding of ``s``.

    ``s`` must provide an ``encode`` method. For text containing
    non-ASCII characters the result is larger than ``len(s)``, because
    UTF-8 uses more than one byte for such characters.
    """
    encoded = s.encode('utf-8')
    return len(encoded)


def displayStringInfo(str):
    """Print a short report about ``str`` to standard output.

    The report is a separator line followed by four lines: the value,
    its type, its ``len()`` and its size in bytes once UTF-8 encoded.
    Nothing is returned.

    NOTE: the parameter name shadows the built-in ``str`` inside this
    function; it is kept as-is so that keyword callers keep working.
    """
    print('-----------------------------------------')

    # Each line is formatted right before it is printed, so the lines
    # already produced stay visible if a later step raises.
    value_line = "str = '{}'".format(str)
    print(value_line)

    type_line = 'str is {}'.format(type(str))
    print(type_line)

    # Length as reported by len() on the object itself.
    item_count = len(str)
    print('str len   = {}'.format(item_count))

    # Size in bytes after encoding to UTF-8.
    byte_count = utf8len(str)
    print('utf-8 len = {}'.format(byte_count))

# Feed the same report four differently-written string literals so the
# output can be compared between interpreter versions.
for _sample in (
    # Plain, unprefixed literal.
    'This is an ASCII string',
    # Bytes literal explicitly decoded as UTF-8.
    b'this is an UTF-8 decoded binary string'.decode('utf-8'),
    # Literal with the explicit unicode prefix.
    u'This is a UTF-8 string',
    # Unprefixed literal containing a \u escape sequence.
    '\u221a contains a UTF-8 characer',
):
    displayStringInfo(_sample)