# =========================================================
# Run using Python 2 and Python 3 ... see the difference.
#
# Are Python strings UTF-8 or ASCII? What about Python 2?
# =========================================================


def utf8len(s):
    """Return the number of bytes in the UTF-8 encoding of *s*.

    *s* must provide an ``encode`` method (a text string). For non-ASCII
    text the result is larger than ``len(s)`` because such characters
    encode to more than one byte.
    """
    return len(s.encode('utf-8'))


def displayStringInfo(str):
    """Print the value, type, length and UTF-8 byte length of a string.

    The parameter is named ``str`` (shadowing the builtin inside this
    function) and is kept that way so existing keyword callers keep working.
    Nothing is returned; the report goes to standard output.
    """
    # Alias the argument right away so the rest of the body does not have to
    # refer to a name that hides the builtin ``str`` type.
    text = str
    print('-----------------------------------------')
    print("str = '{}'".format(text))
    print('str is {}'.format(type(text)))
    # len() counts the string's elements, whereas utf8len() counts the bytes
    # of its UTF-8 encoding; the two differ for non-ASCII text.
    print('str len = {}'.format(len(text)))
    print('utf-8 len = {}'.format(utf8len(text)))


# Demo calls: a plain literal, decoded bytes, an explicit unicode literal, and
# a plain literal containing a \u escape. The last one is where the
# interpreters differ: Python 3 reads '\u221a' as a single character, while
# Python 2 keeps the backslash sequence as-is in a non-unicode literal.
displayStringInfo('This is an ASCII string')
displayStringInfo(b'this is an UTF-8 decoded binary string'.decode('utf-8'))
displayStringInfo(u'This is a UTF-8 string')
displayStringInfo('\u221a contains a UTF-8 characer')