Updated the utils/unicode.py file for Python 2/3 compatibility.

NOTES:
  1. Replaced unicode, str, etc. with their six counterparts (text_type, binary_type)
  2. isinstance(obj, basestring) -> isinstance(obj, (string_types, text_type))
  3. I'm not entirely confident about how __str__ and __unicode__ behave across
     Python 2 and 3, so the code paths that call them might require a bit more testing.
This commit is contained in:
Rory Finnegan 2015-04-15 00:03:55 -04:00
parent 1e139fe08f
commit f8fe1357b0

View file

@ -19,6 +19,8 @@
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
from six import string_types, text_type, binary_type
# to_bytes and to_unicode were written by Toshio Kuratomi for the
# python-kitchen library https://pypi.python.org/pypi/kitchen
# They are licensed in kitchen under the terms of the GPLv2+
@ -88,13 +90,13 @@ def to_unicode(obj, encoding='utf-8', errors='replace', nonstring=None):
'''
# Could use isbasestring/isunicode here but we want this code to be as
# fast as possible
if isinstance(obj, basestring):
if isinstance(obj, unicode):
if isinstance(obj, (string_types, text_type)):
if isinstance(obj, text_type):
return obj
if encoding in _UTF8_ALIASES:
return unicode(obj, 'utf-8', errors)
return text_type(obj, 'utf-8', errors)
if encoding in _LATIN1_ALIASES:
return unicode(obj, 'latin-1', errors)
return text_type(obj, 'latin-1', errors)
return obj.decode(encoding, errors)
if not nonstring:
@ -116,13 +118,13 @@ def to_unicode(obj, encoding='utf-8', errors='replace', nonstring=None):
simple = obj.__str__()
except (UnicodeError, AttributeError):
simple = u''
if isinstance(simple, str):
return unicode(simple, encoding, errors)
if isinstance(simple, binary_type):
return text_type(simple, encoding, errors)
return simple
elif nonstring in ('repr', 'strict'):
obj_repr = repr(obj)
if isinstance(obj_repr, str):
obj_repr = unicode(obj_repr, encoding, errors)
obj_repr = text_type(obj_repr, encoding, errors)
if nonstring == 'repr':
return obj_repr
raise TypeError('to_unicode was given "%(obj)s" which is neither'
@ -197,7 +199,7 @@ def to_bytes(obj, encoding='utf-8', errors='replace', nonstring=None):
'''
# Could use isbasestring, isbytestring here but we want this to be as fast
# as possible
if isinstance(obj, basestring):
if isinstance(obj, (string_types, text_type)):
if isinstance(obj, str):
return obj
return obj.encode(encoding, errors)
@ -210,7 +212,7 @@ def to_bytes(obj, encoding='utf-8', errors='replace', nonstring=None):
return obj
elif nonstring == 'simplerepr':
try:
simple = str(obj)
simple = binary_type(obj)
except UnicodeError:
try:
simple = obj.__str__()
@ -221,7 +223,7 @@ def to_bytes(obj, encoding='utf-8', errors='replace', nonstring=None):
simple = obj.__unicode__()
except (AttributeError, UnicodeError):
simple = ''
if isinstance(simple, unicode):
if isinstance(simple, text_type):
simple = simple.encode(encoding, 'replace')
return simple
elif nonstring in ('repr', 'strict'):
@ -229,10 +231,10 @@ def to_bytes(obj, encoding='utf-8', errors='replace', nonstring=None):
obj_repr = obj.__repr__()
except (AttributeError, UnicodeError):
obj_repr = ''
if isinstance(obj_repr, unicode):
if isinstance(obj_repr, text_type):
obj_repr = obj_repr.encode(encoding, errors)
else:
obj_repr = str(obj_repr)
obj_repr = binary_type(obj_repr)
if nonstring == 'repr':
return obj_repr
raise TypeError('to_bytes was given "%(obj)s" which is neither'