 import unicodedata
 
 # Dependency imports
+from third_party.lazy_loader import LazyLoader
 
-import six
-from six.moves import xrange  # pylint: disable=redefined-builtin
+six = LazyLoader('six', globals(), 'six')
 
 logger = logging.getLogger(__name__)
 
 # This set contains all letter and number characters.
 _ALPHANUMERIC_CHAR_SET = set(
     six.unichr(i)
-    for i in xrange(sys.maxunicode)
+    for i in six.moves.xrange(sys.maxunicode)
     if (unicodedata.category(six.unichr(i)).startswith("L") or
         unicodedata.category(six.unichr(i)).startswith("N")))
@@ -70,7 +70,7 @@ def encode(text):
   token_start = 0
   # Classify each character in the input string
   is_alnum = [c in _ALPHANUMERIC_CHAR_SET for c in text]
-  for pos in xrange(1, len(text)):
+  for pos in six.moves.xrange(1, len(text)):
     if is_alnum[pos] != is_alnum[pos - 1]:
       token = text[token_start:pos]
       if token != u" " or token_start == 0:
@@ -242,7 +242,7 @@ def _escaped_token_to_subtoken_strings(self, escaped_token):
     start = 0
     token_len = len(escaped_token)
     while start < token_len:
-      for end in xrange(min(token_len, start + self._max_subtoken_len), start, -1):
+      for end in six.moves.xrange(min(token_len, start + self._max_subtoken_len), start, -1):
         subtoken = escaped_token[start:end]
         if subtoken in self._all_subtoken_strings:
           ret.append(subtoken)
@@ -356,7 +356,7 @@ def build_from_token_counts(self, token_counts, min_count, num_iterations=4):
     # with high enough counts for our new vocabulary.
     if min_count < 1:
       min_count = 1
-    for i in xrange(num_iterations):
+    for i in six.moves.xrange(num_iterations):
 
       # Collect all substrings of the encoded token that break along current
       # subtoken boundaries.
@@ -366,7 +366,7 @@ def build_from_token_counts(self, token_counts, min_count, num_iterations=4):
         subtokens = self._escaped_token_to_subtoken_strings(escaped_token)
         start = 0
         for subtoken in subtokens:
-          for end in xrange(start + 1, len(escaped_token) + 1):
+          for end in six.moves.xrange(start + 1, len(escaped_token) + 1):
             new_subtoken = escaped_token[start:end]
             subtoken_counts[new_subtoken] += count
           start += len(subtoken)
@@ -384,7 +384,7 @@ def build_from_token_counts(self, token_counts, min_count, num_iterations=4):
       # a longer subtoken string, we can decrement the counts of its
      # prefixes.
       new_subtoken_strings = []
-      for lsub in xrange(len(len_to_subtoken_strings) - 1, 0, -1):
+      for lsub in six.moves.xrange(len(len_to_subtoken_strings) - 1, 0, -1):
        subtoken_strings = len_to_subtoken_strings[lsub]
         for subtoken_string in subtoken_strings:
           count = subtoken_counts[subtoken_string]
@@ -393,7 +393,7 @@ def build_from_token_counts(self, token_counts, min_count, num_iterations=4):
             # explicitly, regardless of count.
             if subtoken_string not in self._alphabet:
               new_subtoken_strings.append((count, subtoken_string))
-            for l in xrange(1, lsub):
+            for l in six.moves.xrange(1, lsub):
               subtoken_counts[subtoken_string[:l]] -= count
 
       # Include the alphabet explicitly to guarantee all strings are
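Note on the change itself: `six = LazyLoader('six', globals(), 'six')` swaps the eager `import six` for a proxy that only imports `six` when one of its attributes is first touched, which is why every bare `xrange(...)` above has to become `six.moves.xrange(...)`. The `third_party/lazy_loader.py` module is not part of this diff, so the snippet below is only a minimal stand-in for it, following the usual `types.ModuleType` lazy-import pattern (as in TensorFlow's `LazyLoader`), to show how the proxy behaves.

```python
# Minimal stand-in for third_party/lazy_loader.py (assumption: the real file
# follows the common TensorFlow-style LazyLoader pattern; it is not shown in
# this diff).
import importlib
import types


class LazyLoader(types.ModuleType):
  """Module proxy that defers the real import until first attribute access."""

  def __init__(self, local_name, parent_module_globals, name):
    self._local_name = local_name
    self._parent_module_globals = parent_module_globals
    super(LazyLoader, self).__init__(name)

  def _load(self):
    # Import the wrapped module and swap it into the caller's globals so
    # later lookups bypass this proxy entirely.
    module = importlib.import_module(self.__name__)
    self._parent_module_globals[self._local_name] = module
    self.__dict__.update(module.__dict__)
    return module

  def __getattr__(self, item):
    return getattr(self._load(), item)


# Mirrors the usage in the diff: `six` is only imported (if installed) when
# an attribute such as six.moves.xrange is first accessed.
six = LazyLoader('six', globals(), 'six')
print(list(six.moves.xrange(3)))  # [0, 1, 2]
```

On first attribute access the proxy replaces itself in the caller's globals, so subsequent lookups pay no extra cost; until then the module is never imported at all.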