 import unicodedata
 
 # Dependency imports
+from third_party.lazy_loader import LazyLoader
 
-import six
-from six.moves import xrange  # pylint: disable=redefined-builtin
+six = LazyLoader('six', globals(), 'six')
 
 logger = logging.getLogger(__name__)
 
 # This set contains all letter and number characters.
 _ALPHANUMERIC_CHAR_SET = set(
     six.unichr(i)
-    for i in xrange(sys.maxunicode)
+    for i in six.moves.xrange(sys.maxunicode)
     if (unicodedata.category(six.unichr(i)).startswith("L") or
         unicodedata.category(six.unichr(i)).startswith("N")))
@@ -70,7 +70,7 @@ def encode(text):
   token_start = 0
   # Classify each character in the input string
   is_alnum = [c in _ALPHANUMERIC_CHAR_SET for c in text]
-  for pos in xrange(1, len(text)):
+  for pos in six.moves.xrange(1, len(text)):
     if is_alnum[pos] != is_alnum[pos - 1]:
       token = text[token_start:pos]
       if token != u" " or token_start == 0:
@@ -242,7 +242,7 @@ def _escaped_token_to_subtoken_strings(self, escaped_token):
     start = 0
     token_len = len(escaped_token)
     while start < token_len:
-      for end in xrange(min(token_len, start + self._max_subtoken_len), start, -1):
+      for end in six.moves.xrange(min(token_len, start + self._max_subtoken_len), start, -1):
         subtoken = escaped_token[start:end]
         if subtoken in self._all_subtoken_strings:
           ret.append(subtoken)
@@ -356,7 +356,7 @@ def build_from_token_counts(self, token_counts, min_count, num_iterations=4):
     # with high enough counts for our new vocabulary.
     if min_count < 1:
       min_count = 1
-    for i in xrange(num_iterations):
+    for i in six.moves.xrange(num_iterations):
 
       # Collect all substrings of the encoded token that break along current
       # subtoken boundaries.
@@ -366,7 +366,7 @@ def build_from_token_counts(self, token_counts, min_count, num_iterations=4):
         subtokens = self._escaped_token_to_subtoken_strings(escaped_token)
         start = 0
         for subtoken in subtokens:
-          for end in xrange(start + 1, len(escaped_token) + 1):
+          for end in six.moves.xrange(start + 1, len(escaped_token) + 1):
             new_subtoken = escaped_token[start:end]
             subtoken_counts[new_subtoken] += count
           start += len(subtoken)
@@ -384,7 +384,7 @@ def build_from_token_counts(self, token_counts, min_count, num_iterations=4):
       # a longer subtoken string, we can decrement the counts of its
      # prefixes.
       new_subtoken_strings = []
-      for lsub in xrange(len(len_to_subtoken_strings) - 1, 0, -1):
+      for lsub in six.moves.xrange(len(len_to_subtoken_strings) - 1, 0, -1):
        subtoken_strings = len_to_subtoken_strings[lsub]
         for subtoken_string in subtoken_strings:
           count = subtoken_counts[subtoken_string]
@@ -393,7 +393,7 @@ def build_from_token_counts(self, token_counts, min_count, num_iterations=4):
             # explicitly, regardless of count.
             if subtoken_string not in self._alphabet:
               new_subtoken_strings.append((count, subtoken_string))
-            for l in xrange(1, lsub):
+            for l in six.moves.xrange(1, lsub):
               subtoken_counts[subtoken_string[:l]] -= count
 
       # Include the alphabet explicitly to guarantee all strings are
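Note on the change itself: `six = LazyLoader('six', globals(), 'six')` swaps the eager `import six` for a proxy that only imports `six` when one of its attributes is first touched, which is why every bare `xrange(...)` above has to become `six.moves.xrange(...)`. The `third_party/lazy_loader.py` module is not part of this diff, so the snippet below is only a minimal stand-in for it, following the usual `types.ModuleType` lazy-import pattern (as in TensorFlow's `LazyLoader`), to show how the proxy behaves.

```python
# Minimal stand-in for third_party/lazy_loader.py (assumption: the real file
# follows the common TensorFlow-style LazyLoader pattern; it is not shown in
# this diff).
import importlib
import types


class LazyLoader(types.ModuleType):
  """Module proxy that defers the real import until first attribute access."""

  def __init__(self, local_name, parent_module_globals, name):
    self._local_name = local_name
    self._parent_module_globals = parent_module_globals
    super(LazyLoader, self).__init__(name)

  def _load(self):
    # Import the wrapped module and swap it into the caller's globals so
    # later lookups bypass this proxy entirely.
    module = importlib.import_module(self.__name__)
    self._parent_module_globals[self._local_name] = module
    self.__dict__.update(module.__dict__)
    return module

  def __getattr__(self, item):
    return getattr(self._load(), item)


# Mirrors the usage in the diff: `six` is only imported (if installed) when
# an attribute such as six.moves.xrange is first accessed.
six = LazyLoader('six', globals(), 'six')
print(list(six.moves.xrange(3)))  # [0, 1, 2]
```

On first attribute access the proxy replaces itself in the caller's globals, so subsequent lookups pay no extra cost; until then the module is never imported at all.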