Skip to content
This repository was archived by the owner on Jul 4, 2023. It is now read-only.

Commit 38619d9

Browse files
committed
Lazy load six and requests for production
1 parent 4e84780 commit 38619d9

File tree

4 files changed

+18
-13
lines changed

4 files changed

+18
-13
lines changed

torchnlp/download.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22

33
import logging
44
import os
5-
import requests
65
import subprocess
76
import urllib.request
87
import zipfile
98

9+
from third_party.lazy_loader import LazyLoader
1010
from tqdm import tqdm
1111

12+
requests = LazyLoader('requests', globals(), 'requests')
13+
1214
logger = logging.getLogger(__name__)
1315

1416

torchnlp/encoders/text/subword_text_tokenizer.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,16 @@
2020
import unicodedata
2121

2222
# Dependency imports
23+
from third_party.lazy_loader import LazyLoader
2324

24-
import six
25-
from six.moves import xrange # pylint: disable=redefined-builtin
25+
six = LazyLoader('six', globals(), 'six')
2626

2727
logger = logging.getLogger(__name__)
2828

2929
# This set contains all letter and number characters.
3030
_ALPHANUMERIC_CHAR_SET = set(
3131
six.unichr(i)
32-
for i in xrange(sys.maxunicode)
32+
for i in six.moves.xrange(sys.maxunicode)
3333
if (unicodedata.category(six.unichr(i)).startswith("L") or
3434
unicodedata.category(six.unichr(i)).startswith("N")))
3535

@@ -70,7 +70,7 @@ def encode(text):
7070
token_start = 0
7171
# Classify each character in the input string
7272
is_alnum = [c in _ALPHANUMERIC_CHAR_SET for c in text]
73-
for pos in xrange(1, len(text)):
73+
for pos in six.moves.xrange(1, len(text)):
7474
if is_alnum[pos] != is_alnum[pos - 1]:
7575
token = text[token_start:pos]
7676
if token != u" " or token_start == 0:
@@ -242,7 +242,7 @@ def _escaped_token_to_subtoken_strings(self, escaped_token):
242242
start = 0
243243
token_len = len(escaped_token)
244244
while start < token_len:
245-
for end in xrange(min(token_len, start + self._max_subtoken_len), start, -1):
245+
for end in six.moves.xrange(min(token_len, start + self._max_subtoken_len), start, -1):
246246
subtoken = escaped_token[start:end]
247247
if subtoken in self._all_subtoken_strings:
248248
ret.append(subtoken)
@@ -356,7 +356,7 @@ def build_from_token_counts(self, token_counts, min_count, num_iterations=4):
356356
# with high enough counts for our new vocabulary.
357357
if min_count < 1:
358358
min_count = 1
359-
for i in xrange(num_iterations):
359+
for i in six.moves.xrange(num_iterations):
360360

361361
# Collect all substrings of the encoded token that break along current
362362
# subtoken boundaries.
@@ -366,7 +366,7 @@ def build_from_token_counts(self, token_counts, min_count, num_iterations=4):
366366
subtokens = self._escaped_token_to_subtoken_strings(escaped_token)
367367
start = 0
368368
for subtoken in subtokens:
369-
for end in xrange(start + 1, len(escaped_token) + 1):
369+
for end in six.moves.xrange(start + 1, len(escaped_token) + 1):
370370
new_subtoken = escaped_token[start:end]
371371
subtoken_counts[new_subtoken] += count
372372
start += len(subtoken)
@@ -384,7 +384,7 @@ def build_from_token_counts(self, token_counts, min_count, num_iterations=4):
384384
# a longer subtoken string, we can decrement the counts of its
385385
# prefixes.
386386
new_subtoken_strings = []
387-
for lsub in xrange(len(len_to_subtoken_strings) - 1, 0, -1):
387+
for lsub in six.moves.xrange(len(len_to_subtoken_strings) - 1, 0, -1):
388388
subtoken_strings = len_to_subtoken_strings[lsub]
389389
for subtoken_string in subtoken_strings:
390390
count = subtoken_counts[subtoken_string]
@@ -393,7 +393,7 @@ def build_from_token_counts(self, token_counts, min_count, num_iterations=4):
393393
# explicitly, regardless of count.
394394
if subtoken_string not in self._alphabet:
395395
new_subtoken_strings.append((count, subtoken_string))
396-
for l in xrange(1, lsub):
396+
for l in six.moves.xrange(1, lsub):
397397
subtoken_counts[subtoken_string[:l]] -= count
398398

399399
# Include the alphabet explicitly to guarantee all strings are

torchnlp/metrics/bleu.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,11 @@
2020
import tempfile
2121
import logging
2222

23+
from third_party.lazy_loader import LazyLoader
24+
2325
import numpy as np
2426

25-
from six.moves import urllib
27+
six = LazyLoader('six', globals(), 'six')
2628

2729
logger = logging.getLogger(__name__)
2830

@@ -64,7 +66,7 @@ def get_moses_multi_bleu(hypotheses, references, lowercase=False):
6466

6567
# Get MOSES multi-bleu script
6668
try:
67-
multi_bleu_path, _ = urllib.request.urlretrieve(
69+
multi_bleu_path, _ = six.moves.urllib.request.urlretrieve(
6870
"https://raw.githubusercontent.com/moses-smt/mosesdecoder/"
6971
"master/scripts/generic/multi-bleu.perl")
7072
os.chmod(multi_bleu_path, 0o755)

torchnlp/word_to_vector/pretrained_word_vectors.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,11 @@
3434
import logging
3535
import os
3636

37+
from third_party.lazy_loader import LazyLoader
3738
from tqdm import tqdm
3839

39-
import six
4040
import torch
41+
six = LazyLoader('six', globals(), 'six')
4142

4243
from torchnlp.download import download_file_maybe_extract
4344

0 commit comments

Comments (0)