Skip to content
This repository was archived by the owner on Jul 4, 2023. It is now read-only.

Commit 7f82397

Browse files
committed
Static tokenizer: accept an Iterable instead of requiring a list
1 parent 05172e1 commit 7f82397

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

torchnlp/encoders/text/static_tokenizer_encoder.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from collections import Counter
2+
from collections.abc import Iterable
23

34
import torch
45

@@ -21,7 +22,7 @@ class StaticTokenizerEncoder(TextEncoder):
2122
""" Encodes a text sequence using a static tokenizer.
2223
2324
Args:
24-
sample (list): Sample of data used to build encoding dictionary.
25+
sample (collections.abc.Iterable): Sample of data used to build encoding dictionary.
2526
min_occurrences (int, optional): Minimum number of occurrences for a token to be added to
2627
the encoding dictionary.
2728
tokenize (callable): :class:`callable` to tokenize a sequence.
@@ -64,8 +65,8 @@ def __init__(self,
6465
**kwargs):
6566
super().__init__(**kwargs)
6667

67-
if not isinstance(sample, list):
68-
raise TypeError('Sample must be a list.')
68+
if not isinstance(sample, Iterable):
69+
raise TypeError('Sample must be a `collections.abc.Iterable`.')
6970

7071
self.eos_index = eos_index
7172
self.unknown_index = unknown_index

0 commit comments

Comments (0)