@@ -137,7 +137,7 @@ def __init__(self, docs: Optional[Union[Dict[str, str], Sequence[Document]]] = N
137
137
138
138
# declare public attributes
139
139
#: SpaCy Language instance
140
- self .nlp : Language
140
+ self .nlp : Optional [ Language ] = None
141
141
#: preprocessing pipeline for raw input text; must consist of functions that accept a string and return
142
142
# a processed string
143
143
self .raw_preproc : List [Callable ]
@@ -206,14 +206,14 @@ def __init__(self, docs: Optional[Union[Dict[str, str], Sequence[Document]]] = N
206
206
language_model = DEFAULT_LANGUAGE_MODELS [language ] + '_' + model_suffix
207
207
208
208
# model meta information
209
- try :
210
- model_info = spacy .info (language_model )
211
- except (RuntimeError , SystemExit ):
209
+ if language_model not in spacy .util .get_installed_models ():
212
210
raise RuntimeError (f'language model "{ language_model } " cannot be loaded; are you sure it is installed? '
213
211
f'see https://spacy.io/models or '
214
212
f'https://tmtoolkit.readthedocs.io/en/latest/install.html for further information '
215
213
f'on installing language models' )
216
214
215
+ model_info = spacy .info (language_model )
216
+
217
217
# the default pipeline components for SpaCy language models – these would be loaded *and enabled* if not
218
218
# explicitly excluded
219
219
default_components = set (model_info ['pipeline' ])
@@ -534,17 +534,23 @@ def ngrams_join_str(self) -> str:
534
534
@property
def language(self) -> str:
    """
    Return Corpus language as two-letter ISO 639-1 language code.

    :return: ``self.nlp.lang`` when a SpaCy pipeline is set on this instance; the placeholder string
             ``'<not initialized>'`` while ``self.nlp`` is still ``None``
    """
    # test for None explicitly instead of relying on truthiness: only a missing pipeline should yield
    # the placeholder, not a pipeline object that happens to evaluate as falsy
    if self.nlp is None:
        return '<not initialized>'

    return self.nlp.lang
538
541
539
542
@property
def language_model(self) -> str:
    """
    Return name of the language model that was loaded.

    :return: ``self.nlp.lang`` and ``self.nlp.meta['name']`` joined by an underscore; the placeholder
             string ``'<not initialized>'`` while ``self.nlp`` is still ``None``
    """
    # explicit None check: a pipeline object is never treated as "missing" just because it is falsy
    if self.nlp is None:
        return '<not initialized>'

    return self.nlp.lang + '_' + self.nlp.meta['name']
543
549
544
550
@property
def has_sents(self) -> bool:
    """
    Return True if information on sentence borders was parsed for documents in this corpus, else return False.

    :return: True if the pipeline in ``self.nlp`` has a ``'parser'`` or ``'senter'`` component among its
             ``pipe_names``; False otherwise, including when ``self.nlp`` is ``None``
    """
    # guard explicitly: `self.nlp and (...)` would hand back None instead of a bool for a missing pipeline
    if self.nlp is None:
        return False

    pipe_names = self.nlp.pipe_names
    return 'parser' in pipe_names or 'senter' in pipe_names
548
554
549
555
@property
550
556
def doc_labels (self ) -> List [str ]:
0 commit comments