
Commit 5f2f71f

Merge pull request #150 from karin0018/dev
[DOC] Update docs
2 parents 4ba7ec4 + c840248 commit 5f2f71f

11 files changed: 328 additions, 77 deletions

EduNLP/I2V/i2v.py

Lines changed: 22 additions & 1 deletion
@@ -97,19 +97,39 @@ def __call__(self, items, *args, **kwargs):
         return self.infer_vector(items, *args, **kwargs)
 
     def tokenize(self, items, *args, key=lambda x: x, **kwargs) -> list:
-        # """tokenize item"""
+        """
+        tokenize item
+        Parameter
+        ----------
+        items: a list of questions
+        Return
+        ----------
+        tokens: list
+        """
         return self.tokenizer(items, *args, key=key, **kwargs)
 
     def infer_vector(self, items, key=lambda x: x, **kwargs) -> tuple:
+        """
+        get question embedding
+        NotImplemented
+        """
         raise NotImplementedError
 
     def infer_item_vector(self, tokens, *args, **kwargs) -> ...:
+        """NotImplemented"""
         return self.infer_vector(tokens, *args, **kwargs)[0]
 
     def infer_token_vector(self, tokens, *args, **kwargs) -> ...:
+        """NotImplemented"""
         return self.infer_vector(tokens, *args, **kwargs)[1]
 
     def save(self, config_path):
+        """
+        save model weights in config_path
+        Parameter:
+        ----------
+        config_path: str
+        """
         with open(config_path, "w", encoding="utf-8") as wf:
             json.dump(self.params, wf, ensure_ascii=False, indent=2)
 
@@ -126,6 +146,7 @@ def load(cls, config_path, *args, **kwargs):
 
     @classmethod
     def from_pretrained(cls, name, model_dir=MODEL_DIR, *args, **kwargs):
+        """NotImplemented"""
         raise NotImplementedError
 
     @property
EduNLP/Vector/disenqnet/disenqnet.py

Lines changed: 10 additions & 0 deletions
@@ -44,6 +44,16 @@ def infer_vector(self, items: dict, vector_type=None, **kwargs) -> torch.Tensor:
 
     def infer_tokens(self, items: dict, **kwargs) -> torch.Tensor:
         embeded, _, _ = self(items)
+        """
+        get tokens embedding with DisenQModel
+        Parameters
+        ----------
+        items: dict
+            {'content_idx': tensor(),'content_len': tensor()}, the tokens about question after tokenizer processing
+
+        Returns:
+        torch.Tensor: token embedding
+        """
         return embeded
 
     @property
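
The new docstring names the expected batch format, `{'content_idx': tensor(), 'content_len': tensor()}`. A small sketch of how such a batch might be assembled from tokenized questions (the `DisenQModel` forward call itself is assumed and not shown):

```python
# Sketch of the batch format named in the new docstring: a padded token-id
# matrix plus the true sequence lengths.
import torch

token_ids = [[12, 45, 7, 3], [8, 22, 5]]              # two tokenized questions
max_len = max(len(seq) for seq in token_ids)
content_idx = torch.zeros(len(token_ids), max_len, dtype=torch.long)
for i, seq in enumerate(token_ids):
    content_idx[i, :len(seq)] = torch.tensor(seq)
content_len = torch.tensor([len(seq) for seq in token_ids])

items = {"content_idx": content_idx, "content_len": content_len}
print(items["content_idx"].shape, items["content_len"])  # torch.Size([2, 4]) tensor([4, 3])
```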

EduNLP/Vector/elmo_vec.py

Lines changed: 18 additions & 0 deletions
@@ -21,6 +21,15 @@ def __call__(self, items: dict):
         return outputs
 
     def infer_vector(self, items: dict, **kwargs) -> torch.Tensor:
+        """
+        get sentence vector embedding with ElmoModel
+        Parameters
+        ----------
+        items: dict, {'seq_idx': tensor(),'seq_len':tensor()}, the tokens about question after tokenizer processing
+
+        Returns:
+        torch.Tensor: sentence embedding
+        """
         outputs = self(items)
         item_embeds = torch.cat(
             (outputs.forward_output[torch.arange(len(items["seq_len"])), torch.tensor(items["seq_len"]) - 1],
@@ -29,6 +38,15 @@ def infer_vector(self, items: dict, **kwargs) -> torch.Tensor:
         return item_embeds
 
     def infer_tokens(self, items, **kwargs) -> torch.Tensor:
+        """
+        get tokens embedding with ElmoModel
+        Parameters
+        ----------
+        items: dict, {'seq_idx': tensor()}, the tokens about question after tokenizer processing
+
+        Returns:
+        torch.Tensor: token embedding
+        """
         outputs = self(items)
         forward_hiddens = outputs.forward_output
         backward_hiddens = outputs.backward_output
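
The documented `infer_vector` path gathers, for each question, the forward hidden state at its last real token (`seq_len - 1`) before concatenating it with the backward output. A standalone sketch of that indexing pattern, using a dummy tensor in place of the real ElmoModel output:

```python
# Sketch of the indexing pattern visible in infer_vector above: for each item,
# pick the forward hidden state at its last real token (seq_len - 1). The real
# ElmoModel output object is assumed; forward_output here is a dummy tensor.
import torch

batch, max_len, hidden = 2, 5, 8
forward_output = torch.randn(batch, max_len, hidden)
seq_len = [5, 3]                                     # true lengths, as in items["seq_len"]

last_forward = forward_output[torch.arange(batch), torch.tensor(seq_len) - 1]
print(last_forward.shape)                            # torch.Size([2, 8])
```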

EduNLP/Vector/gensim_vec.py

Lines changed: 58 additions & 0 deletions
@@ -66,11 +66,32 @@ def __getitem__(self, item):
         return self.wv[item] if index not in self.constants.values() else np.zeros((self.vector_size,))
 
     def infer_vector(self, items, agg="mean", **kwargs) -> list:
+        """
+        get sentence embedding with word2vec model
+        Parameters
+        ----------
+        item: list, the tokens after tokenizer processing
+        Return
+        ------
+        vector: list
+        [array(), ..., array()]
+        """
         token_vectors = self.infer_tokens(items, **kwargs)
         # return [eval("np.%s" % agg)(item, axis=0) if item else np.array([]) for item in token_vectors]
         return [eval("np.%s" % agg)(item, axis=0) if item else np.zeros(self.vector_size,) for item in token_vectors]
 
     def infer_tokens(self, items, **kwargs) -> list:
+        """
+        get token embedding with word2vec model
+        Parameters
+        ----------
+        item: list
+            the tokens after tokenizer processing
+        Return
+        ------
+        vector: list
+        [[array(), ..., array()], [...], [...]]
+        """
         return [list(self(*item)) for item in items]
 
 
@@ -95,6 +116,17 @@ def __init__(self, filepath):
         self.dictionary = corpora.Dictionary.load(filepath)
 
     def infer_vector(self, item, return_vec=False):
+        """
+        get Bow vector
+        Parameters
+        ----------
+        item: list
+            the tokens after tokenizer processing
+        Return
+        ------
+        vector: list
+        [array(), ..., array()]
+        """
         item = self.dictionary.doc2bow(item)
         if not return_vec:
             return item  # return dic as default
@@ -121,6 +153,17 @@ def __init__(self, filepath):
         self.dictionary = corpora.Dictionary.load(dictionary_path)
 
     def infer_vector(self, item, return_vec=False):
+        """
+        get Tf-idf vector
+        Parameters
+        ----------
+        item: list
+            the tokens after tokenizer processing
+        Return
+        ------
+        vector: list
+        [array(), ..., array()]
+        """
         dic_item = self.dictionary.doc2bow(item)
         tfidf_item = self.tfidf_model[dic_item]
         # return dic as default
@@ -181,7 +224,22 @@ def vector_size(self):
         return self.d2v.vector_size
 
     def infer_vector(self, items, *args, **kwargs) -> list:
+        """
+        get vector with D2V model
+        Parameters
+        ----------
+        item: list
+            the tokens after tokenizer processing
+        Return
+        ------
+        vector: list
+        [array(), ..., array()]
+        """
         return [self(item) for item in items]
 
     def infer_tokens(self, item, *args, **kwargs) -> ...:
+        """
+        get token embeddings with D2V
+        NotImplemented
+        """
         raise NotImplementedError
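
The W2V `infer_vector` code shown above aggregates each question's token vectors with `eval("np.%s" % agg)(item, axis=0)` and falls back to a zero vector for empty items. A sketch of the same behaviour using `getattr(np, agg)`, which is equivalent for the default `agg="mean"`:

```python
# Sketch of the aggregation path in W2V.infer_vector above: np.<agg> over the
# token axis, with a zero vector when a question produced no usable tokens.
import numpy as np

vector_size = 4
token_vectors = [
    [np.ones(vector_size), 3 * np.ones(vector_size)],  # question with two token vectors
    [],                                                 # question with no usable tokens
]

agg = "mean"
sentence_vectors = [
    getattr(np, agg)(item, axis=0) if item else np.zeros(vector_size)
    for item in token_vectors
]
print(sentence_vectors[0])  # [2. 2. 2. 2.]
print(sentence_vectors[1])  # [0. 0. 0. 0.]
```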

EduNLP/Vector/t2v.py

Lines changed: 42 additions & 1 deletion
@@ -45,7 +45,7 @@ class T2V(object):
 
     Examples
     --------
-    >>> item = [{'ques_content':'有公式$\\FormFigureID{wrong1?}$和公式$\\FormFigureBase64{wrong2?}$,\
+    >>> item = [{'ques_content':'有公式$\\FormFigureID{wrong1?}$和公式$\\FormFigureBase64{wrong2?}$, \
     ... 如图$\\FigureID{088f15ea-8b7c-11eb-897e-b46bfc50aa29}$,若$x,y$满足约束条件$\\SIFSep$,则$z=x+7 y$的最大值为$\\SIFBlank$'}]
     >>> model_dir = "examples/test_model/d2v"
     >>> url, model_name, *args = get_pretrained_model_info('d2v_test_256')
@@ -69,9 +69,24 @@ def __call__(self, items, *args, **kwargs):
         return self.i2v.infer_vector(items, *args, **kwargs)
 
     def infer_vector(self, items, *args, **kwargs):
+        """
+        get question embedding with T2V
+        Parameters
+        ----------
+        items:list
+            a list of question
+        Returns
+        -------
+        vector:list
+            numpy.ndarray([dtype=float32)]
+        """
         return self.i2v.infer_vector(items, *args, **kwargs)
 
     def infer_tokens(self, items, *args, **kwargs):
+        """
+        get token embeddings with T2V
+        NotImplemented
+        """
         return self.i2v.infer_tokens(items, *args, **kwargs)
 
     @property
@@ -80,6 +95,24 @@ def vector_size(self) -> int:
 
 
 def get_pretrained_model_info(name):
+    """
+    get the pretrained model information with the given name
+    Parameters
+    ----------
+    name:str
+        select the pretrained model
+        e.g.:
+        d2v_math_300
+        w2v_math_300
+        elmo_math_2048
+        bert_math_768
+        bert_taledu_768
+        disenq_math_256
+        quesnet_math_512
+    Returns
+    --------
+    list: [model url (where to download), model name]
+    """
     url = MODELHUB_URL + 'getPretrainedModel'
     param = {'name': name}
     r = requests.get(url, params=param)
@@ -89,6 +122,14 @@ def get_pretrained_model_info(name):
 
 
 def get_all_pretrained_models():
+    """
+    get all pretrained models' name
+
+    Returns
+    -------
+    the pretrained models' name:list
+    e.g.['bert_bio_ptc', 'bert_geo_ptc', 'bert_math_768', ... ]
+    """
     url = MODELHUB_URL + 'getPretrainedModelList'
     r = requests.get(url)
     assert r.status_code == 200, r.status_code
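
Both ModelHub helpers shown above follow the same request pattern: `GET MODELHUB_URL + endpoint`, then assert a 200 status. A hedged sketch of that pattern; the `MODELHUB_URL` value and the response parsing (`r.json()`) are placeholders and assumptions, not confirmed library behaviour:

```python
# Sketch of the request pattern in get_pretrained_model_info above. The real
# MODELHUB_URL comes from EduNLP's configuration; the value below is a placeholder.
import requests

MODELHUB_URL = "https://example.org/modelhub/"       # placeholder, not the real endpoint

def get_model_info(name: str):
    r = requests.get(MODELHUB_URL + "getPretrainedModel", params={"name": name})
    assert r.status_code == 200, r.status_code
    return r.json()  # expected shape per the docstring: [model url (where to download), model name]

# e.g. get_model_info("d2v_math_300")
```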

docs/README.md

Lines changed: 6 additions & 1 deletion
@@ -3,26 +3,31 @@ EduNLP document and tutorial folder
 
 Requirements
 ------------
+
 See the requirements `docs_deps` in `setup.py`:
+
 ```sh
 pip install -e .[doc]
 ```
 
-
 Build documents
 ---------------
+
 First, clean up existing files:
+
 ```
 make clean
 ```
 
 Then build:
+
 ```
 make html
 ```
 
 Render locally
 --------------
+
 ```
 cd build/html
 python3 -m http.server 8000

docs/source/api/vector.rst

Lines changed: 2 additions & 2 deletions
@@ -10,13 +10,13 @@ EduNLP.Vector.t2v
 
 
 EduNLP.Vector.disenqnet
---------------------
+-------------------------
 
 .. automodule:: EduNLP.Vector.disenqnet.disenqnet
    :members:
 
 EduNLP.Vector.quesnet
---------------------
+-------------------------
 
 .. automodule:: EduNLP.Vector.quesnet.quesnet
    :members:

docs/source/conf.py

Lines changed: 2 additions & 0 deletions
@@ -114,3 +114,5 @@ def copy_tree(src, tar):
     'undoc-members': True,
 }
 autodoc_member_order = 'bysource'
+
+nbsphinx_allow_errors = True

docs/source/tutorial/zh/pipeline.rst

Lines changed: 3 additions & 5 deletions
@@ -2,9 +2,7 @@
 流水线
 =======
 
-.. nbgallery::
-    :caption: This is a thumbnail gallery:
-    :name: pipleine_gallery
-    :glob:
+.. nbinfo::
+    notebook:
 
-    流水线 <../../build/blitz/pipeline/pipeline.ipynb>
+    `流水线 <../../build/blitz/pipeline/pipeline.ipynb>`_