# A partial implementation of "Learning with Holographic Reduced
# Representations" (https://arxiv.org/abs/2109.02157).

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler

# Note: this example requires the napkinXC library: https://napkinxc.readthedocs.io/
from napkinxc.datasets import load_dataset
from napkinxc.measures import precision_at_k

from tqdm import tqdm
import torchhd
from torchhd import embeddings, HRRTensor
import torchhd.tensors
from scipy.sparse import vstack, lil_matrix
import numpy as np


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using {} device".format(device))


DIMENSIONS = 400
NUMBER_OF_EPOCHS = 1
BATCH_SIZE = 1
DATASET_NAME = "eurlex-4k"  # tested on "eurlex-4k" and "Wiki10-31K"
FC_LAYER_SIZE = 512
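
# DIMENSIONS sets the size of the HRR hypervectors the FCHRR model below
# predicts into; keeping it much smaller than the number of classes is what
# shrinks that model's output layer relative to the plain FC baseline. These
# values are small-scale defaults for this example rather than tuned settings.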


def sparse_batch_collate(batch: list):
    """
    Collate function that transforms a batch of scipy CSR matrix rows into a
    PyTorch sparse COO tensor plus a dense target tensor.
    """
    data_batch, targets_batch = zip(*batch)

    # Stack the sparse feature rows and convert CSR -> COO, which maps
    # directly onto torch.sparse_coo_tensor's (indices, values, shape) form.
    data_batch = vstack(data_batch).tocoo()
    data_batch = torch.sparse_coo_tensor(
        np.array(data_batch.nonzero()), data_batch.data, data_batch.shape
    )

    targets_batch = torch.stack(targets_batch)

    return data_batch, targets_batch
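
# Keeping the high-dimensional bag-of-words features sparse avoids
# materializing a dense (batch, N_features) tensor; the sparse COO batch is
# fed straight into the models' nn.Linear input layer below.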


class MultilabelDataset(Dataset):
    """Wraps the napkinXC features and label lists in a map-style Dataset."""

    def __init__(self, x, y, n_classes) -> None:
        self.x = x
        self.y = y
        self.n_classes = n_classes

    # Define the length of the dataset.
    def __len__(self):
        return self.x.shape[0]

    # Return a single sample from the dataset.
    def __getitem__(self, idx):
        # Multi-hot encode this sample's list of label ids.
        labels = torch.zeros(self.n_classes, dtype=torch.int64)
        labels[self.y[idx]] = 1
        return self.x[idx], labels


X_train, Y_train = load_dataset(DATASET_NAME, "train", verbose=True)
X_test, Y_test = load_dataset(DATASET_NAME, "test", verbose=True)


if DATASET_NAME == "Wiki10-31K":  # Because of this issue: https://github.com/mwydmuch/napkinXC/issues/18
    X_train = lil_matrix(X_train[:, :-1])

N_features = X_train.shape[1]
# Largest label id seen in the training set, plus one.
N_classes = max(max(classes) for classes in Y_train if classes != []) + 1

train_dataset = MultilabelDataset(X_train, Y_train, N_classes)
train_dataloader = DataLoader(train_dataset, BATCH_SIZE, collate_fn=sparse_batch_collate)
test_dataset = MultilabelDataset(X_test, Y_test, N_classes)
test_dataloader = DataLoader(test_dataset, collate_fn=sparse_batch_collate)


print(
    "Training on \033[1m{}\033[0m. It has {} features, and {} classes.".format(
        DATASET_NAME, N_features, N_classes
    )
)


# Fully connected model for the baseline comparison.
class FC(nn.Module):
    def __init__(self, num_features, num_classes):
        super(FC, self).__init__()
        self.num_classes = num_classes
        self.num_features = num_features
        self.fc_layer_size = FC_LAYER_SIZE

        # Network layers
        self.fc1 = nn.Linear(self.num_features, self.fc_layer_size)
        self.fc2 = nn.Linear(self.fc_layer_size, self.fc_layer_size)
        self.olayer = nn.Linear(self.fc_layer_size, self.num_classes)

    def forward(self, x):
        x = F.leaky_relu(self.fc1(x))
        x = F.leaky_relu(self.fc2(x))
        x = self.olayer(x)
        return x

    def pred(self, out, threshold=0.5):
        # Return every class whose sigmoid score clears the threshold,
        # sorted by score so the list is ready for precision@k.
        y = torch.sigmoid(out)
        v, i = y.sort(descending=True)
        ids = i[v >= threshold]
        return ids.tolist()

    def loss(self, out, target):
        # Independent binary cross-entropy per class; targets are cast to
        # the same float dtype as the logits.
        return nn.BCEWithLogitsLoss()(out, target.float())


# Modified version of the FC model that returns an HRRTensor whose dimension
# is much smaller than the FC model's output layer, so the model has fewer
# parameters.
class FCHRR(nn.Module):
    def __init__(self, num_features, num_classes, dim):
        super(FCHRR, self).__init__()
        self.num_classes = num_classes
        self.num_features = num_features
        self.fc_layer_size = FC_LAYER_SIZE
        self.dim = dim

        # One random HRR hypervector per class.
        self.classes_vec = embeddings.Random(num_classes, dim, vsa="HRR")
        n_vec, p_vec = torchhd.HRRTensor.random(2, dim)
        self.register_buffer("n_vec", n_vec)
        self.register_buffer("p_vec", p_vec)
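        # n_vec and p_vec act as fixed "negative" and "positive" role
        # vectors; registering them as buffers lets them move with the model
        # across devices without being trained. HRR binding is
        # (approximately) invertible, roughly:
        #   a, b = torchhd.HRRTensor.random(2, dim)
        #   a.bind(b).bind(b.exact_inverse())  # ~= a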

        # Network layers
        self.fc1 = nn.Linear(self.num_features, self.fc_layer_size)
        self.fc2 = nn.Linear(self.fc_layer_size, self.fc_layer_size)
        self.olayer = nn.Linear(self.fc_layer_size, dim)

    def forward(self, x):
        x = F.leaky_relu(self.fc1(x))
        x = F.leaky_relu(self.fc2(x))
        x = self.olayer(x)
        # Reinterpret the raw output as an HRR hypervector so torchhd's
        # bind/bundle/similarity operations are available on it.
        return x.as_subclass(HRRTensor)

    def pred(self, out, threshold=0.1):
        # Unbind the "positive" role vector from the prediction, then rank
        # classes by cosine similarity to their hypervectors; every class
        # above the threshold is predicted.
        tmp_positive = self.p_vec.exact_inverse().bind(out)
        sims = tmp_positive.cosine_similarity(self.classes_vec.weight)

        v, i = sims.sort(descending=True)
        ids = i[v >= threshold]

        return ids.tolist()

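    # A sketch of the objective implemented below, per sample with network
    # output y and present-class hypervectors C (up to the paper's notation):
    #   J = sum_{c in C} (1 - cos(p_inv * y, c)) + cos(n_inv * y, bundle(C))
    # where "*" is HRR binding (circular convolution) and p_inv, n_inv are
    # the exact inverses of the role vectors.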
    def loss(self, out, target):
        loss = torch.tensor(0, dtype=torch.float32, device=device)

        # Unbind the positive and negative role vectors from the batch of
        # predictions.
        tmp_positives = self.p_vec.exact_inverse().bind(out)
        tmp_negatives = self.n_vec.exact_inverse().bind(out)
        for i in range(target.shape[0]):
            # Hypervectors of the classes present in this sample.
            cp = self.classes_vec.weight[target[i] == 1, :]

            # j_p pulls the positive view towards every present class;
            # j_n pushes the negative view away from their bundle.
            j_p = (1 - tmp_positives[i].cosine_similarity(cp)).sum()
            j_n = tmp_negatives[i].cosine_similarity(cp.multibundle())

            loss += j_p + j_n

        loss /= target.shape[0]

        return loss


hrr_model = FCHRR(N_features, N_classes, DIMENSIONS)
hrr_model = hrr_model.to(device)

baseline_model = FC(N_features, N_classes)
baseline_model = baseline_model.to(device)
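

# Train and evaluate both models with the same loop so their P@1 numbers are
# directly comparable.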
for model_name, model in {"HRR-FC": hrr_model, "FC": baseline_model}.items():
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.7)
    model.train()
    for epoch in tqdm(range(1, NUMBER_OF_EPOCHS + 1), desc=f"{model_name} epochs", leave=False):

        for samples, labels in tqdm(train_dataloader, desc="Training", leave=False):
            samples = samples.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            out = model(samples)
            loss = model.loss(out, labels)
            loss.backward()
            optimizer.step()

        # Decay the learning rate once per epoch.
        scheduler.step()

    Y_pred = []
    model.eval()
    with torch.no_grad():
        # The test dataloader uses batch size 1, so pred() decodes a single
        # sample at a time.
        for data, target in tqdm(test_dataloader, desc="Validating", leave=False):
            data, target = data.to(device).float(), target.to(device)
            out = model(data)
            ids = model.pred(out)
            Y_pred.append(ids)

    # Calculate the P@1 metric.
    p_at_1 = precision_at_k(Y_test, Y_pred, k=1)[0]
    print("Result of {} model ----> P@1 = {}".format(model_name, p_at_1))