11// latbin/lattice-compose.cc
22
33// Copyright 2009-2011 Microsoft Corporation; Saarland University
4+ // 2022 Brno University of Technology
45
56// See ../../COPYING for clarification regarding multiple authors
67//
1718// See the Apache 2 License for the specific language governing permissions and
1819// limitations under the License.
1920
20-
2121#include " base/kaldi-common.h"
2222#include " util/common-utils.h"
2323#include " fstext/fstext-lib.h"
@@ -39,22 +39,34 @@ int main(int argc, char *argv[]) {
3939 " or lattices with FSTs (rspecifiers are assumed to be lattices, and\n "
4040 " rxfilenames are assumed to be FSTs, which have their weights interpreted\n "
4141 " as \" graph weights\" when converted into the Lattice format.\n "
42+ " Or, rspecifier can be ark of biasing FSTs, see --compose-with-fst=true.\n "
4243 " \n "
4344 " Usage: lattice-compose [options] lattice-rspecifier1 "
4445 " (lattice-rspecifier2|fst-rxfilename2) lattice-wspecifier\n "
4546 " e.g.: lattice-compose ark:1.lats ark:2.lats ark:composed.lats\n "
46- " or: lattice-compose ark:1.lats G.fst ark:composed.lats\n " ;
47+ " or: lattice-compose ark:1.lats G.fst ark:composed.lats\n "
48+ " or: lattice-compose --compose-with-fst=true ark:1.lats\n "
49+ " ark:biasing.fsts ark:composed.lats\n " ;
4750
4851 ParseOptions po (usage);
4952
5053 bool write_compact = true ;
5154 int32 num_states_cache = 50000 ;
5255 int32 phi_label = fst::kNoLabel ; // == -1
56+ int32 rho_label = fst::kNoLabel ; // == -1
57+ std::string compose_with_fst = " auto" ;
58+
5359 po.Register (" write-compact" , &write_compact, " If true, write in normal (compact) form." );
5460 po.Register (" phi-label" , &phi_label, " If >0, the label on backoff arcs of the LM" );
61+ po.Register (" rho-label" , &rho_label,
62+ " If >0, the label to forward fst1 paths not present biasing graph fst2. "
63+ " (rho is input and output symbol on special arc in biasing graph)" );
5564 po.Register (" num-states-cache" , &num_states_cache,
5665 " Number of states we cache when mapping LM FST to lattice type. "
5766 " More -> more memory but faster." );
67+ po.Register (" compose-with-fst" , &compose_with_fst,
68+ " (true|false|auto) For auto arg2 is: rspecifier=lats, rxfilename=fst "
69+ " (old behavior), for true/false rspecifier is fst/lattice." );
5870 po.Read (argc, argv);
5971
6072 if (po.NumArgs () != 3 ) {
@@ -63,14 +75,30 @@ int main(int argc, char *argv[]) {
6375 }
6476
6577 KALDI_ASSERT (phi_label > 0 || phi_label == fst::kNoLabel ); // e.g. 0 not allowed.
78+ KALDI_ASSERT (rho_label > 0 || rho_label == fst::kNoLabel ); // e.g. 0 not allowed.
79+ if (phi_label > 0 && rho_label > 0 ) {
80+ KALDI_ERR << " You cannot set both 'phi_label' and 'rho_label' at the same time." ;
81+ }
82+
83+ { // convert 'compose_with_fst' to lowercase to support: true, True, TRUE
84+ std::string tmp_lc (compose_with_fst);
85+ std::transform (compose_with_fst.begin (), compose_with_fst.end (),
86+ tmp_lc.begin (), ::tolower); // lc
87+ compose_with_fst.swap (tmp_lc);
88+ }
89+ if (compose_with_fst != " auto" && compose_with_fst != " true" &&
90+ compose_with_fst != " false" ) {
91+ KALDI_ERR << " Unkown 'compose_with_fst' value : " << compose_with_fst
92+ << " , values are (auto|true|false)" ;
93+ }
6694
6795 std::string lats_rspecifier1 = po.GetArg (1 ),
6896 arg2 = po.GetArg (2 ),
6997 lats_wspecifier = po.GetArg (3 );
7098 int32 n_done = 0 , n_fail = 0 ;
7199
72100 SequentialLatticeReader lattice_reader1 (lats_rspecifier1);
73-
101+
74102 CompactLatticeWriter compact_lattice_writer;
75103 LatticeWriter lattice_writer;
76104
@@ -79,33 +107,48 @@ int main(int argc, char *argv[]) {
79107 else
80108 lattice_writer.Open (lats_wspecifier);
81109
82- if (ClassifyRspecifier (arg2, NULL , NULL ) == kNoRspecifier ) {
110+ bool arg2_is_rxfilename = (ClassifyRspecifier (arg2, NULL , NULL ) == kNoRspecifier );
111+
112+ /* *
113+ * arg2 is rxfilename that contains a single fst
114+ * - compose arg1 lattices with single fst in arg2
115+ */
116+ if (arg2_is_rxfilename && (compose_with_fst == " auto" || compose_with_fst == " true" )) {
83117 std::string fst_rxfilename = arg2;
84- VectorFst<StdArc> *fst2 = fst::ReadFstKaldi (fst_rxfilename);
85- // mapped_fst2 is fst2 interpreted using the LatticeWeight semiring,
86- // with all the cost on the first member of the pair (since we're
87- // assuming it's a graph weight).
118+ VectorFst<StdArc>* fst2 = fst::ReadFstKaldi (fst_rxfilename);
119+
120+ // Make sure fst2 is sorted on ilabel
88121 if (fst2->Properties (fst::kILabelSorted , true ) == 0 ) {
89- // Make sure fst2 is sorted on ilabel.
90122 fst::ILabelCompare<StdArc> ilabel_comp;
91123 ArcSort (fst2, ilabel_comp);
92124 }
125+
93126 if (phi_label > 0 )
94127 PropagateFinal (phi_label, fst2);
95128
129+ // mapped_fst2 is fst2 interpreted using the LatticeWeight semiring,
130+ // with all the cost on the first member of the pair (since we're
131+ // assuming it's a graph weight).
96132 fst::CacheOptions cache_opts (true , num_states_cache);
97133 fst::MapFstOptions mapfst_opts (cache_opts);
98134 fst::StdToLatticeMapper<BaseFloat> mapper;
99135 fst::MapFst<StdArc, LatticeArc, fst::StdToLatticeMapper<BaseFloat> >
100136 mapped_fst2 (*fst2, mapper, mapfst_opts);
137+
101138 for (; !lattice_reader1.Done (); lattice_reader1.Next ()) {
102139 std::string key = lattice_reader1.Key ();
103140 KALDI_VLOG (1 ) << " Processing lattice for key " << key;
104141 Lattice lat1 = lattice_reader1.Value ();
105142 ArcSort (&lat1, fst::OLabelCompare<LatticeArc>());
143+
106144 Lattice composed_lat;
107- if (phi_label > 0 ) PhiCompose (lat1, mapped_fst2, phi_label, &composed_lat);
108- else Compose (lat1, mapped_fst2, &composed_lat);
145+ if (phi_label > 0 ) {
146+ PhiCompose (lat1, mapped_fst2, phi_label, &composed_lat);
147+ } else if (rho_label > 0 ) {
148+ RhoCompose (lat1, mapped_fst2, rho_label, &composed_lat);
149+ } else {
150+ Compose (lat1, mapped_fst2, &composed_lat);
151+ }
109152 if (composed_lat.Start () == fst::kNoStateId ) {
110153 KALDI_WARN << " Empty lattice for utterance " << key << " (incompatible LM?)" ;
111154 n_fail++;
@@ -121,7 +164,23 @@ int main(int argc, char *argv[]) {
121164 }
122165 }
123166 delete fst2;
124- } else {
167+ }
168+
169+ /* *
170+ * arg2 is rxfilename that contains a single lattice
171+ */
172+ else if (arg2_is_rxfilename && compose_with_fst == " false" ) {
173+ // Would it make sense to do this? Not implementing...
174+ KALDI_ERR << " Unimplemented..." ;
175+ }
176+
177+ /* *
178+ * arg2 is rspecifier that contains a table of lattices
179+ * - composing arg1 lattices with arg2 lattices
180+ */
181+ else if (not arg2_is_rxfilename &&
182+ (compose_with_fst == " auto" || compose_with_fst == " false" )) {
183+ //
125184 std::string lats_rspecifier2 = arg2;
126185 // This is the case similar to lattice-interp.cc, where we
127186 // read in another set of lattices and compose them. But in this
@@ -139,6 +198,7 @@ int main(int argc, char *argv[]) {
139198 n_fail++;
140199 continue ;
141200 }
201+
142202 Lattice lat2 = lattice_reader2.Value (key);
143203 // Make sure that either lat2 is ilabel sorted
144204 // or lat1 is olabel sorted, to ensure that
@@ -150,29 +210,108 @@ int main(int argc, char *argv[]) {
150210 fst::ArcSort (&lat2, ilabel_comp);
151211 }
152212
153- Lattice lat_out;
213+ Lattice composed_lat;
214+ // Btw, can the lat2 lattice contain phi/rho symbols ?
154215 if (phi_label > 0 ) {
155216 PropagateFinal (phi_label, &lat2);
156- PhiCompose (lat1, lat2, phi_label, &lat_out);
217+ PhiCompose (lat1, lat2, phi_label, &composed_lat);
218+ } else if (rho_label > 0 ) {
219+ RhoCompose (lat1, lat2, rho_label, &composed_lat);
220+ } else {
221+ Compose (lat1, lat2, &composed_lat);
222+ }
223+ if (composed_lat.Start () == fst::kNoStateId ) {
224+ KALDI_WARN << " Empty lattice for utterance " << key << " (incompatible LM?)" ;
225+ n_fail++;
226+ } else {
227+ if (write_compact) {
228+ CompactLattice clat;
229+ ConvertLattice (composed_lat, &clat);
230+ compact_lattice_writer.Write (key, clat);
231+ } else {
232+ lattice_writer.Write (key, composed_lat);
233+ }
234+ n_done++;
235+ }
236+ }
237+ }
238+
239+ /* *
240+ * arg2 is rspecifier that contains a table of fsts
241+ * - composing arg1 lattices with arg2 fsts
242+ */
243+ else if (not arg2_is_rxfilename && compose_with_fst == " true" ) {
244+ std::string fst_rspecifier2 = arg2;
245+ RandomAccessTableReader<fst::VectorFstHolder> fst_reader2 (fst_rspecifier2);
246+
247+ for (; !lattice_reader1.Done (); lattice_reader1.Next ()) {
248+ std::string key = lattice_reader1.Key ();
249+ KALDI_VLOG (1 ) << " Processing lattice for key " << key;
250+ Lattice lat1 = lattice_reader1.Value ();
251+ lattice_reader1.FreeCurrent ();
252+
253+ if (!fst_reader2.HasKey (key)) {
254+ KALDI_WARN << " Not producing output for utterance " << key
255+ << " because not present in second table." ;
256+ n_fail++;
257+ continue ;
258+ }
259+
260+ VectorFst<StdArc> fst2 = fst_reader2.Value (key);
261+ // Make sure fst2 is sorted on ilabel
262+ if (fst2.Properties (fst::kILabelSorted , true ) == 0 ) {
263+ fst::ILabelCompare<StdArc> ilabel_comp;
264+ fst::ArcSort (&fst2, ilabel_comp);
265+ }
266+
267+ // for composing with LM-fsts, it makes all fst2 states final
268+ if (phi_label > 0 )
269+ PropagateFinal (phi_label, &fst2);
270+
271+ // mapped_fst2 is fst2 interpreted using the LatticeWeight semiring,
272+ // with all the cost on the first member of the pair (since we're
273+ // assuming it's a graph weight).
274+ fst::CacheOptions cache_opts (true , num_states_cache);
275+ fst::MapFstOptions mapfst_opts (cache_opts);
276+ fst::StdToLatticeMapper<BaseFloat> mapper;
277+ fst::MapFst<StdArc, LatticeArc, fst::StdToLatticeMapper<BaseFloat> >
278+ mapped_fst2 (fst2, mapper, mapfst_opts);
279+
280+ // sort lat1 on olabel.
281+ ArcSort (&lat1, fst::OLabelCompare<LatticeArc>());
282+
283+ Lattice composed_lat;
284+ if (phi_label > 0 ) {
285+ PhiCompose (lat1, mapped_fst2, phi_label, &composed_lat);
286+ } else if (rho_label > 0 ) {
287+ RhoCompose (lat1, mapped_fst2, rho_label, &composed_lat);
157288 } else {
158- Compose (lat1, lat2 , &lat_out );
289+ Compose (lat1, mapped_fst2 , &composed_lat );
159290 }
160- if (lat_out.Start () == fst::kNoStateId ) {
291+
292+ if (composed_lat.Start () == fst::kNoStateId ) {
161293 KALDI_WARN << " Empty lattice for utterance " << key << " (incompatible LM?)" ;
162294 n_fail++;
163295 } else {
164296 if (write_compact) {
165- CompactLattice clat_out ;
166- ConvertLattice (lat_out , &clat_out );
167- compact_lattice_writer.Write (key, clat_out );
297+ CompactLattice clat ;
298+ ConvertLattice (composed_lat , &clat );
299+ compact_lattice_writer.Write (key, clat );
168300 } else {
169- lattice_writer.Write (key, lat_out );
301+ lattice_writer.Write (key, composed_lat );
170302 }
171303 n_done++;
172304 }
173305 }
174306 }
175307
308+ /* *
309+ * none of the 'if-else-if' applied...
310+ */
311+ else {
312+ KALDI_ERR << " You should never reach here..." ;
313+ }
314+
176315 KALDI_LOG << " Done " << n_done << " lattices; failed for "
177316 << n_fail;
178317 return (n_done != 0 ? 0 : 1 );
0 commit comments