Skip to content

Commit f299573

Browse files
committed
Fix ugly bug in the way we sent the sparse vectors to the weka-wrapper and bump version for next release.
1 parent 99f0c5e commit f299573

File tree

6 files changed

+27
-16
lines changed

6 files changed

+27
-16
lines changed

creole.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<!-- creole.xml for Learning_Framework -->
22
<CREOLE-DIRECTORY
33
ID="gate.LearningFramework"
4-
VERSION="3.2"
4+
VERSION="3.3"
55
DESCRIPTION="Learning Framework"
66
HELPURL="https://github.com/GateNLP/gateplugin-LearningFramework/wiki"
77
>

src/gate/plugin/learningframework/data/CorpusRepresentationMallet.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,4 +89,24 @@ public void savePipe(File directory) {
8989

9090
public abstract void addScaling(ScalingMethod scaleFeatures);
9191

92+
// TODO: do this better: if there are thousands of features, show only
93+
// a subset of them together with the total number of features.
94+
/*
95+
public String toString() {
96+
Alphabet dataAlph = pipe.getDataAlphabet();
97+
Alphabet targetAlph = pipe.getTargetAlphabet();
98+
StringBuilder sb = new StringBuilder();
99+
sb.append("CorpusRepresentationMallet{dataalphabet=");
100+
sb.append(dataAlph.toString());
101+
sb.append(",targetalphabet=");
102+
if(targetAlph==null) {
103+
sb.append("null");
104+
} else {
105+
sb.append(targetAlph.toString());
106+
}
107+
sb.append("}");
108+
return sb.toString();
109+
}
110+
*/
111+
92112
}

src/gate/plugin/learningframework/data/CorpusRepresentationMalletTarget.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ public CorpusRepresentationMalletTarget(FeatureInfo fi, ScalingMethod sm, Target
7070
}
7171

7272
/**
73-
* Create a new instance based on the pipe stored in directory.
73+
* Create a new CRMT instance based on the pipe stored in directory.
7474
* @param directory
7575
* @return
7676
*/

src/gate/plugin/learningframework/engines/Engine.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ public static Engine loadEngine(File directory, String parms) {
7777
// representation if necessary.
7878
eng.loadModel(directory, parms);
7979
eng.loadMalletCorpusRepresentation(directory);
80+
//System.err.println("Loaded mallet corpus representation: "+eng.getCorpusRepresentationMallet());
8081

8182
// we could stop growth right after loading, but that would interfere with engines which
8283
// allow updating, incremental learning etc.

src/gate/plugin/learningframework/engines/EngineWekaExternal.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ public EvaluationResult evaluate(String algorithmParameters, EvaluationMethod ev
211211
public List<GateClassification> classify(AnnotationSet instanceAS, AnnotationSet inputAS, AnnotationSet sequenceAS, String parms) {
212212
CorpusRepresentationMalletTarget data = (CorpusRepresentationMalletTarget)corpusRepresentationMallet;
213213
data.stopGrowth();
214-
System.err.println("Running EngineWeka.classify on document "+instanceAS.getDocument().getName());
214+
//System.err.println("Running EngineWeka.classify on document "+instanceAS.getDocument().getName());
215215
List<GateClassification> gcs = new ArrayList<GateClassification>();
216216
LFPipe pipe = (LFPipe)data.getRepresentationMallet().getPipe();
217217
for(Annotation instAnn : instanceAS.inDocumentOrder()) {
@@ -230,8 +230,8 @@ public List<GateClassification> classify(AnnotationSet instanceAS, AnnotationSet
230230
int[] locations = sdv.getLocations();
231231
double[] values = sdv.getValues();
232232
for(int i=0;i<locs;i++) {
233-
locations[i] = fv.location(i);
234-
values[i] = fv.value(i);
233+
locations[i] = fv.indexAtLocation(i);
234+
values[i] = fv.value(locations[i]);
235235
}
236236
// send the vector over to the weka process
237237
process.writeObject(sdv);
@@ -246,7 +246,7 @@ public List<GateClassification> classify(AnnotationSet instanceAS, AnnotationSet
246246
// this is an error, let's panic for now
247247
throw new RuntimeException("Got a response from the Weka process which is not double[] but "+obj.getClass());
248248
}
249-
System.err.println("Sent vector: locs/values="+Arrays.toString(locations)+"/"+Arrays.toString(values)+", ret="+Arrays.toString(ret));
249+
//System.err.println("Sent vector: locs/values="+Arrays.toString(locations)+"/"+Arrays.toString(values)+", ret="+Arrays.toString(ret));
250250
GateClassification gc = null;
251251
// now check if the mallet representation and the weka process agree
252252
// on if we have regression or classification

src/gate/plugin/learningframework/export/CorpusExporterMalletSeq.java

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,10 @@
55
*/
66
package gate.plugin.learningframework.export;
77

8-
import cc.mallet.pipe.Pipe;
9-
import cc.mallet.types.Alphabet;
10-
import cc.mallet.types.FeatureVector;
11-
import cc.mallet.types.Instance;
128
import cc.mallet.types.InstanceList;
13-
import gate.plugin.learningframework.Globals;
149
import gate.plugin.learningframework.data.CorpusRepresentationMallet;
1510
import gate.plugin.learningframework.engines.Info;
16-
import gate.plugin.learningframework.features.Datatype;
17-
import gate.plugin.learningframework.features.FeatureExtraction;
1811
import java.io.File;
19-
import java.io.FileNotFoundException;
20-
import java.io.FileOutputStream;
21-
import java.io.PrintStream;
2212

2313
/**
2414
*

0 commit comments

Comments
 (0)