From 82d9df2fa129f308bcdc1c3d316ee0260d854da7 Mon Sep 17 00:00:00 2001
From: Martin Monperrus
Date: Fri, 24 Apr 2020 08:57:17 +0000
Subject: [PATCH 1/2] Update TrainingTesseract-4.00.md

---
 TrainingTesseract-4.00.md | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/TrainingTesseract-4.00.md b/TrainingTesseract-4.00.md
index 80170b5..9522052 100644
--- a/TrainingTesseract-4.00.md
+++ b/TrainingTesseract-4.00.md
@@ -295,7 +295,7 @@ The following table describes its command-line options:
 | `sequential_training` | `bool` | `false` | Set to true for sequential training. Default is to process all training data in round-robin fashion. |
 | `net_mode` | `int` | `192` | Flags from `NetworkFlags`in `network.h`. Possible values: `128` for Adam optimization instead of momentum; `64` to allow different layers to have their own learning rates, discovered automatically. |
 | `perfect_sample_delay` | `int` | `0` | When the network gets good, only backprop a perfect sample after this many imperfect samples have been seen since the last perfect sample was allowed through. |
-| `debug_interval` | `int` | `0` | If non-zero, show visual debugging every this many iterations. |
+| `debug_interval` | `int` | `0` | If non-zero, show visual debugging every this many iterations (requires Java and ScrollView.jar). |
 | `weight_range` | `double` | `0.1` | Range of random values to initialize weights. |
 | `momentum` | `double` | `0.5` | Momentum for alpha smoothing gradients. |
 | `adam_beta` | `double` | `0.999` | Smoothing factor squared gradients in ADAM algorithm. |
@@ -635,14 +635,16 @@ Training data is created using [tesstrain.sh](https://github.com/tesseract-ocr/t
 as follows:
 ```
+cd tessdata
+wget https://github.com/tesseract-ocr/tessdata/raw/master/eng.traineddata
 src/training/tesstrain.sh --fonts_dir /usr/share/fonts --lang eng --linedata_only \
-  --noextract_font_properties --langdata_dir ../langdata \
+  --noextract_font_properties --langdata_dir ~/tesstutorial/langdata \
   --tessdata_dir ./tessdata --output_dir ~/tesstutorial/engtrain
 ```
 
-And the following is printed out after a successful run:
+And the following is printed out after a successful run: `Created starter traineddata for LSTM training of language 'eng'`. The files `~/tesstutorial/engeval/*.lstmf` have been created, and you can then jump to the section "Training From Scratch" below.
+
 ```
-Created starter traineddata for LSTM training of language 'eng'
-Run 'lstmtraining' command to continue LSTM training for language 'eng'
+./lstmtraining
 ```
 
 The above command makes LSTM training data equivalent to the data used to train
@@ -653,7 +655,7 @@ Now try this to make eval data for the 'Impact' font:
 
 ```
 src/training/tesstrain.sh --fonts_dir /usr/share/fonts --lang eng --linedata_only \
-  --noextract_font_properties --langdata_dir ../langdata \
+  --noextract_font_properties --langdata_dir ~/tesstutorial/langdata \
   --tessdata_dir ./tessdata \
   --fontlist "Impact Condensed" --output_dir ~/tesstutorial/engeval
 ```
@@ -688,9 +690,11 @@ The following example shows the command line for training from scratch. Try it
 with the default training data created with the command-lines above.
 
 ```
+mkdir ~/tesstutorial/engtrain/
+find ~/tesstutorial/engeval -name "*.lstmf" > ~/tesstutorial/engtrain/eng.training_files.txt
 mkdir -p ~/tesstutorial/engoutput
-training/lstmtraining --debug_interval 100 \
-  --traineddata ~/tesstutorial/engtrain/eng/eng.traineddata \
+training/lstmtraining \
+  --traineddata tessdata/eng.traineddata \
   --net_spec '[1,36,0,1 Ct3,3,16 Mp3,3 Lfys48 Lfx96 Lrx96 Lfx256 O1c111]' \
   --model_output ~/tesstutorial/engoutput/base --learning_rate 20e-4 \
   --train_listfile ~/tesstutorial/engtrain/eng.training_files.txt \
@@ -1208,4 +1212,4 @@ If you notice that your model is misbehaving, for example by:
 * Adding `Space` where it should not do that.
 * etc...
 
-[Then read the hallucination topic.](The-Hallucination-Effect.md)
\ No newline at end of file
+[Then read the hallucination topic.](The-Hallucination-Effect.md)

From c6870a82c6e77f1e853c8b99d2bdcda29792d347 Mon Sep 17 00:00:00 2001
From: Martin Monperrus
Date: Fri, 24 Apr 2020 08:58:59 +0000
Subject: [PATCH 2/2] Update TrainingTesseract-4.00.md

---
 TrainingTesseract-4.00.md | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/TrainingTesseract-4.00.md b/TrainingTesseract-4.00.md
index 9522052..7153703 100644
--- a/TrainingTesseract-4.00.md
+++ b/TrainingTesseract-4.00.md
@@ -643,10 +643,6 @@ src/training/tesstrain.sh --fonts_dir /usr/share/fonts --lang eng --linedata_onl
 ```
 
 And the following is printed out after a successful run: `Created starter traineddata for LSTM training of language 'eng'`. The files `~/tesstutorial/engeval/*.lstmf` have been created, and you can then jump to the section "Training From Scratch" below.
-```
-./lstmtraining
-```
-
 The above command makes LSTM training data equivalent to the data used to train
 base Tesseract for English. For making a general-purpose LSTM-based OCR engine,
 it is woefully inadequate, but makes a good tutorial demo.
@@ -693,7 +689,7 @@ with the default training data created with the command-lines above.
 mkdir ~/tesstutorial/engtrain/
 find ~/tesstutorial/engeval -name "*.lstmf" > ~/tesstutorial/engtrain/eng.training_files.txt
 mkdir -p ~/tesstutorial/engoutput
-training/lstmtraining \
+src/training/lstmtraining \
   --traineddata tessdata/eng.traineddata \
   --net_spec '[1,36,0,1 Ct3,3,16 Mp3,3 Lfys48 Lfx96 Lrx96 Lfx256 O1c111]' \
   --model_output ~/tesstutorial/engoutput/base --learning_rate 20e-4 \
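
Reviewer note: the list-file step this series adds (`find ... -name "*.lstmf" > eng.training_files.txt`) can be sanity-checked without running `tesstrain.sh` at all. A minimal sketch, assuming only a POSIX shell; the `/tmp/tesstutorial-demo` directory and the `.lstmf` file names below are placeholders, not files the real tools produce:

```shell
# Illustrative only: exercise the list-file step from this patch series
# with placeholder paths under /tmp instead of ~/tesstutorial.
set -e
demo=/tmp/tesstutorial-demo
rm -rf "$demo"
mkdir -p "$demo/engeval" "$demo/engtrain"
# Stand-ins for the .lstmf files that tesstrain.sh would normally produce.
touch "$demo/engeval/eng.Impact_Condensed.exp0.lstmf"
touch "$demo/engeval/eng.Impact_Condensed.exp1.lstmf"
# Same shape as the patch: collect every .lstmf path into the list file
# that lstmtraining later reads via --train_listfile.
find "$demo/engeval" -name "*.lstmf" > "$demo/engtrain/eng.training_files.txt"
cat "$demo/engtrain/eng.training_files.txt"
```

Each line of the resulting `eng.training_files.txt` is one training sample file; `lstmtraining` consumes the list through `--train_listfile`, so an empty list file is the first thing to check when training refuses to start.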