Skip to content

Commit 66b77e6

Browse files
committed
Prepare using float instead of double for LSTM calculations
The new header file ccutil/tesstypes.h also prepares support for larger images by introducing a new data type for image size and coordinates (still unused). FloatToDouble is now a local function. Signed-off-by: Stefan Weil <sw@weilnetz.de>
1 parent c3fb050 commit 66b77e6

27 files changed

+265
-221
lines changed

Makefile.am

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,38 +150,44 @@ endif
150150
if MARCH_NATIVE_OPT
151151
libtesseract_native_la_CXXFLAGS += -march=native -mtune=native
152152
endif
153+
libtesseract_native_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
153154
libtesseract_native_la_SOURCES = src/arch/dotproduct.cpp
154155

155156
if HAVE_AVX
156157
libtesseract_avx_la_CXXFLAGS = -mavx
158+
libtesseract_avx_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
157159
libtesseract_avx_la_SOURCES = src/arch/dotproductavx.cpp
158160
libtesseract_la_LIBADD += libtesseract_avx.la
159161
noinst_LTLIBRARIES += libtesseract_avx.la
160162
endif
161163

162164
if HAVE_AVX2
163165
libtesseract_avx2_la_CXXFLAGS = -mavx2
166+
libtesseract_avx2_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
164167
libtesseract_avx2_la_SOURCES = src/arch/intsimdmatrixavx2.cpp
165168
libtesseract_la_LIBADD += libtesseract_avx2.la
166169
noinst_LTLIBRARIES += libtesseract_avx2.la
167170
endif
168171

169172
if HAVE_FMA
170173
libtesseract_fma_la_CXXFLAGS = -mfma
174+
libtesseract_fma_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
171175
libtesseract_fma_la_SOURCES = src/arch/dotproductfma.cpp
172176
libtesseract_la_LIBADD += libtesseract_fma.la
173177
noinst_LTLIBRARIES += libtesseract_fma.la
174178
endif
175179

176180
if HAVE_SSE4_1
177181
libtesseract_sse_la_CXXFLAGS = -msse4.1
182+
libtesseract_sse_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
178183
libtesseract_sse_la_SOURCES = src/arch/dotproductsse.cpp src/arch/intsimdmatrixsse.cpp
179184
libtesseract_la_LIBADD += libtesseract_sse.la
180185
noinst_LTLIBRARIES += libtesseract_sse.la
181186
endif
182187

183188
if HAVE_NEON
184189
libtesseract_neon_la_CXXFLAGS = $(NEON_CXXFLAGS)
190+
libtesseract_neon_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
185191
libtesseract_neon_la_SOURCES = src/arch/intsimdmatrixneon.cpp
186192
libtesseract_la_LIBADD += libtesseract_neon.la
187193
noinst_LTLIBRARIES += libtesseract_neon.la

src/arch/dotproduct.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,12 @@
1919
namespace tesseract {
2020

2121
// Computes and returns the dot product of the two n-vectors u and v.
22-
double DotProductNative(const double *u, const double *v, int n) {
23-
double total = 0.0;
22+
TFloat DotProductNative(const TFloat *u, const TFloat *v, int n) {
23+
TFloat total = 0;
2424
#if defined(OPENMP_SIMD) || defined(_OPENMP)
2525
#pragma omp simd reduction(+:total)
2626
#endif
27-
for (int k = 0; k < n; ++k) {
27+
for (int k = 0; k < n; k++) {
2828
total += u[k] * v[k];
2929
}
3030
return total;

src/arch/dotproduct.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,21 @@
1717
#ifndef TESSERACT_ARCH_DOTPRODUCT_H_
1818
#define TESSERACT_ARCH_DOTPRODUCT_H_
1919

20+
#include "tesstypes.h"
21+
2022
namespace tesseract {
2123

2224
// Computes and returns the dot product of the n-vectors u and v.
23-
double DotProductNative(const double *u, const double *v, int n);
25+
TFloat DotProductNative(const TFloat *u, const TFloat *v, int n);
2426

2527
// Uses Intel AVX intrinsics to access the SIMD instruction set.
26-
double DotProductAVX(const double *u, const double *v, int n);
28+
TFloat DotProductAVX(const TFloat *u, const TFloat *v, int n);
2729

2830
// Use Intel FMA.
29-
double DotProductFMA(const double *u, const double *v, int n);
31+
TFloat DotProductFMA(const TFloat *u, const TFloat *v, int n);
3032

3133
// Uses Intel SSE intrinsics to access the SIMD instruction set.
32-
double DotProductSSE(const double *u, const double *v, int n);
34+
TFloat DotProductSSE(const TFloat *u, const TFloat *v, int n);
3335

3436
} // namespace tesseract.
3537

src/arch/intsimdmatrix.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ void IntSimdMatrix::Init(const GENERIC_2D_ARRAY<int8_t> &w, std::vector<int8_t>
7676
// u is imagined to have an extra element at the end with value 1, to
7777
// implement the bias, but it doesn't actually have it.
7878
void IntSimdMatrix::MatrixDotVector(const GENERIC_2D_ARRAY<int8_t> &w,
79-
const std::vector<double> &scales, const int8_t *u, double *v) {
79+
const std::vector<TFloat> &scales, const int8_t *u, TFloat *v) {
8080
int num_out = w.dim1();
8181
int num_in = w.dim2() - 1;
8282
// Base implementation.

src/arch/intsimdmatrix.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
#include <cstdint>
2424
#include <vector>
2525

26+
#include "tesstypes.h"
27+
2628
namespace tesseract {
2729

2830
template <class T>
@@ -78,8 +80,8 @@ struct TESS_API IntSimdMatrix {
7880
// u is imagined to have an extra element at the end with value 1, to
7981
// implement the bias, but it doesn't actually have it.
8082
// Computes the base C++ implementation.
81-
static void MatrixDotVector(const GENERIC_2D_ARRAY<int8_t> &w, const std::vector<double> &scales,
82-
const int8_t *u, double *v);
83+
static void MatrixDotVector(const GENERIC_2D_ARRAY<int8_t> &w, const std::vector<TFloat> &scales,
84+
const int8_t *u, TFloat *v);
8385

8486
// Rounds the input up to a multiple of the given factor.
8587
static int Roundup(int input, int factor) {
@@ -95,8 +97,8 @@ struct TESS_API IntSimdMatrix {
9597
// RoundInputs above.
9698
// The input will be over-read to the extent of the padding. There are no
9799
// alignment requirements.
98-
using MatrixDotVectorFunction = void (*)(int, int, const int8_t *, const double *, const int8_t *,
99-
double *);
100+
using MatrixDotVectorFunction = void (*)(int, int, const int8_t *, const TFloat *, const int8_t *,
101+
TFloat *);
100102
MatrixDotVectorFunction matrixDotVectorFunction;
101103

102104
// Number of 32 bit outputs held in each register.

src/arch/intsimdmatrixneon.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#if defined(__ARM_NEON)
2020

2121
# include "intsimdmatrix.h"
22+
# include "tesstypes.h"
2223

2324
# include <algorithm>
2425
# include <cstdint>
@@ -52,9 +53,9 @@ constexpr int kNumInputsPerGroup = 8;
5253
// u must be padded out with zeros to
5354
// kNumInputsPerGroup*ceil(num_in/kNumInputsPerGroup) elements.
5455
static inline void PartialMatrixDotVector8(const int8_t *__restrict wi,
55-
const double *__restrict scales,
56+
const TFloat *__restrict scales,
5657
const int8_t *__restrict u, int num_in,
57-
double *__restrict v, int num_out) {
58+
TFloat *__restrict v, int num_out) {
5859
// Initialize all the results to 0.
5960
int32x4_t result0123 = {0, 0, 0, 0};
6061
int32x4_t result4567 = {0, 0, 0, 0};
@@ -163,8 +164,8 @@ static inline void PartialMatrixDotVector8(const int8_t *__restrict wi,
163164
}
164165
}
165166

166-
static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const double *scales,
167-
const int8_t *u, double *v) {
167+
static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const TFloat *scales,
168+
const int8_t *u, TFloat *v) {
168169
const int num_out = dim1;
169170
const int num_in = dim2 - 1;
170171
// Each call to a partial_func_ produces group_size outputs, except the
@@ -196,7 +197,8 @@ const IntSimdMatrix IntSimdMatrix::intSimdMatrixNEON = {
196197
// Number of 8 bit inputs in the inputs register.
197198
kNumInputsPerRegister,
198199
// Number of inputs in each weight group.
199-
kNumInputsPerGroup};
200+
kNumInputsPerGroup
201+
};
200202

201203
} // namespace tesseract.
202204

src/arch/intsimdmatrixsse.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -69,15 +69,15 @@ static int32_t IntDotProductSSE(const int8_t *u, const int8_t *v, int n) {
6969
}
7070

7171
// Computes part of matrix.vector v = Wu. Computes 1 result.
72-
static void PartialMatrixDotVector1(const int8_t *wi, const double *scales, const int8_t *u,
73-
int num_in, double *v) {
74-
double total = IntDotProductSSE(u, wi, num_in);
72+
static void PartialMatrixDotVector1(const int8_t *wi, const TFloat *scales, const int8_t *u,
73+
int num_in, TFloat *v) {
74+
TFloat total = IntDotProductSSE(u, wi, num_in);
7575
// Add in the bias and correct for integer values.
7676
*v = (total + wi[num_in] * INT8_MAX) * *scales;
7777
}
7878

79-
static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const double *scales,
80-
const int8_t *u, double *v) {
79+
static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const TFloat *scales,
80+
const int8_t *u, TFloat *v) {
8181
const int num_out = dim1;
8282
const int num_in = dim2 - 1;
8383
int output = 0;
@@ -99,7 +99,8 @@ const IntSimdMatrix IntSimdMatrix::intSimdMatrixSSE = {
9999
// Number of 8 bit inputs in the inputs register.
100100
1,
101101
// Number of inputs in each weight group.
102-
1};
102+
1
103+
};
103104

104105
} // namespace tesseract.
105106

src/arch/simddetect.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -93,26 +93,26 @@ bool SIMDDetect::sse_available_;
9393
#endif
9494

9595
#if defined(HAVE_FRAMEWORK_ACCELERATE)
96-
static double DotProductAccelerate(const double* u, const double* v, int n) {
97-
double total = 0.0;
96+
// Computes and returns the dot product of the two n-vectors u and v by
// delegating to vDSP_dotprD with unit stride on both inputs.
static TFloat DotProductAccelerate(const TFloat* u, const TFloat* v, int n) {
97+
TFloat total = 0;
9898
const int stride = 1;
9999
// NOTE(review): vDSP_dotprD is presumably the double-precision vDSP routine,
// so this call matches only while TFloat is an alias for double. Confirm and
// switch to the single-precision variant before changing TFloat to float.
vDSP_dotprD(u, stride, v, stride, &total, n);
100100
return total;
101101
}
102102
#endif
103103

104104
// Computes and returns the dot product of the two n-vectors u and v.
105-
static double DotProductGeneric(const double *u, const double *v, int n) {
106-
double total = 0.0;
105+
static TFloat DotProductGeneric(const TFloat *u, const TFloat *v, int n) {
106+
TFloat total = 0;
107107
for (int k = 0; k < n; ++k) {
108108
total += u[k] * v[k];
109109
}
110110
return total;
111111
}
112112

113113
// Compute dot product using std::inner_product.
114-
static double DotProductStdInnerProduct(const double *u, const double *v, int n) {
115-
return std::inner_product(u, u + n, v, 0.0);
114+
static TFloat DotProductStdInnerProduct(const TFloat *u, const TFloat *v, int n) {
115+
return std::inner_product(u, u + n, v, static_cast<TFloat>(0));
116116
}
117117

118118
static void SetDotProduct(DotProductFunction f, const IntSimdMatrix *m = nullptr) {

src/arch/simddetect.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,12 @@
1818
#define TESSERACT_ARCH_SIMDDETECT_H_
1919

2020
#include <tesseract/export.h>
21+
#include "tesstypes.h"
2122

2223
namespace tesseract {
2324

2425
// Function pointer for best calculation of dot product.
25-
using DotProductFunction = double (*)(const double *, const double *, int);
26+
using DotProductFunction = TFloat (*)(const TFloat *, const TFloat *, int);
2627
extern DotProductFunction DotProduct;
2728

2829
// Architecture detector. Add code here to detect any other architectures for

src/ccutil/tesstypes.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
///////////////////////////////////////////////////////////////////////
2+
// File: tesstypes.h
3+
// Description: Simple data types used by Tesseract code.
4+
// Author: Stefan Weil
5+
//
6+
// Licensed under the Apache License, Version 2.0 (the "License");
7+
// you may not use this file except in compliance with the License.
8+
// You may obtain a copy of the License at
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
///////////////////////////////////////////////////////////////////////
16+
17+
#ifndef TESSERACT_TESSTYPES_H
18+
#define TESSERACT_TESSTYPES_H
19+
20+
#include <cstdint> // for int16_t
21+
22+
namespace tesseract {
23+
24+
// Image dimensions (width and height, coordinates). Not used yet; introduced to prepare support for larger images.
25+
using TDimension = int16_t;
26+
27+
// Floating point data type used for LSTM calculations. Currently double; the alias prepares a later switch to float.
28+
using TFloat = double;
29+
30+
} // namespace tesseract
31+
32+
#endif // TESSERACT_TESSTYPES_H

0 commit comments

Comments
 (0)