1
+ # Tesseract ----
2
+
1
3
if (! file.exists(" ../windows/tesseract/include/tesseract/baseapi.h" )){
2
4
unlink(" ../windows" , recursive = TRUE )
3
5
url <- if (grepl(" aarch" , R.version $ platform )){
4
- " https://github.com/r-windows/bundles /releases/download/tesseract-5.3.2/tesseract-ocr-5.3.2-clang-aarch64.tar.xz"
6
+ " https://github.com/pachadotdev/cpp11tesseract /releases/download/tesseract-5.3.2/tesseract-ocr-5.3.2-clang-aarch64.tar.xz"
5
7
} else if (grepl(" clang" , Sys.getenv(' R_COMPILED_BY' ))){
6
- " https://github.com/r-windows/bundles /releases/download/tesseract-5.3.2/tesseract-ocr-5.3.2-clang-x86_64.tar.xz"
8
+ " https://github.com/pachadotdev/cpp11tesseract /releases/download/tesseract-5.3.2/tesseract-ocr-5.3.2-clang-x86_64.tar.xz"
7
9
} else if (getRversion() > = " 4.3" ) {
8
- " https://github.com/r-windows/bundles /releases/download/tesseract-5.3.2/tesseract-ocr-5.3.2-ucrt-x86_64.tar.xz"
10
+ " https://github.com/pachadotdev/cpp11tesseract /releases/download/tesseract-5.3.2/tesseract-ocr-5.3.2-ucrt-x86_64.tar.xz"
9
11
} else {
10
12
" https://github.com/rwinlib/tesseract/archive/v5.3.2.tar.gz"
11
13
}
@@ -17,17 +19,36 @@ if(!file.exists("../windows/tesseract/include/tesseract/baseapi.h")){
17
19
file.rename(list.files(), ' tesseract' )
18
20
}
19
21
20
- # Also download the english training data
21
22
# Make sure the tessdata directory exists; showWarnings = FALSE keeps this
# quiet when the directory is already there.
dir.create(" ../windows/tessdata" , showWarnings = FALSE )
22
23
# English training data: fetched from the tessdata_fast repository (tag 4.1.0)
# only when the file is not already present.
if (! file.exists(" ../windows/tessdata/eng.traineddata" )){
23
24
message(" Downloading eng.traineddata..." )
24
25
# The download is written in binary mode and with progress output silenced.
download.file(" https://github.com/tesseract-ocr/tessdata_fast/raw/4.1.0/eng.traineddata" ,
25
26
" ../windows/tessdata/eng.traineddata" , mode = " wb" , quiet = TRUE )
26
27
}
27
28
28
- # This is base training data for Orientation and Script Detection
29
29
# osd.traineddata: same source repository and tag as above, again skipped
# when the file already exists.
if (! file.exists(" ../windows/tessdata/osd.traineddata" )){
30
30
message(" Downloading osd.traineddata..." )
31
31
download.file(" https://github.com/tesseract-ocr/tessdata_fast/raw/4.1.0/osd.traineddata" ,
32
32
" ../windows/tessdata/osd.traineddata" , mode = " wb" , quiet = TRUE )
33
33
}
34
+
35
+ # Poppler ----
36
+
37
# Fetch the Poppler build matching this toolchain into ../windows/poppler,
# unless the poppler-document.h header is already present there.
if (!file.exists("../windows/poppler/include/poppler/cpp/poppler-document.h")) {
  # Remove only a stale Poppler tree. ../windows is shared with the Tesseract
  # libraries and tessdata fetched above, so wiping the whole directory here
  # would delete them and force a re-download on every run.
  unlink("../windows/poppler", recursive = TRUE)

  # Bundle selection, in order: aarch64 platform, clang-compiled R,
  # R >= 4.3 (ucrt bundle), otherwise the rwinlib source archive.
  url <- if (grepl("aarch", R.version$platform)) {
    "https://github.com/pachadotdev/cpp11tesseract/releases/download/poppler-23.08.0/poppler-23.08.0-clang-aarch64.tar.xz"
  } else if (grepl("clang", Sys.getenv("R_COMPILED_BY"))) {
    "https://github.com/pachadotdev/cpp11tesseract/releases/download/poppler-23.08.0/poppler-23.08.0-clang-x86_64.tar.xz"
  } else if (getRversion() >= "4.3") {
    "https://github.com/pachadotdev/cpp11tesseract/releases/download/poppler-23.08.0/poppler-23.08.0-ucrt-x86_64.tar.xz"
  } else {
    "https://github.com/rwinlib/poppler/archive/v22.04.0-2.tar.gz"
  }

  bundle <- basename(url)
  download.file(url, bundle, mode = "wb", quiet = TRUE)

  # Unpack into a private staging directory so the archive's top-level folder
  # can be identified without touching (or being confused by) sibling
  # libraries, and without changing the working directory.
  staging <- "../windows/poppler-staging"
  unlink(staging, recursive = TRUE)
  dir.create(staging, recursive = TRUE, showWarnings = FALSE)
  untar(bundle, exdir = staging, tar = "internal")
  unlink(bundle)

  extracted <- list.files(staging, full.names = TRUE)
  if (length(extracted) != 1L) {
    stop(
      "Expected exactly one top-level entry in ", bundle,
      ", found ", length(extracted),
      call. = FALSE
    )
  }
  if (!file.rename(extracted, "../windows/poppler")) {
    stop("Could not move ", extracted, " to ../windows/poppler", call. = FALSE)
  }
  unlink(staging, recursive = TRUE)
}
0 commit comments