Skip to content

Commit cb3cafd

Browse files
committed
trying to build my own bundles
1 parent 2a79399 commit cb3cafd

File tree

9 files changed

+194
-0
lines changed

9 files changed

+194
-0
lines changed

.Rbuildignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,7 @@ vignettes/.*\.png$
2323
^inst/paper$
2424
^_pkgdown\.yml$
2525
^dev$
26+
^build\.sh$
27+
^poppler\.sh$
28+
^tesseract\.sh$
29+
^lib$
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Manually triggered workflow: builds the library bundles for one package via
# ./build.sh inside an MSYS2 shell, then attaches the resulting archives to a
# GitHub release tagged <package>-<version>.
on:
  workflow_dispatch:
    inputs:
      package:
        description: 'Which package to bundle'
        required: true
      overwrite:
        description: 'Overwrite existing bundles'
        type: boolean
        required: false
        default: false

name: Create Windows bundles

jobs:
  Bundler:
    runs-on: windows-latest
    name: Bundle ${{ github.event.inputs.package }}
    # The Release step creates a release and uploads assets with the job
    # token, which needs write access to repository contents.
    permissions:
      contents: write
    steps:
      # Runs before checkout so the shell scripts are checked out without
      # CRLF conversion.
      - name: Prepare git
        run: |
          git config --global core.autocrlf false
          git config --global user.email 'm.sepulveda@mail.utoronto.ca'
          git config --global user.name 'Mauricio Pacha Vargas Sepulveda'

      - uses: msys2/setup-msys2@v2

      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # build.sh reads `package` and `overwrite` from the environment and
      # writes `version` to $GITHUB_OUTPUT (see lib/functions.sh).
      - id: bundle
        name: Create bundle
        shell: msys2 {0}
        run: ./build.sh
        env:
          package: ${{ github.event.inputs.package }}
          overwrite: ${{ github.event.inputs.overwrite }}

      - name: Release
        uses: softprops/action-gh-release@v1
        with:
          tag_name: ${{ github.event.inputs.package }}-${{ steps.bundle.outputs.version }}
          files: dist/*.xz
File renamed without changes.

build.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/bin/sh
# Entry point for the bundle workflow.
#
# Expects $package in the environment. Loads the shared helpers, then an
# optional per-package override script named "<package>.sh" (which may change
# $package or set $extra_files), and finally builds the bundles.
#
# `.` is used instead of `source`: the shebang is /bin/sh and `source` is not
# defined by POSIX sh.
. ./lib/functions.sh
if [ -f "${package}.sh" ]; then
  . "./${package}.sh"
fi
create_bundles

lib/functions.sh

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#!/bin/sh
# Abort the whole run as soon as any command fails.
set -o errexit
3+
4+
# Install the repository's pacman.conf as the system configuration, wipe the
# package cache and force a refresh of all sync databases.
prepare_pacman(){
  cp -f pacman.conf /etc/pacman.conf
  pacman --sync --clean --clean --noconfirm
  pacman --sync --refresh --refresh --noconfirm
}
9+
10+
# Print every argument prefixed with "mingw-w64-${arch}-", space separated.
# Reads the global $arch. Prints an empty line when called without arguments.
#
# The pattern requires at least one non-space character: a bare `[^ ]*` also
# matches the empty string and would emit a dangling "mingw-w64-${arch}-"
# for empty input or between repeated spaces.
arch_prefix(){
  printf '%s\n' "$*" | sed "s/[^ ][^ ]*/mingw-w64-${arch}-&/g"
}
13+
14+
# Turn every argument into a pacman
# "--assume-installed=mingw-w64-${arch}-<name>=99.99" option, space separated.
# Reads the global $arch. Prints an empty line when called without arguments.
#
# The pattern requires at least one non-space character so that empty input
# does not produce a bogus option with an empty package name.
skip_args(){
  printf '%s\n' "$*" | sed "s/[^ ][^ ]*/--assume-installed=mingw-w64-${arch}-&=99.99/g"
}
17+
18+
# Build one bundle: download the pacman package for $package (and its
# dependencies) for the target in $arch, and repack headers, static
# libraries and pkg-config files into dist/$package-$version-$arch.tar.xz.
#
# Reads:  package, arch, overwrite, deps (optional), extra_files (optional).
# Sets:   pkg, version, skiplist, tagurl, status, bundle, dist, OUTPUT, URLS
#         (all global; create_bundles reads $version afterwards).
# Exits with status 1 when the release tag already exists and $overwrite is
# not "true".
download_libs(){
  pkg=$(arch_prefix $package)
  # "Version : 1.2.3-4" -> "1.2.3" (third field, cut at the first dash).
  version=$(pacman -Si "$pkg" | grep -m 1 '^Version' | awk '{print $3}' | cut -d '-' -f1)
  skiplist=$(skip_args gcc-libs libiconv libwinpthread-git libwinpthread)
  echo "Bundling: $pkg $version"
  #echo "Skiplist: $skiplist"

  # Check if version already exists
  tagurl="https://github.com/pachadotdev/tesseract/releases/tag/$package-$version"
  status=$(curl -sSLIw '%{http_code}' -o /dev/null "$tagurl");
  if [ "$status" = "404" ]; then
    echo "Tag does not exist yet. Good."
  else
    echo "Tag already exists: $tagurl"
    if [ "$overwrite" = "true" ]; then
      echo "Overwriting as requested"
    else
      exit 1
    fi
  fi

  # Tmp download dir. Created before the pacman calls below because they pass
  # it as the cache directory; previously it was only assigned afterwards, so
  # the first run passed an empty value and later runs passed the previous
  # run's directory.
  # NOTE(review): presumably an empty cache dir makes `pacman -Sp` print
  # download URLs rather than local cache paths; confirm.
  OUTPUT=$(mktemp -d)

  # Find dependencies ($pkgdeps and $skiplist are intentionally unquoted:
  # each holds several space-separated arguments).
  if [ "$deps" ]; then
    pkgdeps=$(arch_prefix $deps)
    URLS=$(pacman -Spdd "$pkg" $pkgdeps --cache="$OUTPUT")
  else
    URLS=$(pacman -Sp "$pkg" $skiplist --cache="$OUTPUT")
    #pkgdeps=$(pacman -Si $pkg --assume-installed="$skip" | grep -m 1 'Depends On' | grep -o 'mingw-w64-[_.a-z0-9-]*' || true)
  fi

  # Prep output dir
  bundle="$package-$version-$arch"
  dist="$PWD/dist"
  rm -Rf "$bundle"
  mkdir -p "$dist" "$bundle/lib"

  # Download each package archive into the current directory, unpack it into
  # the tmp dir and record its file name in the bundle's files.md.
  for URL in $URLS; do
    curl -OLs "$URL"
    FILE=$(basename "$URL")
    echo "Extracting: $FILE"
    echo " - $FILE" >> "$bundle/files.md"
    tar xf "$FILE" -C "${OUTPUT}"
    unlink "$FILE"
  done

  # Extract files: headers, static libs (import libs *.dll.a are dropped
  # first) and, when present, pkg-config metadata.
  cp -Rv "${OUTPUT}"/*/include "$bundle/"
  #rm -f ${OUTPUT}/*/lib/*.dll.a
  find "${OUTPUT}"/*/lib -name \*.dll.a -delete;
  #cp -v ${OUTPUT}/*/lib/*.a $bundle/lib/
  find "${OUTPUT}"/*/lib -name \*.a -exec cp -v {} "$bundle/lib/" \;
  cp -Rf "${OUTPUT}"/*/lib/pkgconfig "$bundle/lib/" || true

  # Copy xtra files ($extra_files is a space-separated list of paths relative
  # to the extracted package root).
  if [ "$extra_files" ]; then
    for file in $extra_files; do
      mkdir -p "$(dirname "$bundle/${file}")"
      cp -Rv "${OUTPUT}"/*/${file} "$bundle/${file}"
    done
  fi
  tar -cJ --no-xattrs -f "$dist/$bundle.tar.xz" "$bundle"
  rm -Rf "$bundle"
  # Remove the tmp dir so repeated runs do not accumulate extracted packages.
  rm -Rf "$OUTPUT"
}
83+
84+
# Configure pacman, then build one bundle per supported target by calling
# download_libs with $arch set for that call.
#
# When $GITHUB_OUTPUT is set, appends "version=<version>" to that file.
# $version is the global assigned by download_libs, so the value written is
# the one left by the last target (clang-aarch64).
create_bundles() {
  prepare_pacman
  for target in ucrt-x86_64 clang-x86_64 clang-aarch64; do
    arch="$target" download_libs
  done

  # Set success variables. The redirect target is quoted so a path containing
  # spaces is not treated as an ambiguous redirect.
  if [ -n "$GITHUB_OUTPUT" ]; then
    echo "version=$version" >> "$GITHUB_OUTPUT"
  fi
}

poppler.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
#!/bin/sh
# Per-package settings for poppler, sourced by build.sh.
# extra_files lists additional paths to copy into the bundle.
extra_files="share/poppler"
export extra_files

tesseract.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#!/bin/sh
# Per-package settings for tesseract, sourced by build.sh.
# Overrides $package with the name used to look up the pacman package and
# lists additional paths to copy into the bundle.
package="tesseract-ocr"
extra_files="share/tessdata"
export package extra_files

tools/test.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#include <poppler-version.h>
2+
#include <tesseract/baseapi.h>
3+
4+
// Minimal program that references one symbol from each library: it
// constructs a tesseract::TessBaseAPI and calls poppler::version_string(),
// discarding the result.
int main() {
  tesseract::TessBaseAPI ocr_engine;
  (void) poppler::version_string();
  return 0;
}

tools/winlibs.R

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Fetch the prebuilt tesseract libraries for Windows into ../windows/tesseract
# unless the headers are already there. The bundle is chosen from the R
# platform, the compiler reported in R_COMPILED_BY, and the R version.
if (!file.exists("../windows/tesseract/include/tesseract/baseapi.h")) {
  unlink("../windows", recursive = TRUE)
  url <- if (grepl("aarch", R.version$platform)) {
    "https://github.com/r-windows/bundles/releases/download/tesseract-5.3.2/tesseract-ocr-5.3.2-clang-aarch64.tar.xz"
  } else if (grepl("clang", Sys.getenv("R_COMPILED_BY"))) {
    "https://github.com/r-windows/bundles/releases/download/tesseract-5.3.2/tesseract-ocr-5.3.2-clang-x86_64.tar.xz"
  } else if (getRversion() >= "4.3") {
    "https://github.com/r-windows/bundles/releases/download/tesseract-5.3.2/tesseract-ocr-5.3.2-ucrt-x86_64.tar.xz"
  } else {
    "https://github.com/rwinlib/tesseract/archive/v5.3.2.tar.gz"
  }
  archive <- basename(url)
  # The archive is binary; request binary mode explicitly rather than relying
  # on download.file() guessing it from the file extension.
  download.file(url, archive, mode = "wb", quiet = TRUE)
  dir.create("../windows", showWarnings = FALSE)
  untar(archive, exdir = "../windows", tar = "internal")
  unlink(archive)
  # ../windows was wiped above, so it now holds only the extracted top-level
  # directory. Rename it in place instead of calling setwd(), which would
  # change the working directory for the rest of the script.
  extracted <- list.files("../windows", full.names = TRUE)
  file.rename(extracted, "../windows/tesseract")
}

# Also download the English training data and the base training data for
# Orientation and Script Detection (osd), skipping files already present.
dir.create("../windows/tessdata", showWarnings = FALSE)
for (lang in c("eng", "osd")) {
  traineddata <- paste0(lang, ".traineddata")
  dest <- file.path("../windows/tessdata", traineddata)
  if (!file.exists(dest)) {
    message("Downloading ", traineddata, "...")
    download.file(
      paste0("https://github.com/tesseract-ocr/tessdata_fast/raw/4.1.0/", traineddata),
      dest,
      mode = "wb",
      quiet = TRUE
    )
  }
}

0 commit comments

Comments
 (0)