tesseract-ocr/tesseract
14 workflows · maturity 33% · 7 patterns · GitHub ↗
Practices
✓ Matrix · ○ Permissions · ✓ Security scan · ○ AI review · ○ Cache · ○ Concurrency · ○ Reusable workflows
Detected patterns
Security dimensions
Tools: github/codeql-action/analyze, github/codeql-action/init
Workflows (14)
autotools matrix .github/workflows/autotools.yml
View raw YAML
name: autotools

# Autotools build of tesseract and the training tools on Ubuntu, configured
# with '--disable-openmp'. Runs the command line tests, basicapitest and the
# unit tests, followed by a small benchmark against three tessdata sets.

on:
  #push:
  schedule:
    - cron: '0 20 * * *'

jobs:
  linux:
    runs-on: ${{ matrix.config.os }}
    strategy:
      fail-fast: false
      matrix:
        config:
          - { name: ubuntu-22.04-clang-15-autotools, os: ubuntu-22.04, cxx: clang++-15 } #installed
          - { name: ubuntu-24.04-gcc-14-autotools, os: ubuntu-24.04, cxx: g++-14 } #installed
          - { name: ubuntu-22.04-gcc-12-autotools, os: ubuntu-22.04, cxx: g++-12 } #installed
          - { name: ubuntu-22.04-gcc-11-autotools, os: ubuntu-22.04, cxx: g++-11 } #installed

    steps:
      - uses: actions/checkout@v6
        with:
          submodules: recursive

      # The clone's contents are moved one level above the checkout, which is
      # why later steps pass ../tessdata, ../tessdata_best and ../tessdata_fast.
      - name: Download fonts, tessdata and langdata required for tests
        run: |
          git clone https://github.com/egorpugin/tessdata tessdata_unittest
          cp tessdata_unittest/fonts/* test/testing/
          mv tessdata_unittest/* ../

      - name: Install Compiler
        run: |
          sudo apt-get update
          sudo apt-get install -y ${{ matrix.config.cxx }}

      - name: Install dependencies
        run: |
          sudo apt-get install autoconf libleptonica-dev -y
          sudo apt-get install libpango1.0-dev -y
          sudo apt-get install cabextract libarchive-dev -y
          sudo apt-get install libcurl4-openssl-dev libcurl4 curl -y

      - name: Setup Tesseract
        run: |
          ./autogen.sh

      - name: Configure Tesseract
        run: |
          ./configure '--disable-shared' '--disable-openmp' '--disable-doc' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2'

      - name: Make and Install Tesseract
        run: |
          make -j 8
          sudo make install

      - name: Make and Install Training Tools
        run: |
          make training -j 8
          sudo make install training-install

      - name: Make and run Unit Tests
        run: |
          make check

      # Runs even when an earlier step failed (but not when the run was cancelled).
      - name: Display Version for tesseract, lstmtraining, text2image
        if: success() || failure()
        run: |
          tesseract -v
          lstmtraining -v
          text2image -v

      - name: List languages in different test tessdata-dir
        run: |
          tesseract --list-langs --tessdata-dir ../tessdata
          tesseract --list-langs --tessdata-dir ../tessdata_best
          tesseract --list-langs --tessdata-dir ../tessdata_fast

      - name: Run Tesseract on test images in different languages
        run: |
          tesseract test/testing/phototest.tif - --oem 1 --tessdata-dir ../tessdata
          tesseract test/testing/raaj.tif - -l hin --oem 1 --tessdata-dir ../tessdata
          tesseract test/testing/viet.tif - -l vie --oem 1 --tessdata-dir ../tessdata
          tesseract test/testing/hebrew.png - -l heb --oem 1 --tessdata-dir ../tessdata
          tesseract test/testing/eurotext.tif - -l fra --oem 1 --tessdata-dir ../tessdata_best
          tesseract test/testing/arabic.tif - -l ara --oem 1 --psm 6 --tessdata-dir ../tessdata

      # Compiles the API smoke test against the tesseract installed above.
      - name: Run Tesseract basicapitest
        run: |
          export "PKG_CONFIG_PATH=/usr/local/lib/pkgconfig"
          cd test
          ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp -I/usr/local/include -L/usr/local/lib `pkg-config --cflags --libs tesseract lept ` -pthread -std=c++17
          ./basicapitest

      # benchmarks.list names the same image fifteen times, one entry per line.
      - name: Setup for Tesseract benchmark using image from issue 263 fifteen times in a list file
        run: |
          wget -O i263_speed.jpg https://cloud.githubusercontent.com/assets/9968625/13674495/ac261db4-e6ab-11e5-9b4a-ad91d5b4ff87.jpg
          printf 'i263_speed.jpg\n%.0s' {1..15} > benchmarks.list
          lscpu
          free
          tesseract -v

      - name: Run Tesseract using image from issue 263 with tessdata_fast
        run: |
          time tesseract benchmarks.list - --tessdata-dir ../tessdata_fast > /dev/null 2>&1
          echo "tessdata_fast - disable-openmp"

      - name: Run Tesseract using image from issue 263 with tessdata_best
        run: |
          time tesseract benchmarks.list - --tessdata-dir ../tessdata_best > /dev/null 2>&1
          echo "tessdata_best - disable-openmp"

      # This step benchmarks ../tessdata; its name previously repeated
      # "tessdata_fast", which made two steps indistinguishable in the run log.
      - name: Run Tesseract using image from issue 263 with tessdata
        run: |
          time tesseract benchmarks.list - --tessdata-dir ../tessdata > /dev/null 2>&1
          echo "tessdata - disable-openmp"

      - name: Display Compiler Version
        if: always()
        run: |
          ${{ matrix.config.cxx }} --version
          git log -3 --pretty=format:'%h %ad %s | %an'

      - name: Display Unit Tests Report
        if: always()
        run: |
          cat test-suite.log
autotools-macos matrix .github/workflows/autotools-macos.yml
View raw YAML
name: autotools-macos

# Autotools build of tesseract and the training tools on macOS, once with
# Homebrew dependencies (job `brew`) and once with MacPorts (job `ports`).
# Configured with '--disable-openmp'. Runs the command line tests,
# basicapitest and the unit tests.

on:
  #push:
  schedule:
    - cron: '0 20 * * *'
  workflow_dispatch:

jobs:
  brew:
    runs-on: ${{ matrix.config.os }}
    strategy:
      fail-fast: false
      matrix:
        config:
          - { name: macos-latest-clang-autotools, os: macos-latest, cxx: clang++ }

    steps:
      - uses: actions/checkout@v6
        with:
          submodules: recursive

      # The clone's contents are moved one level above the checkout, which is
      # why later steps pass ../tessdata, ../tessdata_best and ../tessdata_fast.
      - name: Get fonts, tessdata and langdata required for unit tests
        run: |
          git clone https://github.com/egorpugin/tessdata tessdata_unittest
          cp tessdata_unittest/fonts/* test/testing/
          mv tessdata_unittest/* ../

      # icu4c and curl are only installed when brew does not already list them.
      - name: Install dependencies
        run: |
          brew install autoconf automake cabextract libtool
          brew install leptonica libarchive pango
          if ! brew list icu4c &>/dev/null; then
            brew install icu4c
          fi
          if ! brew list curl &>/dev/null; then
            brew install curl
          fi

      - name: Setup Tesseract
        run: |
          ./autogen.sh

      - name: Configure Tesseract
        run: |
          ./configure '--disable-shared' '--disable-openmp' '--disable-doc' '--with-pic' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2'

      - name: Make and Install Tesseract
        run: |
          make -j 8
          sudo make install

      - name: Make and Install Training Tools
        run: |
          make training -j 8
          sudo make install training-install

      - name: Make and run Unit Tests (clang)
        if: startsWith(matrix.config.cxx, 'clang')
        run: |
          make check

      # Only applies to matrix entries whose cxx starts with 'g'; the matrix
      # above currently lists clang++ only, so this step is skipped.
      - name: Make and run Unit Tests (unset LANG needed for g++-8, g++-9, g++-10 on macOS)
        if: startsWith(matrix.config.cxx, 'g')
        shell: bash
        run: |
          unset LANG LC_ALL LC_CTYPE
          locale
          make check

      # Runs even when an earlier step failed (but not when the run was cancelled).
      - name: Display Version for tesseract, lstmtraining, text2image
        if: success() || failure()
        run: |
          tesseract -v
          lstmtraining -v
          text2image -v

      - name: List languages in different test tessdata-dir
        run: |
          tesseract --list-langs --tessdata-dir ../tessdata
          tesseract --list-langs --tessdata-dir ../tessdata_best
          tesseract --list-langs --tessdata-dir ../tessdata_fast

      - name: Run Tesseract on test images in different languages
        run: |
          tesseract test/testing/phototest.tif - --oem 1 --tessdata-dir ../tessdata
          tesseract test/testing/raaj.tif - -l hin --oem 1 --tessdata-dir ../tessdata
          tesseract test/testing/viet.tif - -l vie --oem 1 --tessdata-dir ../tessdata
          tesseract test/testing/hebrew.png - -l heb --oem 1 --tessdata-dir ../tessdata
          tesseract test/testing/eurotext.tif - -l fra --oem 1 --tessdata-dir ../tessdata_best
          tesseract test/testing/arabic.tif - -l ara --oem 1 --psm 6 --tessdata-dir ../tessdata

      # The framework name is spelled 'Accelerate', matching the `ports` job
      # below; the lowercase spelling used before presumably only resolved on
      # a case-insensitive filesystem.
      - name: Run Tesseract basicapitest
        run: |
          export "PKG_CONFIG_PATH=/usr/local/lib/pkgconfig"
          cd test
          ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp $(pkg-config --cflags --libs tesseract lept) -pthread -std=c++17 -framework Accelerate
          ./basicapitest

      - name: Display Compiler Version
        if: always()
        run: |
          ${{ matrix.config.cxx }} --version
          git log -3 --pretty=format:'%h %ad %s | %an'

      - name: Display Unit Tests Report
        if: always()
        run: |
          cat test-suite.log

  # ============================================================================================

  ports:
    runs-on: ${{ matrix.config.os }}
    strategy:
      fail-fast: false
      matrix:
        config:
          - { name: macos-latest-clang-autotools, os: macos-latest, cxx: clang++ }

    steps:
      - uses: actions/checkout@v6
        with:
          submodules: recursive

      - name: Get fonts, tessdata and langdata required for tests
        run: |
          git clone https://github.com/egorpugin/tessdata tessdata_unittest
          cp tessdata_unittest/fonts/* test/testing/
          mv tessdata_unittest/* ../

      - name: Install Macports
        run: |
          curl -sSLO https://raw.githubusercontent.com/GiovanniBussi/macports-ci/master/macports-ci; source ./macports-ci install
          # --remove-brew does not remove the Homebrew entries in bin,
          # so remove them now.
          rm -v $(brew --prefix)/bin/*

      - name: Install Dependencies
        run: |
          sudo port install autoconf automake libtool pkgconfig
          sudo port install leptonica
          sudo port install cairo pango
          sudo port install icu +devel
          sudo port install cabextract libarchive curl

      - name: Setup Tesseract
        run: |
          ./autogen.sh

      - name: Configure Tesseract
        run: |
          ./configure '--disable-shared' '--disable-openmp' '--disable-doc' '--with-pic' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2'

      - name: Make and Install Tesseract
        run: |
          make -j 8
          sudo make install

      - name: Make and Install Training Tools
        run: |
          make training -j 8
          sudo make install training-install

      - name: Make and run Unit Tests (clang)
        if: startsWith(matrix.config.cxx, 'clang')
        run: |
          make check

      - name: Display Version for tesseract, lstmtraining, text2image
        if: success() || failure()
        run: |
          tesseract -v
          lstmtraining -v
          text2image -v

      - name: List languages in different test tessdata-dir
        run: |
          tesseract --list-langs --tessdata-dir ../tessdata
          tesseract --list-langs --tessdata-dir ../tessdata_best
          tesseract --list-langs --tessdata-dir ../tessdata_fast

      - name: Run Tesseract on test images in different languages
        run: |
          tesseract test/testing/phototest.tif - --oem 1 --tessdata-dir ../tessdata
          tesseract test/testing/raaj.tif - -l hin --oem 1 --tessdata-dir ../tessdata
          tesseract test/testing/viet.tif - -l vie --oem 1 --tessdata-dir ../tessdata
          tesseract test/testing/hebrew.png - -l heb --oem 1 --tessdata-dir ../tessdata
          tesseract test/testing/eurotext.tif - -l fra --oem 1 --tessdata-dir ../tessdata_best
          tesseract test/testing/arabic.tif - -l ara --oem 1 --psm 6 --tessdata-dir ../tessdata

      # Adds the /opt/local include and library directories on top of what
      # pkg-config reports.
      - name: Run Tesseract basicapitest
        run: |
          export "PKG_CONFIG_PATH=/usr/local/lib/pkgconfig"
          cd test
          ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp -I/opt/local/include -L/opt/local/lib $(pkg-config --cflags --libs tesseract lept) -pthread -std=c++17 -framework Accelerate
          ./basicapitest

      - name: Display Compiler Version
        if: always()
        run: |
          ${{ matrix.config.cxx }} --version
          git log -3 --pretty=format:'%h %ad %s | %an'

      - name: Display Unit Tests Report
        if: always()
        run: |
          cat test-suite.log
autotools-openmp matrix .github/workflows/autotools-openmp.yml
View raw YAML
name: autotools-openmp

# Autotools build on Ubuntu configured with '--enable-openmp'; no training
# tools are built. Runs the issue-263 benchmark with and without an
# OMP_THREAD_LIMIT. Manual trigger only (push and schedule are disabled).

on:
  #push:
  #schedule:
  #  - cron: 0 20 * * *
  workflow_dispatch:

jobs:
  linux:
    runs-on: ${{ matrix.config.os }}
    strategy:
      fail-fast: false
      matrix:
        config:
          - name: 24.04-openmp
            os: ubuntu-24.04
          - name: 22.04-openmp
            os: ubuntu-22.04

    steps:
      - uses: actions/checkout@v6
        with:
          submodules: recursive

      # Test data ends up one directory above the checkout (../tessdata*).
      - name: Download fonts, tessdata and langdata required for tests
        run: |
          git clone https://github.com/egorpugin/tessdata tessdata_unittest
          cp tessdata_unittest/fonts/* test/testing/
          mv tessdata_unittest/* ../

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install autoconf libleptonica-dev -y
          sudo apt-get install libpango1.0-dev -y
          sudo apt-get install cabextract libarchive-dev -y
          sudo apt-get install libcurl4-openssl-dev libcurl4 curl -y

      - name: Setup Tesseract
        run: |
          ./autogen.sh

      # The grep prints the OpenMP-related lines of config.log; it also fails
      # the step if config.log contains none.
      - name: Configure Tesseract
        run: |
          ./configure '--disable-shared' '--enable-openmp' '--disable-doc' 'CXX=g++' 'CXXFLAGS=-g -O2'
          grep -i OpenMP config.log

      - name: Make and Install Tesseract
        run: |
          make
          sudo make install

      # benchmarks.list names the same image fifteen times, one entry per line.
      - name: Setup for Tesseract benchmark using image from issue 263 fifteen times in a list file
        run: |
          wget -O i263_speed.jpg https://cloud.githubusercontent.com/assets/9968625/13674495/ac261db4-e6ab-11e5-9b4a-ad91d5b4ff87.jpg
          printf 'i263_speed.jpg\n%.0s' {1..15} > benchmarks.list

      - name: Run Tesseract using image from issue 263 with tessdata_fast
        run: |
          lscpu
          free
          g++ --version
          tesseract -v
          time tesseract benchmarks.list - --tessdata-dir ../tessdata_fast > /dev/null 2>&1
          echo "tessdata_fast"

      # Each of the three loops below repeats the benchmark with
      # OMP_THREAD_LIMIT set to 1, 2 and 3.
      - name: Run Tesseract using image from issue 263 with tessdata_fast and OpenMP Thread Limit
        run: |
          for lmt in {1..3}; do
            time OMP_THREAD_LIMIT=$lmt tesseract benchmarks.list - --tessdata-dir ../tessdata_fast > /dev/null 2>&1 && echo "OMP_THREAD_LIMIT=" $lmt "tessdata_fast"
          done

      - name: Run Tesseract using image from issue 263 with tessdata_best and OpenMP Thread Limit
        run: |
          for lmt in {1..3}; do
            time OMP_THREAD_LIMIT=$lmt tesseract benchmarks.list - --tessdata-dir ../tessdata_best > /dev/null 2>&1 && echo "OMP_THREAD_LIMIT=" $lmt "tessdata_best"
          done

      - name: Run Tesseract using image from issue 263 with tessdata and OpenMP Thread Limit
        run: |
          for lmt in {1..3}; do
            time OMP_THREAD_LIMIT=$lmt tesseract benchmarks.list - --tessdata-dir ../tessdata > /dev/null 2>&1 && echo "OMP_THREAD_LIMIT=" $lmt "tessdata"
          done
cifuzz .github/workflows/cifuzz.yml
View raw YAML
name: CIFuzz

# OSS-Fuzz CI
# See https://google.github.io/oss-fuzz/getting-started/continuous-integration/

# Only pull requests against main that touch C++ sources or headers are fuzzed.
on:
  pull_request:
    branches:
      - main
    paths:
      - '**.cpp'
      - '**.h'

jobs:
  Fuzzing:
    runs-on: ubuntu-latest
    steps:
      - name: Build Fuzzers
        id: build
        uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
        with:
          oss-fuzz-project-name: 'tesseract-ocr'
          language: c++
          dry-run: false

      - name: Run Fuzzers
        uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
        with:
          oss-fuzz-project-name: 'tesseract-ocr'
          fuzz-seconds: 600
          dry-run: false

      # Artifacts are uploaded only when the job failed after a successful
      # fuzzer build, i.e. not when the build step itself was what failed.
      - name: Upload Crash
        if: failure() && steps.build.outcome == 'success'
        uses: actions/upload-artifact@v7
        with:
          name: artifacts
          path: ./out/artifacts
cmake matrix .github/workflows/cmake.yml
View raw YAML
name: cmake

# CMake build of tesseract and the training tools on Ubuntu and macOS
# (Homebrew) using Ninja. Tests the command line version of tesseract and
# runs basicapitest.

on:
  push:
    paths:
      - cmake/**
      - '**/CMakeLists.txt'
  pull_request:
    paths:
      - cmake/**
      - '**/CMakeLists.txt'
  schedule:
    - cron: '0 21 * * *'
  workflow_dispatch:

jobs:
  basictests:
    name: ${{ matrix.config.name }}
    runs-on: ${{ matrix.config.os }}
    strategy:
      fail-fast: false
      matrix:
        config:
          - { name: macos-14-clang-15-cmake, os: macos-14, cxx: clang++ } # default
          - { name: macos-15-gcc-14-cmake, os: macos-15, cxx: g++ } #installed
          - { name: macos-15-clang-cmake, os: macos-15, cxx: clang++ } # default
          - { name: ubuntu-22.04-clang-15-cmake, os: ubuntu-22.04, cxx: clang++-15 } #installed
          # The name now states gcc-14, matching the g++-14 compiler this
          # entry actually builds with (it previously said gcc-12).
          - { name: ubuntu-24.04-gcc-14-cmake, os: ubuntu-24.04, cxx: g++-14 } #installed
          - { name: ubuntu-22.04-gcc-12-cmake, os: ubuntu-22.04, cxx: g++-12 } #installed

    steps:
      - name: Install compilers on Linux
        if: runner.os == 'Linux'
        run: |
          sudo apt-get update
          sudo apt-get install ${{ matrix.config.cxx }} -y

      - name: Install dependencies on Linux
        if: runner.os == 'Linux'
        run: |
          sudo apt-get install autoconf libleptonica-dev -y
          sudo apt-get install libarchive-dev libcurl4-openssl-dev -y
          sudo apt-get install libpango1.0-dev -y
          sudo apt-get install cabextract -y
          sudo apt-get install ninja-build -y
          cmake --version

      - name: Install dependencies on macOS
        if: runner.os == 'macOS'
        run: |
          brew install autoconf automake
          brew install leptonica
          # if ! brew list libarchive &>/dev/null; then
          #   brew install libarchive
          # fi
          brew install pango
          if ! brew list icu4c &>/dev/null; then
            brew install icu4c
          fi
          if ! brew list curl &>/dev/null; then
            brew install curl
          fi
          brew install cabextract
          ninja --version
          cmake --version
          clang++ --version
          g++ --version

      - name: Checkout Source
        uses: actions/checkout@v6
        with:
          submodules: recursive

      # NOTE(review): later steps run the binaries from build/inst, so the
      # relative install prefix appears to resolve inside the build tree and
      # the top-level `inst` directory created here looks unused - confirm.
      - name: Configure Tesseract (Linux)
        if: runner.os == 'Linux'
        run: |
          mkdir build
          mkdir inst
          cmake \
            -S . \
            -B build \
            -G Ninja \
            -DCMAKE_BUILD_TYPE=Release \
            -DOPENMP_BUILD=OFF \
            -DCMAKE_CXX_COMPILER=${{ matrix.config.cxx }} \
            -DCMAKE_INSTALL_PREFIX:PATH=inst

      # Same as the Linux configuration plus -DENABLE_UNITY_BUILD=ON.
      - name: Configure Tesseract (macOS)
        if: runner.os == 'macOS'
        shell: bash
        run: |
          set -e
          mkdir build
          mkdir inst
          cmake \
            -S . \
            -B build \
            -G Ninja \
            -DCMAKE_BUILD_TYPE=Release \
            -DOPENMP_BUILD=OFF \
            -DENABLE_UNITY_BUILD=ON \
            -DCMAKE_CXX_COMPILER=${{ matrix.config.cxx }} \
            -DCMAKE_INSTALL_PREFIX:PATH=inst

      - name: Build Tesseract
        run: |
          cmake --build build --config Release --target install

      - name: Display Tesseract Version
        run: |
          build/inst/bin/tesseract -v

      - name: Display Training Tools Version
        run: |
          build/inst/bin/lstmtraining -v
          build/inst/bin/text2image -v

      # Test data ends up one directory above the checkout (../tessdata*).
      - name: Download fonts, tessdata and langdata required for tests
        run: |
          git clone https://github.com/egorpugin/tessdata tessdata_unittest
          cp tessdata_unittest/fonts/* test/testing/
          mv tessdata_unittest/* ../

      - name: List languages in different tessdata-dir
        run: |
          build/inst/bin/tesseract --list-langs --tessdata-dir ../tessdata
          build/inst/bin/tesseract --list-langs --tessdata-dir ../tessdata_best
          build/inst/bin/tesseract --list-langs --tessdata-dir ../tessdata_fast

      - name: Run Tesseract on test images in different languages
        run: |
          build/inst/bin/tesseract test/testing/phototest.tif - --oem 1 --tessdata-dir ../tessdata
          build/inst/bin/tesseract test/testing/raaj.tif - -l hin --oem 1 --tessdata-dir ../tessdata
          build/inst/bin/tesseract test/testing/viet.tif - -l vie --oem 1 --tessdata-dir ../tessdata
          build/inst/bin/tesseract test/testing/hebrew.png - -l heb --oem 1 --tessdata-dir ../tessdata
          build/inst/bin/tesseract test/testing/eurotext.tif - -l fra --oem 1 --tessdata-dir ../tessdata_best
          build/inst/bin/tesseract test/testing/arabic.tif - -l ara --oem 1 --psm 6 --tessdata-dir ../tessdata

      # pkg-config is pointed at the freshly installed tesseract first.
      - name: Build and run basicapitest (Linux)
        if: runner.os == 'Linux'
        run: |
          export "PKG_CONFIG_PATH=$GITHUB_WORKSPACE/build/inst/lib/pkgconfig/:$PKG_CONFIG_PATH"
          cd test
          ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" $(pkg-config --cflags --libs tesseract lept libarchive libcurl) -pthread -std=c++17
          ./basicapitest

      # On macOS the Homebrew libarchive and system pkgconfig directories are
      # appended to the search path as well.
      - name: Build and run basicapitest (macOS)
        if: runner.os == 'macOS'
        run: |
          export "PKG_CONFIG_PATH=$GITHUB_WORKSPACE/build/inst/lib/pkgconfig/:$(brew --prefix)/opt/libarchive/lib/pkgconfig:$(brew --prefix)/Library/Homebrew/os/mac/pkgconfig/11:$PKG_CONFIG_PATH"
          cd test
          ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" $(pkg-config --cflags --libs tesseract lept libcurl) -pthread -std=c++17
          ./basicapitest

      - name: Display Compiler Version
        if: always()
        run: |
          ${{ matrix.config.cxx }} --version
          pwd
          ls -la
          # git log -3 --pretty=format:'%h %ad %s | %an'
cmake-win64 .github/workflows/cmake-win64.yml
View raw YAML
# Based on https://github.com/zdenop/tesserocr/actions/runs/691257659/workflow
# Build Tesseract on Windows using cmake. No Training Tools.
# The image libraries and leptonica are built from source into ILOC first;
# tesseract is then configured against that same prefix.

name: cmake-win64

on:
  push:
    paths:
      - cmake/**
      - '**/CMakeLists.txt'
  pull_request:
    types:
      - opened
      - reopened
      - synchronize
    paths:
      - cmake/**
      - '**/CMakeLists.txt'
  schedule:
    - cron: '0 5 * * *'
  workflow_dispatch:

env:
  # Install prefix shared by every dependency build and by tesseract itself.
  ILOC: d:/a/local
  # Used to build the libpng archive and directory name (lpng<png_ver>).
  png_ver: 1651

jobs:
  build:
    name: cmake-win64
    runs-on: windows-latest
    steps:
      - uses: ilammy/setup-nasm@v1
      - uses: microsoft/setup-msbuild@v3

      - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
        uses: actions/checkout@v6
        with:
          submodules: recursive

      # Fetches full history and tags so `git describe --tags` below can work.
      - run: git fetch --prune --unshallow --tags

      # Outputs feed the artifact name; a failure here does not fail the job.
      - name: Get the version
        id: get_version
        continue-on-error: true
        run: |
          $git_info=$(git describe --tags HEAD)
          $stamp=$(date +'%Y-%m-%d_%H%M%S')
          echo "version=${git_info}" >> $env:GITHUB_OUTPUT
          echo "stamp=${stamp}" >> $env:GITHUB_OUTPUT

      - name: Setup Installation Location
        run: |
          mkdir ${{env.ILOC}}

      #- name: Uninstall Perl
      #  run: |
      #    choco uninstall strawberryperl

      - name: Build and Install zlib-ng
        shell: cmd
        run: |
          git clone --depth 1 https://github.com/zlib-ng/zlib-ng.git
          cd zlib-ng
          cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DBUILD_SHARED_LIBS=OFF -DZLIB_COMPAT=ON -DZLIB_ENABLE_TESTS=OFF -DINSTALL_UTILS=OFF
          cmake --build build --target install
          cd ..

      - name: Build and Install libpng
        shell: cmd
        run: |
          curl -sSL -o lpng${{env.png_ver}}.zip https://download.sourceforge.net/libpng/lpng${{env.png_ver}}.zip
          unzip.exe -qq lpng${{env.png_ver}}.zip
          cd lpng${{env.png_ver}}
          cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DPNG_TESTS=OFF -DPNG_SHARED=OFF
          cmake --build build --target install
          cd ..

      - name: Build and Install libjpeg
        shell: cmd
        run: |
          git clone --depth 1 https://github.com/libjpeg-turbo/libjpeg-turbo.git
          cd libjpeg-turbo
          cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DWITH_TURBOJPEG=OFF -DENABLE_SHARED=OFF
          cmake --build build --target install
          cd ..

      - name: Build and Install jbigkit
        shell: cmd
        run: |
          git clone --depth 1 https://github.com/zdenop/jbigkit.git
          cd jbigkit
          cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DBUILD_PROGRAMS=OFF -DBUILD_TOOLS=OFF -DCMAKE_WARN_DEPRECATED=OFF
          cmake --build build --target install
          cd ..

      # libtiff is the only dependency cloned at a fixed tag (v4.7.1).
      - name: Build and Install libtiff
        shell: cmd
        run: |
          git clone -c advice.detachedHead=false -b "v4.7.1" --depth 1 https://gitlab.com/libtiff/libtiff.git
          cd libtiff
          cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -Dtiff-tools=OFF -Dtiff-tests=OFF -Dtiff-contrib=OFF -Dtiff-docs=OFF
          cmake --build build --target install
          cd ..

      - name: Build and Install leptonica
        shell: cmd
        run: |
          echo "Building leptonica..."
          git clone --depth 1 https://github.com/DanBloomberg/leptonica.git
          cd leptonica
          cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DSW_BUILD=OFF -DBUILD_PROG=OFF -DBUILD_SHARED_LIBS=ON
          cmake --build build --target install

      # Clears executables installed by the dependency builds so that the
      # uploaded prefix only carries what the tesseract build installs.
      - name: Remove not needed tools Before building tesseract
        shell: cmd
        run: >
          rm -Rf ${{env.ILOC}}/bin/*.exe

      - name: Build and Install tesseract
        shell: cmd
        run: |
          cmake -Bbuild -G Ninja -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DSW_BUILD=OFF -DBUILD_SHARED_LIBS=ON -DENABLE_LTO=ON -DBUILD_TRAINING_TOOLS=OFF -DFAST_FLOAT=ON -DGRAPHICS_DISABLED=ON -DOPENMP_BUILD=OFF
          cmake --build build --target install

      # The whole install prefix is published and kept for five days.
      - name: Upload Build Results
        uses: actions/upload-artifact@v7
        with:
          name: tesseract-${{ steps.get_version.outputs.version }}-${{steps.get_version.outputs.stamp}}-VS2019_win64
          path: ${{env.ILOC}}
          retention-days: 5

      # Downloads eng and osd models, then smoke-tests the installed binary.
      - name: Display Tesseract Version and Test Command Line Usage
        shell: cmd
        run: |
          curl -sSL https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata --output ${{env.ILOC}}/share/tessdata/eng.traineddata
          curl -sSL https://github.com/tesseract-ocr/tessdata/raw/main/osd.traineddata --output ${{env.ILOC}}/share/tessdata/osd.traineddata
          echo "Setting TESSDATA_PREFIX..."
          set TESSDATA_PREFIX=${{env.ILOC}}/share/tessdata
          echo "Setting PATH..."
          set PATH=${{env.ILOC}}/bin;%PATH%
          echo "Checking installed tesseract version..."
          tesseract -v
          echo "Checking installed langs"
          tesseract --list-langs
          echo "Checking OCR process"
          tesseract test/testing/phototest.tif -
codeql-analysis matrix security .github/workflows/codeql-analysis.yml
View raw YAML
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL"

on:
  push:
    branches:
      - main
    paths:
      - '**.cpp'
      - '**.h'
      - '**/codeql-analysis.yml'
      - 'm4/*.m4'
      - 'Makefile.am'
      - 'autogen.sh'
      - 'configure.ac'
  pull_request:
    # The branches below must be a subset of the branches above
    branches:
      - main
    paths:
      - '**.cpp'
      - '**.h'
      - '**/codeql-analysis.yml'
      - 'm4/*.m4'
      - 'Makefile.am'
      - 'autogen.sh'
      - 'configure.ac'
  schedule:
    - cron: '34 23 * * 2'

jobs:
  analyze:
    name: Analyze
    runs-on: ubuntu-latest
    # The token is read-only apart from writing security events.
    permissions:
      actions: read
      contents: read
      security-events: write

    strategy:
      fail-fast: false
      matrix:
        language:
          - 'cpp'
        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ]
        # Learn more:
        # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed

    steps:
      - name: Checkout repository
        uses: actions/checkout@v6

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install autoconf libleptonica-dev -y
          sudo apt-get install libpango1.0-dev -y
          sudo apt-get install cabextract libarchive-dev -y
          sudo apt-get install libcurl4-openssl-dev libcurl4 curl -y

      # Initializes the CodeQL tools for scanning.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v4
        with:
          languages: ${{ matrix.language }}
          # If you wish to specify custom queries, you can do so here or in a config file.
          # By default, queries listed here will override any specified in a config file.
          # Prefix the list here with "+" to use these queries and those in the config file.
          # queries: ./path/to/local/query, your-org/your-repo/queries@main

      # Manual autotools build, placed between the CodeQL init and analyze steps.
      - name: Build
        run: |
          ./autogen.sh
          ./configure
          make all training

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v4
installer-for-windows .github/workflows/installer-for-windows.yml
View raw YAML
# GitHub actions - Create Tesseract installer for Windows

name: Cross build for Windows

on:
  # Trigger workflow in GitHub web frontend or from API.
  workflow_dispatch:
    inputs:
      # Single-choice input; 'Windows (64 bit)' is the only option offered.
      targets:
        description: 'Target operating system'
        required: true
        default: 'Windows (64 bit)'
        type: choice
        options:
          - 'Windows (64 bit)'

jobs:
  build64:
    runs-on:
      - ubuntu-24.04
    steps:
      - uses: actions/checkout@v6

      - name: Build Tesseract installer (64 bit)
        run: nsis/build.sh x86_64

      # Publishes the contents of dist as the installer artifact.
      - uses: actions/upload-artifact@v7
        with:
          name: Tesseract Installer for Windows (64 bit)
          path: dist
msys2 matrix .github/workflows/msys2.yml
View raw YAML
name: msys2

# msys2 build for tesseract -head from main branch.
# Autotools build inside an MSYS2 MINGW64 shell, including the training
# tools, followed by two command line OCR runs.

on:
  #push:
  schedule:
    - cron: '0 17 * * *'

jobs:
  windows:
    runs-on: windows-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          - msystem: MINGW64
            mingw_package_prefix: mingw-w64-x86_64

    # Every `run` step executes in the MSYS2 shell set up below.
    defaults:
      run:
        shell: msys2 {0}

    steps:
      - uses: actions/checkout@v6
        with:
          submodules: recursive

      - uses: msys2/setup-msys2@v2
        with:
          msystem: ${{ matrix.msystem }}
          install: autoconf automake automake-wrapper git libtool make

      - run: pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-gcc
      - run: gcc --version

      - name: Install dependencies
        run: |
          pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-cairo
          pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-curl
          pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-gcc-libs
          pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-icu
          pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-leptonica
          pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-libarchive
          pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-pango
          pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-pkg-config
          pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-zlib

      - name: Setup Tesseract
        run: |
          ./autogen.sh

      # This matrix defines only `msystem` and `mingw_package_prefix`, so the
      # former 'CXX=${{ matrix.config.cxx }}' argument expanded to an empty
      # 'CXX='. The compiler from the gcc package installed above is now
      # named explicitly.
      - name: Configure Tesseract
        run: |
          ./configure '--disable-shared' '--disable-openmp' '--disable-doc' 'CXX=g++' 'CXXFLAGS=-g -O2'

      - name: Build and install Tesseract
        run: |
          make
          make install

      - name: Make and install training tools
        run: |
          make training
          make training-install

      - name: Display version
        run: |
          tesseract -v
          text2image -v
          lstmtraining -v

      # Test data ends up one directory above the checkout (../tessdata*).
      - name: Download fonts, tessdata and langdata required for tests
        run: |
          git clone https://github.com/egorpugin/tessdata tessdata_unittest
          cp tessdata_unittest/fonts/* test/testing/
          mv tessdata_unittest/* ../

      - name: Run Tesseract on phototest.tif and devatest.png
        run: |
          tesseract test/testing/phototest.tif - --tessdata-dir ../tessdata
          tesseract test/testing/devatest.png - -l hin+eng --tessdata-dir ../tessdata
sw matrix .github/workflows/sw.yml
View raw YAML
name: sw

# Build and test with the `sw` tool (installed by egorpugin/sw-action) on
# Windows, macOS and a Fedora container hosted on ubuntu-22.04.

on:
  schedule:
    # every 3rd day
    - cron: '0 0 */3 * *'

jobs:
  build:
    runs-on: ${{ matrix.os }}
    # Only the ubuntu-22.04 entry sets a container; it is empty for the others.
    container: ${{ matrix.container }}
    strategy:
      fail-fast: false
      matrix:
        os: [windows-2022, macos-latest]
        include:
          - os: ubuntu-22.04
            container: fedora:latest

    steps:
      - name: packages
        if: matrix.os == 'ubuntu-22.04'
        run: sudo dnf -y install cmake gcc lld which flex bison clang clang-tools-extra git

      - uses: actions/checkout@v6
        with:
          submodules: recursive

      - uses: egorpugin/sw-action@master

      # Exactly one of the four build steps runs, selected by whether the
      # event is a pull request and whether the runner is windows-2022.
      - name: build
        if: github.event_name != 'pull_request' && (matrix.os == 'windows-2022')
        run: ./sw -static -shared -platform x86,x64 -config d,r build

      - name: build-pr
        if: github.event_name == 'pull_request' && (matrix.os == 'windows-2022')
        run: ./sw build

      - name: build
        if: github.event_name != 'pull_request' && (matrix.os != 'windows-2022')
        run: ./sw -static -shared -config d,r build -Dwith-tests=1

      - name: build-pr
        if: github.event_name == 'pull_request' && (matrix.os != 'windows-2022')
        run: ./sw build -Dwith-tests=1

      - name: download test data
        run: git clone https://github.com/egorpugin/tessdata tessdata_unittest

      - name: copy fonts
        if: matrix.os != 'windows-2022'
        run: cp tessdata_unittest/fonts/* test/testing/

      - name: copy fonts
        if: matrix.os == 'windows-2022'
        run: Copy-Item -Path "tessdata_unittest\fonts\*" -Destination "test\testing" -Recurse
        shell: pwsh

      # All test steps tolerate failure (continue-on-error); results are
      # reported through the uploaded results.xml instead.
      - name: test
        if: github.event_name != 'pull_request' && (matrix.os != 'windows-2022' && matrix.os != 'macos-latest')
        run: ./sw -static -shared -config "d,r" test -Dwith-tests=1 "-Dskip-tests=lstm,lstm_recode"
        continue-on-error: true

      - name: test
        if: github.event_name == 'pull_request' && (matrix.os != 'windows-2022')
        run: ./sw test -Dwith-tests=1 "-Dskip-tests=lstm,lstm_recode"
        continue-on-error: true

      # Full test run (nothing skipped) on scheduled builds. The condition
      # used to compare github.event.schedule with '0 0 * * *', which never
      # matched the '0 0 */3 * *' cron above, so this step was always
      # skipped. Testing the event name keeps it independent of the cron.
      - name: test-nightly
        if: matrix.os != 'windows-2022' && matrix.os != 'macos-latest' && github.event_name == 'schedule'
        run: ./sw -static -shared -config "d,r" test -Dwith-tests=1
        continue-on-error: true

      # windows and macos-latest tests hang here for some reason, investigate
      #- name: test
      #  if: matrix.os == 'windows-2022' || matrix.os == 'macos-latest'
      #  run: ./sw test -Dwith-tests=1 "-Dskip-tests=lstm,lstm_recode"
      #  continue-on-error: true

      - name: Upload Unit Test Results
        if: always() && matrix.os != 'windows-2022'
        uses: actions/upload-artifact@v7
        with:
          name: Test Results (${{ matrix.os }})
          path: .sw/test/results.xml

      - name: Publish Test Report
        if: always() && matrix.os != 'windows-2022'
        uses: mikepenz/action-junit-report@v6
        with:
          check_name: test (${{ matrix.os }})
          report_paths: .sw/test/results.xml
          github_token: ${{ secrets.GITHUB_TOKEN }}
unittest matrix .github/workflows/unittest.yml
View raw YAML
name: unittest

# autotools build on ubuntu. unittests with address sanitizers. with openmp.
# ubuntu-20.04-gcc-unittest - CI runs out of diskspace.

on:
  #push:
  pull_request:
    paths:
      - '**.cpp'
      - '**.h'
      - '**Makefile.am'
      # Written without a leading slash, like the other filters here:
      # path filters are matched against repository-relative paths, so
      # '/configure.ac' presumably never matched - confirm against the
      # filter pattern documentation.
      - 'configure.ac'
      - 'unittest/**.c'
      - 'unittest/**.cc'
  schedule:
    - cron: '0 0 * * *'
  workflow_dispatch:
jobs:
sanitizers:
name: ${{ matrix.config.name }}
runs-on: ${{ matrix.config.os }}
strategy:
fail-fast: false
matrix:
config:
- { name: ubuntu-24.04-gcc-unittest, os: ubuntu-24.04, cxx: g++, cxxflags: '-g -O2 -fsanitize=address,undefined' }
- { name: ubuntu-22.04-clang-unittest, os: ubuntu-22.04, cxx: clang++, cxxflags: '-g -O2 -fsanitize=address,undefined -stdlib=libc++' }
steps:
- uses: actions/checkout@v6
with:
submodules: recursive
- name: Remove Homebrew, Android and .NET to provide more disk space
run: |
# https://github.com/actions/virtual-environments/issues/2606#issuecomment-772683150
sudo rm -rf /home/linuxbrew # will release Homebrew
sudo rm -rf /usr/local/lib/android # will release about 10 GB if you don't need Android
sudo rm -rf /usr/share/dotnet # will release about 20GB if you don't need .NET
- name: Install dependencies (Linux)
run: |
sudo apt-get update
sudo apt-get install autoconf libleptonica-dev libpango1.0-dev -y
sudo apt-get install cabextract -y
- name: Setup
run: |
./autogen.sh
- name: Configure (Linux)
run: |
./configure '--disable-shared' 'CXX=${{ matrix.config.cxx }}' \
'CXXFLAGS=${{ matrix.config.cxxflags }}'
- name: Make and Install Tesseract
run: |
${{ matrix.config.cxx }} --version
make
sudo make install
- name: Make and Install Training Tools
run: |
make training
sudo make training-install
- name: Display Tesseract and Training Tools Version
run: |
tesseract -v
lstmtraining -v
text2image -v
if: success() || failure()
- name: Download fonts, tessdata and langdata required for tests
run: |
git clone https://github.com/egorpugin/tessdata tessdata_unittest
cp tessdata_unittest/fonts/* test/testing/
mv tessdata_unittest/* ../
- name: Run Tesseract on phototest.tif and devatest.png
run: |
tesseract test/testing/phototest.tif - --tessdata-dir ../tessdata
tesseract test/testing/devatest.png - -l hin+eng --tessdata-dir ../tessdata
- name: Make and run Unit Tests
run: |
make check
- name: Display Unit Tests Report and Compiler Version
run: |
cat test-suite.log
${{ matrix.config.cxx }} --version
git log -3 --pretty=format:'%h %ad %s | %an'
if: always()
unittest-disablelegacy matrix .github/workflows/unittest-disablelegacy.yml
View raw YAML
name: unittest-disablelegacy
# autotools build on ubuntu, unittests with disabled legacy engine.
# currently some unittests are failing with disabled legacy engine.
on:
  #push:
  schedule:
    - cron: '0 10 * * *'
jobs:
  linux:
    runs-on: ${{ matrix.os }}
    # Abort the job if it has not finished after 150 minutes.
    timeout-minutes: 150
    strategy:
      # Let the other compiler finish even if one of them fails.
      fail-fast: false
      matrix:
        compiler: [ g++, clang++-18 ]
        os: [ ubuntu-24.04 ]
    steps:
      - uses: actions/checkout@v6
        with:
          submodules: recursive
      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install autoconf libleptonica-dev libpango1.0-dev -y
          sudo apt-get install cabextract -y
          #sudo apt-get install libc++-7-dev libc++abi-7-dev -y
      - name: Setup
        run: |
          ./autogen.sh
      - name: Configure
        run: |
          ./configure '--disable-shared' '--disable-legacy' 'CXX=${{ matrix.compiler }}'
      - name: Make and Install Tesseract
        # A single `install` goal is enough; naming it twice adds nothing.
        run: |
          make
          sudo make install
      - name: Make and Install Training Tools
        # Only the training tools are installed here; the base install was
        # already done by the previous step.
        run: |
          make training
          sudo make training-install
      - name: Display Version
        # Also runs after a failed earlier step (but not after cancellation).
        if: success() || failure()
        run: |
          ${{ matrix.compiler }} --version
          tesseract -v
          lstmtraining -v
          text2image -v
      - name: Download fonts, tessdata and langdata required for tests
        # The cloned data directories are moved next to the checkout, which is
        # why later steps pass --tessdata-dir ../tessdata.
        run: |
          git clone https://github.com/egorpugin/tessdata tessdata_unittest
          cp tessdata_unittest/fonts/* test/testing/
          mv tessdata_unittest/* ../
      - name: Run Tesseract on phototest.tif and devatest.png
        run: |
          tesseract test/testing/phototest.tif - --tessdata-dir ../tessdata
          tesseract test/testing/devatest.png - -l hin+eng --tessdata-dir ../tessdata
      - name: Make and run Unit Tests
        run: |
          make check
      - name: Display Unit Tests Report
        # Runs even when an earlier step failed; the log comes last so the
        # commit and compiler information is printed even if the log is missing.
        if: always()
        run: |
          git log -3
          ${{ matrix.compiler }} --version
          cat test-suite.log
unittest-macos matrix .github/workflows/unittest-macos.yml
View raw YAML
name: unittest-macos
# autotools build on homebrew. unittests with address and undefined-behaviour
# sanitizers (see CXXFLAGS in the configure step). with openmp.
on:
  #push:
  schedule:
    - cron: '0 0 * * *'
jobs:
  sanitizers:
    name: ${{ matrix.config.name }}
    runs-on: ${{ matrix.config.os }}
    strategy:
      # Let the remaining configurations finish even if one of them fails.
      fail-fast: false
      matrix:
        config:
          - { name: macos-arm-14-clang-unittest, os: macos-14, cxx: clang++ } # Apple silicon
          - { name: macos-latest-clang-unittest, os: macos-latest, cxx: clang++ }
          - { name: macos-latest-gcc-unittest, os: macos-latest, cxx: g++ }
    steps:
      - uses: actions/checkout@v6
        with:
          submodules: recursive
      - name: Install dependencies (macOS Homebrew)
        # icu4c and curl are installed only when Homebrew does not already
        # list them.
        run: |
          brew install autoconf automake cabextract libtool
          brew install leptonica libarchive pango
          if ! brew list icu4c &>/dev/null; then
            brew install icu4c
          fi
          if ! brew list curl &>/dev/null; then
            brew install curl
          fi
      - name: Setup
        run: |
          ./autogen.sh
      - name: Configure (macOS Homebrew)
        run: |
          ./configure '--disable-shared' '--with-pic' \
              'CXX=${{ matrix.config.cxx }}' \
              'CXXFLAGS=-g -O2 -fsanitize=address,undefined'
      - name: Make and Install Tesseract
        run: |
          make
          sudo make install
      - name: Make and Install Training Tools
        run: |
          make training
          sudo make training-install
      - name: Display Tesseract and Training Tools Version
        # Also runs after a failed earlier step (but not after cancellation).
        if: success() || failure()
        run: |
          tesseract -v
          lstmtraining -v
          text2image -v
      - name: Download fonts, tessdata and langdata required for tests
        # The cloned data directories are moved next to the checkout, which is
        # why later steps pass --tessdata-dir ../tessdata.
        run: |
          git clone https://github.com/egorpugin/tessdata tessdata_unittest
          cp tessdata_unittest/fonts/* test/testing/
          mv tessdata_unittest/* ../
      - name: Run Tesseract on phototest.tif and devatest.png
        run: |
          tesseract test/testing/phototest.tif - --tessdata-dir ../tessdata
          tesseract test/testing/devatest.png - -l hin+eng --tessdata-dir ../tessdata
      - name: Make and run Unit Tests
        run: |
          make check
      - name: Display Unit Tests Report and compiler version
        # Runs even when the build failed. The log is printed last: it does not
        # exist when `make check` was never reached, and the default shell
        # stops at the first failing command, which would otherwise hide the
        # compiler version and the recent commits.
        if: always()
        run: |
          ${{ matrix.config.cxx }} --version
          git log -3 --pretty=format:'%h %ad %s | %an'
          echo
          cat test-suite.log
vcpkg matrix .github/workflows/vcpkg.yml
View raw YAML
name: vcpkg
# build and test of tesseract on windows using vcpkg and cmake.
# vcpkg with -head does not work. https://github.com/microsoft/vcpkg/issues/16019
#
# All paths below are relative to the checkout directory (the default working
# directory of every step), so the workflow does not depend on the runner's
# drive letter or on the repository being named "tesseract".
on:
  #push:
  schedule:
    - cron: '0 23 * * *'
  workflow_dispatch:
jobs:
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [windows-latest]
    steps:
      - name: Checkout Tesseract Source (--head from main branch)
        uses: actions/checkout@v6
        with:
          submodules: recursive
      # - name: Visual Studio Setup
      #   shell: cmd
      #   run: |
      #     call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
      - name: Install vcpkg
        run: |
          git clone https://github.com/microsoft/vcpkg
          vcpkg/bootstrap-vcpkg.bat
          vcpkg/vcpkg integrate install
      - name: Build and Install Leptonica and image libraries using vcpkg
        run: |
          vcpkg/vcpkg install leptonica:x64-windows
      - name: Configure and Build Tesseract (--head from main branch) with cmake
        run: |
          cmake . -B build -DCMAKE_BUILD_TYPE=Release -DSW_BUILD=OFF -DOPENMP_BUILD=OFF -DBUILD_TRAINING_TOOLS=OFF "-DCMAKE_TOOLCHAIN_FILE=${env:GITHUB_WORKSPACE}/vcpkg/scripts/buildsystems/vcpkg.cmake"
          cmake --build build --config Release --target install
      - name: Display Tesseract Version
        run: |
          .\build\bin\Release\tesseract.exe --version
      - name: Create CMakeLists.txt file for basicapitest
        # The heredoc delimiter is quoted so bash leaves the ${...} CMake
        # variables untouched. CMAKE_CURRENT_SOURCE_DIR is the test directory,
        # so ../build points at the build tree created above.
        # NOTE(review): the imported `libtesseract` target is never linked by
        # that name and the tesseract50.lib file name may be stale - confirm
        # whether those two lines are still needed.
        shell: bash
        run: |
          cd test
          cat << "EOF" > CMakeLists.txt
          cmake_minimum_required(VERSION 3.19)
          project( basicapitest )
          find_package( Tesseract REQUIRED )
          find_package( Leptonica REQUIRED )
          include_directories(${Tesseract_INCLUDE_DIRS})
          include_directories(${Leptonica_INCLUDE_DIRS})
          add_executable( basicapitest testing/basicapitest.cpp )
          target_link_libraries(basicapitest ${Leptonica_LIBRARIES})
          target_link_libraries(basicapitest Tesseract::libtesseract)
          add_library(libtesseract UNKNOWN IMPORTED)
          set_property(TARGET libtesseract PROPERTY IMPORTED_LOCATION ${CMAKE_CURRENT_SOURCE_DIR}/../build/Release/tesseract50.lib)
          EOF
          cat CMakeLists.txt
      - name: Configure basicapitest
        run: |
          cd test
          cmake . "-DCMAKE_TOOLCHAIN_FILE=${env:GITHUB_WORKSPACE}/vcpkg/scripts/buildsystems/vcpkg.cmake"
      - name: Build basicapitest
        run: |
          cd test
          cmake --build . --config Release
      - name: Download tessdata and image files used for tests
        # The cloned data directories are moved next to the checkout, which is
        # why later steps pass --tessdata-dir ..\tessdata*.
        run: |
          git clone https://github.com/egorpugin/tessdata tessdata_unittest
          mv tessdata_unittest/* ../
      - name: Run basicapitest
        run: |
          cd test
          .\Release\basicapitest.exe
      - name: Run Tesseract CLI on test images in different languages
        run: |
          .\build\bin\Release\tesseract.exe test\testing\phototest.tif - --oem 1 --tessdata-dir ..\tessdata
          .\build\bin\Release\tesseract.exe test\testing\raaj.tif - -l hin --oem 1 --tessdata-dir ..\tessdata
          .\build\bin\Release\tesseract.exe test\testing\viet.tif - -l vie --oem 1 --tessdata-dir ..\tessdata
          .\build\bin\Release\tesseract.exe test\testing\hebrew.png - -l heb --oem 1 --tessdata-dir ..\tessdata
          .\build\bin\Release\tesseract.exe test\testing\eurotext.tif - -l fra --oem 1 --tessdata-dir ..\tessdata_best
          .\build\bin\Release\tesseract.exe test\testing\arabic.tif - -l ara --oem 1 --psm 6 --tessdata-dir ..\tessdata
      - name: List languages in different test tessdata-dir
        run: |
          .\build\bin\Release\tesseract.exe --list-langs --tessdata-dir ..\tessdata
          .\build\bin\Release\tesseract.exe --list-langs --tessdata-dir ..\tessdata_best
          .\build\bin\Release\tesseract.exe --list-langs --tessdata-dir ..\tessdata_fast