tesseract-ocr/tesseract

14 workflows · maturity 33% · 7 patterns · GitHub ↗

Security 8.33/100

Practices

✓ Matrix · ○ Permissions · ✓ Security scan · ○ AI review · ○ Cache · ○ Concurrency · ○ Reusable workflows

Detected patterns

Security dimensions

permissions
0
security scan
8.3
supply chain
0
secret handling
0
harden runner
0

Tools: github/codeql-action/analyze, github/codeql-action/init

Workflows (14)

autotools matrix .github/workflows/autotools.yml
Triggers
schedule
Runs on
${{ matrix.config.os }}
Jobs
linux
Matrix
config, config.cxx, config.name, config.os→ clang++-15, g++-11, g++-12, g++-14, ubuntu-22.04, ubuntu-22.04-clang-15-autotools, ubuntu-22.04-gcc-11-autotools, ubuntu-22.04-gcc-12-autotools, ubuntu-24.04, ubuntu-24.04-gcc-14-autotools
Commands
  • git clone https://github.com/egorpugin/tessdata tessdata_unittest cp tessdata_unittest/fonts/* test/testing/ mv tessdata_unittest/* ../
  • sudo apt-get update sudo apt-get install -y ${{ matrix.config.cxx }}
  • sudo apt-get install autoconf libleptonica-dev -y sudo apt-get install libpango1.0-dev -y sudo apt-get install cabextract libarchive-dev -y sudo apt-get install libcurl4-openssl-dev libcurl4 curl -y
  • ./autogen.sh
  • ./configure '--disable-shared' '--disable-openmp' '--disable-doc' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2'
  • make -j 8 sudo make install install
  • make training -j 8 sudo make install training-install
  • make check
View raw YAML
name: autotools
# Autotools build of tesseract and its training tools on Ubuntu.
# Runs the command line tests, basicapitest and the unit tests.
# The build is configured with '--disable-openmp'.
on:
  # push:
  schedule:
    # Once a day at 20:00 (GitHub evaluates cron schedules in UTC).
    - cron: '0 20 * * *'

# Least privilege: the job only checks out the repository and runs local
# build/test commands, so read access to the contents is sufficient.
permissions:
  contents: read
jobs:

  # Autotools build on Ubuntu, one run per compiler listed in the matrix.
  # Builds and installs tesseract and the training tools, runs `make check`,
  # then exercises the installed binaries (smoke tests and a small benchmark).
  linux:
    runs-on: ${{ matrix.config.os }}
    strategy:
      # Let every compiler finish even if another one fails.
      fail-fast: false
      matrix:
        config:
          - name: ubuntu-22.04-clang-15-autotools  # installed
            os: ubuntu-22.04
            cxx: clang++-15

          - name: ubuntu-24.04-gcc-14-autotools  # installed
            os: ubuntu-24.04
            cxx: g++-14
          - name: ubuntu-22.04-gcc-12-autotools  # installed
            os: ubuntu-22.04
            cxx: g++-12
          - name: ubuntu-22.04-gcc-11-autotools  # installed
            os: ubuntu-22.04
            cxx: g++-11

    steps:
    - uses: actions/checkout@v6
      with:
        submodules: recursive

    # The test data is moved one level above the workspace; later steps
    # reference it as ../tessdata, ../tessdata_best and ../tessdata_fast.
    - name: Download fonts, tessdata and langdata required for tests
      run: |
        git clone https://github.com/egorpugin/tessdata tessdata_unittest
        cp tessdata_unittest/fonts/* test/testing/
        mv tessdata_unittest/* ../

    - name: Install Compiler
      run: |
        sudo apt-get update
        sudo apt-get install -y ${{ matrix.config.cxx }}

    - name: Install dependencies
      run: |
        sudo apt-get install autoconf libleptonica-dev -y
        sudo apt-get install libpango1.0-dev -y
        sudo apt-get install cabextract libarchive-dev -y
        sudo apt-get install libcurl4-openssl-dev libcurl4 curl -y

    - name: Setup Tesseract
      run: |
        ./autogen.sh

    - name: Configure Tesseract
      run: |
        ./configure '--disable-shared' '--disable-openmp' '--disable-doc' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2'

    - name: Make and Install Tesseract
      run: |
        make -j 8
        sudo make install install

    - name: Make and Install Training Tools
      run: |
        make training -j 8
        sudo make install training-install

    - name: Make and run Unit Tests
      run: |
        make check

    # Runs after a failed step as well (but not after a cancelled run).
    - name: Display Version for tesseract, lstmtraining, text2image
      if: success() || failure()
      run: |
        tesseract -v
        lstmtraining -v
        text2image -v

    - name: List languages in different test tessdata-dir
      run: |
        tesseract  --list-langs --tessdata-dir ../tessdata
        tesseract  --list-langs --tessdata-dir ../tessdata_best
        tesseract  --list-langs --tessdata-dir ../tessdata_fast

    - name: Run Tesseract on test images in different languages
      run: |
        tesseract test/testing/phototest.tif - --oem 1  --tessdata-dir ../tessdata
        tesseract test/testing/raaj.tif - -l hin --oem 1   --tessdata-dir ../tessdata
        tesseract test/testing/viet.tif - -l vie --oem 1   --tessdata-dir ../tessdata
        tesseract test/testing/hebrew.png - -l heb --oem 1   --tessdata-dir ../tessdata
        tesseract test/testing/eurotext.tif - -l fra --oem 1 --tessdata-dir ../tessdata_best
        tesseract test/testing/arabic.tif - -l ara --oem 1 --psm 6  --tessdata-dir ../tessdata

    # Compiles the API example against the tesseract installed above.
    - name: Run Tesseract basicapitest
      run: |
        export "PKG_CONFIG_PATH=/usr/local/lib/pkgconfig"
        cd test
        ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp -I/usr/local/include -L/usr/local/lib `pkg-config --cflags --libs tesseract lept ` -pthread -std=c++17
        ./basicapitest

    # benchmarks.list names the same image fifteen times.
    - name: Setup for Tesseract benchmark using image from issue 263 fifteen times in a list file
      run: |
        wget -O i263_speed.jpg https://cloud.githubusercontent.com/assets/9968625/13674495/ac261db4-e6ab-11e5-9b4a-ad91d5b4ff87.jpg
        printf 'i263_speed.jpg\n%.0s' {1..15} > benchmarks.list
        lscpu
        free
        tesseract -v

    - name: Run Tesseract using image from issue 263 with tessdata_fast
      run: |
        time tesseract benchmarks.list - --tessdata-dir ../tessdata_fast > /dev/null 2>&1
        echo "tessdata_fast - disable-openmp"

    - name: Run Tesseract using image from issue 263 with tessdata_best
      run: |
        time tesseract benchmarks.list - --tessdata-dir ../tessdata_best > /dev/null 2>&1
        echo "tessdata_best - disable-openmp"

    # This step benchmarks ../tessdata; its name previously duplicated the
    # tessdata_fast step above.
    - name: Run Tesseract using image from issue 263 with tessdata
      run: |
        time tesseract benchmarks.list - --tessdata-dir ../tessdata > /dev/null 2>&1
        echo "tessdata - disable-openmp"

    - name: Display Compiler Version
      if: always()
      run: |
        ${{ matrix.config.cxx }} --version
        git log -3 --pretty=format:'%h %ad %s | %an'

    - name: Display Unit Tests Report
      if: always()
      run: |
        cat test-suite.log
autotools-macos matrix .github/workflows/autotools-macos.yml
Triggers
schedule, workflow_dispatch
Runs on
${{ matrix.config.os }}, ${{ matrix.config.os }}
Jobs
brew, ports
Matrix
config, config.cxx, config.name, config.os→ clang++, macos-latest, macos-latest-clang-autotools
Commands
  • git clone https://github.com/egorpugin/tessdata tessdata_unittest cp tessdata_unittest/fonts/* test/testing/ mv tessdata_unittest/* ../
  • brew install autoconf automake cabextract libtool brew install leptonica libarchive pango if ! brew list icu4c &>/dev/null; then brew install icu4c fi if ! brew list curl &>/dev/null; then brew install curl fi
  • ./autogen.sh
  • ./configure '--disable-shared' '--disable-openmp' '--disable-doc' '--with-pic' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2'
  • make -j 8 sudo make install install
  • make training -j 8 sudo make install training-install
  • make check
  • unset LANG LC_ALL LC_CTYPE locale make check
View raw YAML
name: autotools-macos
# Autotools build of tesseract and its training tools on macOS, once with
# Homebrew and once with MacPorts dependencies.
# Runs the command line tests, basicapitest and the unit tests.
# The build is configured with '--disable-openmp'.
on:
  # push:
  schedule:
    # Once a day at 20:00 (GitHub evaluates cron schedules in UTC).
    - cron: '0 20 * * *'
  workflow_dispatch:

# Least privilege: the jobs only check out the repository and run local
# build/test commands, so read access to the contents is sufficient.
permissions:
  contents: read
jobs:

  # Autotools build on macOS with dependencies installed through Homebrew.
  brew:
    runs-on: ${{ matrix.config.os }}
    strategy:
      fail-fast: false
      matrix:
        config:
          - name: macos-latest-clang-autotools
            os: macos-latest
            cxx: clang++

    steps:
    - uses: actions/checkout@v6
      with:
        submodules: recursive

    # The test data is moved one level above the workspace; later steps
    # reference it as ../tessdata, ../tessdata_best and ../tessdata_fast.
    - name: Get fonts, tessdata and langdata required for unit tests
      run: |
        git clone https://github.com/egorpugin/tessdata tessdata_unittest
        cp tessdata_unittest/fonts/* test/testing/
        mv tessdata_unittest/* ../

    # icu4c and curl are only installed when brew does not list them yet.
    - name: Install dependencies
      run: |
        brew install autoconf automake cabextract libtool
        brew install leptonica libarchive pango
        if ! brew list icu4c &>/dev/null; then
          brew install icu4c
        fi
        if ! brew list curl &>/dev/null; then
          brew install curl
        fi

    - name: Setup Tesseract
      run: |
        ./autogen.sh

    - name: Configure Tesseract
      run: |
        ./configure '--disable-shared' '--disable-openmp' '--disable-doc' '--with-pic' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2'

    - name: Make and Install Tesseract
      run: |
        make -j 8
        sudo make install install
    - name: Make and Install Training Tools
      run: |
        make training -j 8
        sudo make install training-install

    - name: Make and run Unit Tests (clang)
      if: startsWith(matrix.config.cxx, 'clang')
      run: |
        make check

    # Only runs for compilers whose name starts with 'g'; the matrix above
    # currently lists clang++ only.
    - name: Make and run Unit Tests (unset LANG needed for g++-8, g++-9, g++-10 on macOS)
      if: startsWith(matrix.config.cxx, 'g')
      shell: bash
      run: |
        unset LANG LC_ALL LC_CTYPE
        locale
        make check

    # Runs after a failed step as well (but not after a cancelled run).
    - name: Display Version for tesseract, lstmtraining, text2image
      if: success() || failure()
      run: |
        tesseract -v
        lstmtraining -v
        text2image -v

    - name: List languages in different test tessdata-dir
      run: |
        tesseract  --list-langs --tessdata-dir ../tessdata
        tesseract  --list-langs --tessdata-dir ../tessdata_best
        tesseract  --list-langs --tessdata-dir ../tessdata_fast

    - name: Run Tesseract on test images in different languages
      run: |
        tesseract test/testing/phototest.tif - --oem 1  --tessdata-dir ../tessdata
        tesseract test/testing/raaj.tif - -l hin --oem 1   --tessdata-dir ../tessdata
        tesseract test/testing/viet.tif - -l vie --oem 1   --tessdata-dir ../tessdata
        tesseract test/testing/hebrew.png - -l heb --oem 1   --tessdata-dir ../tessdata
        tesseract test/testing/eurotext.tif - -l fra --oem 1 --tessdata-dir ../tessdata_best
        tesseract test/testing/arabic.tif - -l ara --oem 1 --psm 6  --tessdata-dir ../tessdata

    # The framework is spelled 'Accelerate', matching the ports job; the
    # lowercase spelling only resolves on a case-insensitive file system.
    - name: Run Tesseract basicapitest
      run: |
        export "PKG_CONFIG_PATH=/usr/local/lib/pkgconfig"
        cd test
        ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp $(pkg-config --cflags --libs tesseract lept) -pthread -std=c++17 -framework Accelerate
        ./basicapitest

    - name: Display Compiler Version
      if: always()
      run: |
        ${{ matrix.config.cxx }} --version
        git log -3 --pretty=format:'%h %ad %s | %an'

    - name: Display Unit Tests Report
      if: always()
      run: |
        cat test-suite.log

# ============================================================================================

  # Autotools build on macOS with dependencies installed through MacPorts.
  ports:
    runs-on: ${{ matrix.config.os }}
    strategy:
      fail-fast: false
      matrix:
        config:
          - name: macos-latest-clang-autotools
            os: macos-latest
            cxx: clang++

    steps:
    - uses: actions/checkout@v6
      with:
        submodules: recursive

    # The test data is moved one level above the workspace; later steps
    # reference it as ../tessdata, ../tessdata_best and ../tessdata_fast.
    - name: Get fonts, tessdata and langdata required for tests
      run: |
        git clone https://github.com/egorpugin/tessdata tessdata_unittest
        cp tessdata_unittest/fonts/* test/testing/
        mv tessdata_unittest/* ../

    - name: Install Macports
      run: |
        # -f makes curl fail on an HTTP error instead of saving the error
        # page, which would otherwise be sourced as a shell script below.
        curl -fsSLO https://raw.githubusercontent.com/GiovanniBussi/macports-ci/master/macports-ci
        source ./macports-ci install
        # --remove-brew does not remove the Homebrew entries in bin,
        # so remove them now.
        rm -v "$(brew --prefix)"/bin/*

    - name: Install Dependencies
      run: |
        sudo port install autoconf automake libtool pkgconfig
        sudo port install leptonica
        sudo port install cairo pango
        sudo port install icu +devel
        sudo port install cabextract libarchive curl

    - name: Setup Tesseract
      run: |
        ./autogen.sh

    - name: Configure Tesseract
      run: |
        ./configure  '--disable-shared' '--disable-openmp' '--disable-doc' '--with-pic' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2'

    - name: Make and Install Tesseract
      run: |
        make -j 8
        sudo make install install

    - name: Make and Install Training Tools
      run: |
        make training -j 8
        sudo make install training-install

    - name: Make and run Unit Tests (clang)
      if: startsWith(matrix.config.cxx, 'clang')
      run: |
        make check

    # Runs after a failed step as well (but not after a cancelled run).
    - name: Display Version for tesseract, lstmtraining, text2image
      if: success() || failure()
      run: |
        tesseract -v
        lstmtraining -v
        text2image -v

    - name: List languages in different test tessdata-dir
      run: |
        tesseract  --list-langs --tessdata-dir ../tessdata
        tesseract  --list-langs --tessdata-dir ../tessdata_best
        tesseract  --list-langs --tessdata-dir ../tessdata_fast

    - name: Run Tesseract on test images in different languages
      run: |
        tesseract test/testing/phototest.tif - --oem 1  --tessdata-dir ../tessdata
        tesseract test/testing/raaj.tif - -l hin --oem 1   --tessdata-dir ../tessdata
        tesseract test/testing/viet.tif - -l vie --oem 1   --tessdata-dir ../tessdata
        tesseract test/testing/hebrew.png - -l heb --oem 1   --tessdata-dir ../tessdata
        tesseract test/testing/eurotext.tif - -l fra --oem 1 --tessdata-dir ../tessdata_best
        tesseract test/testing/arabic.tif - -l ara --oem 1 --psm 6  --tessdata-dir ../tessdata

    # Adds the /opt/local include and library directories explicitly.
    - name: Run Tesseract basicapitest
      run: |
        export "PKG_CONFIG_PATH=/usr/local/lib/pkgconfig"
        cd test
        ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp -I/opt/local/include -L/opt/local/lib $(pkg-config --cflags --libs tesseract lept) -pthread -std=c++17 -framework Accelerate
        ./basicapitest

    - name: Display Compiler Version
      if: always()
      run: |
        ${{ matrix.config.cxx }} --version
        git log -3 --pretty=format:'%h %ad %s | %an'

    - name: Display Unit Tests Report
      if: always()
      run: |
        cat test-suite.log
autotools-openmp matrix .github/workflows/autotools-openmp.yml
Triggers
workflow_dispatch
Runs on
${{ matrix.config.os }}
Jobs
linux
Matrix
config, config.name, config.os→ 22.04-openmp, 24.04-openmp, ubuntu-22.04, ubuntu-24.04
Commands
  • git clone https://github.com/egorpugin/tessdata tessdata_unittest cp tessdata_unittest/fonts/* test/testing/ mv tessdata_unittest/* ../
  • sudo apt-get update sudo apt-get install autoconf libleptonica-dev -y sudo apt-get install libpango1.0-dev -y sudo apt-get install cabextract libarchive-dev -y sudo apt-get install libcurl4-openssl-dev libcurl4 curl -y
  • ./autogen.sh
  • ./configure '--disable-shared' '--enable-openmp' '--disable-doc' 'CXX=g++' 'CXXFLAGS=-g -O2' grep -i OpenMP config.log
  • make sudo make install
  • wget -O i263_speed.jpg https://cloud.githubusercontent.com/assets/9968625/13674495/ac261db4-e6ab-11e5-9b4a-ad91d5b4ff87.jpg printf 'i263_speed.jpg\n%.0s' {1..15} > benchmarks.list
  • lscpu free g++ --version tesseract -v time tesseract benchmarks.list - --tessdata-dir ../tessdata_fast > /dev/null 2>&1 echo "tessdata_fast"
  • for lmt in {1..3}; do time OMP_THREAD_LIMIT=$lmt tesseract benchmarks.list - --tessdata-dir ../tessdata_fast > /dev/null 2>&1 && echo "OMP_THREAD_LIMIT=" $lmt "tessdata_fast" done
View raw YAML
name: autotools-openmp
# Autotools build on Ubuntu with '--enable-openmp', used to run the
# benchmark test. The training tools are not built.
on:
  # Manual runs only; the push and schedule triggers are disabled.
  # push:
  # schedule:
  #   - cron: 0 20 * * *
  workflow_dispatch:

# Least privilege: the job only checks out the repository and runs local
# build/benchmark commands, so read access to the contents is sufficient.
permissions:
  contents: read
jobs:

  # OpenMP-enabled autotools build on Ubuntu followed by timing runs of
  # tesseract against the three tessdata variants.
  linux:
    runs-on: ${{ matrix.config.os }}
    strategy:
      fail-fast: false
      matrix:
        config:
          - name: 24.04-openmp
            os: ubuntu-24.04
          - name: 22.04-openmp
            os: ubuntu-22.04

    steps:
    - uses: actions/checkout@v6
      with:
        submodules: recursive

    # The test data is moved one level above the workspace; later steps
    # reference it as ../tessdata, ../tessdata_best and ../tessdata_fast.
    - name: Download fonts, tessdata and langdata required for tests
      run: |
        git clone https://github.com/egorpugin/tessdata tessdata_unittest
        cp tessdata_unittest/fonts/* test/testing/
        mv tessdata_unittest/* ../

    - name: Install dependencies
      run: |
        sudo apt-get update
        sudo apt-get install autoconf libleptonica-dev -y
        sudo apt-get install libpango1.0-dev -y
        sudo apt-get install cabextract libarchive-dev -y
        sudo apt-get install libcurl4-openssl-dev libcurl4 curl -y

    - name: Setup Tesseract
      run: |
        ./autogen.sh

    # The grep prints the OpenMP-related lines that configure logged.
    - name: Configure Tesseract
      run: |
        ./configure '--disable-shared' '--enable-openmp' '--disable-doc' 'CXX=g++' 'CXXFLAGS=-g -O2'
        grep -i OpenMP config.log

    - name: Make and Install Tesseract
      run: |
        make
        sudo make install

    # benchmarks.list names the same image fifteen times.
    - name: Setup for Tesseract benchmark using image from issue 263 fifteen times in a list file
      run: |
        wget -O i263_speed.jpg https://cloud.githubusercontent.com/assets/9968625/13674495/ac261db4-e6ab-11e5-9b4a-ad91d5b4ff87.jpg
        printf 'i263_speed.jpg\n%.0s' {1..15} > benchmarks.list

    - name: Run Tesseract using image from issue 263 with tessdata_fast
      run: |
        lscpu
        free
        g++ --version
        tesseract -v
        time tesseract benchmarks.list - --tessdata-dir ../tessdata_fast > /dev/null 2>&1
        echo "tessdata_fast"

    # The three steps below repeat the benchmark with OMP_THREAD_LIMIT set
    # to 1, 2 and 3, once per tessdata variant.
    - name: Run Tesseract using image from issue 263 with tessdata_fast and OpenMP Thread Limit
      run: |
        for lmt in {1..3}; do
             time OMP_THREAD_LIMIT=$lmt tesseract benchmarks.list - --tessdata-dir ../tessdata_fast > /dev/null 2>&1 && echo "OMP_THREAD_LIMIT=" $lmt "tessdata_fast"
        done

    - name: Run Tesseract using image from issue 263 with tessdata_best and OpenMP Thread Limit
      run: |
        for lmt in {1..3}; do
             time OMP_THREAD_LIMIT=$lmt tesseract benchmarks.list - --tessdata-dir ../tessdata_best > /dev/null 2>&1 && echo "OMP_THREAD_LIMIT=" $lmt "tessdata_best"
        done

    - name: Run Tesseract using image from issue 263 with tessdata and OpenMP Thread Limit
      run: |
        for lmt in {1..3}; do
             time OMP_THREAD_LIMIT=$lmt tesseract benchmarks.list - --tessdata-dir ../tessdata > /dev/null 2>&1 && echo "OMP_THREAD_LIMIT=" $lmt "tessdata"
        done
cifuzz .github/workflows/cifuzz.yml
Triggers
pull_request
Runs on
ubuntu-latest
Jobs
Fuzzing
Actions
google/oss-fuzz/infra/cifuzz/actions/build_fuzzers, google/oss-fuzz/infra/cifuzz/actions/run_fuzzers
View raw YAML
name: CIFuzz
# OSS-Fuzz continuous-integration fuzzing.
# See https://google.github.io/oss-fuzz/getting-started/continuous-integration/
on:
  # Only pull requests against main that touch C++ sources or headers.
  pull_request:
    branches: [main]
    paths: ['**.cpp', '**.h']
jobs:
  # Builds the project's OSS-Fuzz fuzzers, runs them for 600 seconds and
  # uploads the crash artifacts when a run fails.
  Fuzzing:
    runs-on: ubuntu-latest
    steps:
      - name: Build Fuzzers
        id: build
        uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
        with:
          oss-fuzz-project-name: tesseract-ocr
          language: c++
          dry-run: false

      - name: Run Fuzzers
        uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
        with:
          oss-fuzz-project-name: tesseract-ocr
          fuzz-seconds: 600
          dry-run: false

      # Runs only when an earlier step failed and the fuzzer build itself
      # succeeded.
      - name: Upload Crash
        if: failure() && steps.build.outcome == 'success'
        uses: actions/upload-artifact@v7
        with:
          name: artifacts
          path: ./out/artifacts
cmake matrix .github/workflows/cmake.yml
Triggers
push, pull_request, schedule, workflow_dispatch
Runs on
${{ matrix.config.os }}
Jobs
basictests
Matrix
config, config.cxx, config.name, config.os→ clang++, clang++-15, g++, g++-12, g++-14, macos-14, macos-14-clang-15-cmake, macos-15, macos-15-clang-cmake, macos-15-gcc-14-cmake, ubuntu-22.04, ubuntu-22.04-clang-15-cmake, ubuntu-22.04-gcc-12-cmake, ubuntu-24.04, ubuntu-24.04-gcc-12-cmake
Commands
  • sudo apt-get update sudo apt-get install ${{ matrix.config.cxx }} -y
  • sudo apt-get install autoconf libleptonica-dev -y sudo apt-get install libarchive-dev libcurl4-openssl-dev -y sudo apt-get install libpango1.0-dev -y sudo apt-get install cabextract -y sudo apt-get install ninja-build -y cmake --version
  • brew install autoconf automake brew install leptonica # if ! brew list libarchive &>/dev/null; then # brew install libarchive # fi brew install pango if ! brew list icu4c &>/dev/null; then brew install icu4c fi if ! brew list curl &>/dev/null; then brew install curl fi brew install cabextract ninja --version cmake --version clang++ --version g++ --version
  • mkdir build mkdir inst cmake \ -S . \ -B build \ -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ -DOPENMP_BUILD=OFF \ -DCMAKE_CXX_COMPILER=${{ matrix.config.cxx }} \ -DCMAKE_INSTALL_PREFIX:PATH=inst
  • set -e mkdir build mkdir inst cmake \ -S . \ -B build \ -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ -DOPENMP_BUILD=OFF \ -DENABLE_UNITY_BUILD=ON \ -DCMAKE_CXX_COMPILER=${{ matrix.config.cxx }} \ -DCMAKE_INSTALL_PREFIX:PATH=inst
  • cmake --build build --config Release --target install
  • build/inst/bin/tesseract -v
  • build/inst/bin/lstmtraining -v build/inst/bin/text2image -v
View raw YAML
name: cmake
# CMake build of tesseract and its training tools on Ubuntu and macOS
# (Homebrew) using Ninja.
# Tests the command line version of tesseract and runs basicapitest.
on:
  # Pushes and pull requests only trigger the build when CMake files change.
  push:
    paths:
      - 'cmake/**'
      - '**/CMakeLists.txt'
  pull_request:
    paths:
      - 'cmake/**'
      - '**/CMakeLists.txt'
  schedule:
    # Once a day at 21:00 (GitHub evaluates cron schedules in UTC).
    - cron: '0 21 * * *'
  workflow_dispatch:

# Least privilege: the job only checks out the repository and runs local
# build/test commands, so read access to the contents is sufficient.
permissions:
  contents: read

jobs:
  # CMake/Ninja build for every OS and compiler pair in the matrix, followed
  # by command line smoke tests and the basicapitest example.
  basictests:
    name: ${{ matrix.config.name }}
    runs-on: ${{ matrix.config.os }}
    strategy:
      fail-fast: false
      matrix:
        config:
          - name: macos-14-clang-15-cmake  # default
            os: macos-14
            cxx: clang++
          - name: macos-15-gcc-14-cmake  # installed
            os: macos-15
            cxx: g++
          - name: macos-15-clang-cmake  # default
            os: macos-15
            cxx: clang++

          - name: ubuntu-22.04-clang-15-cmake  # installed
            os: ubuntu-22.04
            cxx: clang++-15
          # Label matches the compiler used (g++-14); it previously read gcc-12.
          - name: ubuntu-24.04-gcc-14-cmake  # installed
            os: ubuntu-24.04
            cxx: g++-14
          - name: ubuntu-22.04-gcc-12-cmake  # installed
            os: ubuntu-22.04
            cxx: g++-12

    steps:
      - name: Install compilers on Linux
        if: runner.os == 'Linux'
        run: |
          sudo apt-get update
          sudo apt-get install ${{ matrix.config.cxx }} -y

      - name: Install dependencies on Linux
        if: runner.os == 'Linux'
        run: |
          sudo apt-get install autoconf libleptonica-dev -y
          sudo apt-get install libarchive-dev libcurl4-openssl-dev -y
          sudo apt-get install libpango1.0-dev -y
          sudo apt-get install cabextract -y
          sudo apt-get install ninja-build -y
          cmake --version

      # icu4c and curl are only installed when brew does not list them yet.
      - name: Install dependencies on macOS
        if: runner.os == 'macOS'
        run: |
          brew install autoconf automake
          brew install leptonica
          # if ! brew list libarchive &>/dev/null; then
          #   brew install libarchive
          # fi
          brew install pango
          if ! brew list icu4c &>/dev/null; then
            brew install icu4c
          fi
          if ! brew list curl &>/dev/null; then
            brew install curl
          fi
          brew install cabextract
          ninja --version
          cmake --version
          clang++ --version
          g++ --version

      - name: Checkout Source
        uses: actions/checkout@v6
        with:
          submodules: recursive

      - name: Configure Tesseract (Linux)
        if: runner.os == 'Linux'
        run: |
          mkdir build
          mkdir inst
          cmake \
            -S . \
            -B build \
            -G Ninja \
            -DCMAKE_BUILD_TYPE=Release \
            -DOPENMP_BUILD=OFF \
            -DCMAKE_CXX_COMPILER=${{ matrix.config.cxx }} \
            -DCMAKE_INSTALL_PREFIX:PATH=inst

      # Same as the Linux configuration plus -DENABLE_UNITY_BUILD=ON.
      - name: Configure Tesseract (macOS)
        if: runner.os == 'macOS'
        shell: bash
        run: |
          set -e
          mkdir build
          mkdir inst
          cmake \
            -S . \
            -B build \
            -G Ninja \
            -DCMAKE_BUILD_TYPE=Release \
            -DOPENMP_BUILD=OFF \
            -DENABLE_UNITY_BUILD=ON \
            -DCMAKE_CXX_COMPILER=${{ matrix.config.cxx }} \
            -DCMAKE_INSTALL_PREFIX:PATH=inst

      - name: Build Tesseract
        run: |
          cmake --build build --config Release --target install

      # The installed binaries are used from build/inst/bin from here on.
      - name: Display Tesseract Version
        run: |
          build/inst/bin/tesseract -v

      - name: Display Training Tools Version
        run: |
          build/inst/bin/lstmtraining -v
          build/inst/bin/text2image -v

      # The test data is moved one level above the workspace; later steps
      # reference it as ../tessdata, ../tessdata_best and ../tessdata_fast.
      - name: Download fonts, tessdata and langdata required for tests
        run: |
          git clone https://github.com/egorpugin/tessdata tessdata_unittest
          cp tessdata_unittest/fonts/* test/testing/
          mv tessdata_unittest/* ../

      - name: List languages in different tessdata-dir
        run: |
          build/inst/bin/tesseract  --list-langs --tessdata-dir ../tessdata
          build/inst/bin/tesseract  --list-langs --tessdata-dir ../tessdata_best
          build/inst/bin/tesseract  --list-langs --tessdata-dir ../tessdata_fast

      - name: Run Tesseract on test images in different languages
        run: |
          build/inst/bin/tesseract test/testing/phototest.tif - --oem 1  --tessdata-dir ../tessdata
          build/inst/bin/tesseract test/testing/raaj.tif - -l hin --oem 1   --tessdata-dir ../tessdata
          build/inst/bin/tesseract test/testing/viet.tif - -l vie --oem 1   --tessdata-dir ../tessdata
          build/inst/bin/tesseract test/testing/hebrew.png - -l heb --oem 1   --tessdata-dir ../tessdata
          build/inst/bin/tesseract test/testing/eurotext.tif - -l fra --oem 1 --tessdata-dir ../tessdata_best
          build/inst/bin/tesseract test/testing/arabic.tif - -l ara --oem 1 --psm 6  --tessdata-dir ../tessdata

      # Compiles the API example against the tesseract installed in build/inst.
      - name: Build and run basicapitest (Linux)
        if: runner.os == 'Linux'
        run: |
          export "PKG_CONFIG_PATH=$GITHUB_WORKSPACE/build/inst/lib/pkgconfig/:$PKG_CONFIG_PATH"
          cd test
          ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" $(pkg-config --cflags --libs tesseract lept libarchive libcurl) -pthread -std=c++17
          ./basicapitest

      # The macOS variant adds Homebrew pkg-config directories to the search
      # path and does not request libarchive from pkg-config.
      - name: Build and run basicapitest (macOS)
        if: runner.os == 'macOS'
        run: |
          export "PKG_CONFIG_PATH=$GITHUB_WORKSPACE/build/inst/lib/pkgconfig/:$(brew --prefix)/opt/libarchive/lib/pkgconfig:$(brew --prefix)/Library/Homebrew/os/mac/pkgconfig/11:$PKG_CONFIG_PATH"
          cd test
          ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" $(pkg-config --cflags --libs tesseract lept libcurl) -pthread -std=c++17
          ./basicapitest

      - name: Display Compiler Version
        if: always()
        run: |
          ${{ matrix.config.cxx }} --version
          pwd
          ls -la
          # git log -3 --pretty=format:'%h %ad %s | %an'
cmake-win64 .github/workflows/cmake-win64.yml
Triggers
push, pull_request, schedule, workflow_dispatch
Runs on
windows-latest
Jobs
build
Actions
ilammy/setup-nasm, microsoft/setup-msbuild
Commands
  • git fetch --prune --unshallow --tags
  • $git_info=$(git describe --tags HEAD) $stamp=$(date +'%Y-%m-%d_%H%M%S') echo "version=${git_info}" >> $env:GITHUB_OUTPUT echo "stamp=${stamp}" >> $env:GITHUB_OUTPUT
  • mkdir ${{env.ILOC}}
  • git clone --depth 1 https://github.com/zlib-ng/zlib-ng.git cd zlib-ng cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DBUILD_SHARED_LIBS=OFF -DZLIB_COMPAT=ON -DZLIB_ENABLE_TESTS=OFF -DINSTALL_UTILS=OFF cmake --build build --target install cd ..
  • curl -sSL -o lpng${{env.png_ver}}.zip https://download.sourceforge.net/libpng/lpng${{env.png_ver}}.zip unzip.exe -qq lpng${{env.png_ver}}.zip cd lpng${{env.png_ver}} cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DPNG_TESTS=OFF -DPNG_SHARED=OFF cmake --build build --target install cd ..
  • git clone --depth 1 https://github.com/libjpeg-turbo/libjpeg-turbo.git cd libjpeg-turbo cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DWITH_TURBOJPEG=OFF -DENABLE_SHARED=OFF cmake --build build --target install cd ..
  • git clone --depth 1 https://github.com/zdenop/jbigkit.git cd jbigkit cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DBUILD_PROGRAMS=OFF -DBUILD_TOOLS=OFF -DCMAKE_WARN_DEPRECATED=OFF cmake --build build --target install cd ..
  • git clone -c advice.detachedHead=false -b "v4.7.1" --depth 1 https://gitlab.com/libtiff/libtiff.git cd libtiff cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -Dtiff-tools=OFF -Dtiff-tests=OFF -Dtiff-contrib=OFF -Dtiff-docs=OFF cmake --build build --target install cd ..
View raw YAML
# Based on https://github.com/zdenop/tesserocr/actions/runs/691257659/workflow
# Builds Tesseract on Windows using cmake. The training tools are not built.
name: cmake-win64
on:
  # Pushes and pull requests only trigger the build when CMake files change.
  push:
    paths:
      - 'cmake/**'
      - '**/CMakeLists.txt'
  pull_request:
    types:
      - opened
      - reopened
      - synchronize
    paths:
      - 'cmake/**'
      - '**/CMakeLists.txt'
  schedule:
    - cron: '0 5 * * *'
  workflow_dispatch:

env:
  # Prefix used as CMAKE_PREFIX_PATH and CMAKE_INSTALL_PREFIX by the steps.
  ILOC: 'd:/a/local'
  # Version digits in the libpng source archive name (lpng<png_ver>.zip).
  png_ver: '1651'

jobs:
  build:
    name: cmake-win64
    runs-on: windows-latest
    steps:
      # Tooling setup, then a checkout with submodules.
      - uses: ilammy/setup-nasm@v1
      - uses: microsoft/setup-msbuild@v3
      - name: 'Checkout ${{ github.ref }} ( ${{ github.sha }} )'
        uses: actions/checkout@v6
        with:
          submodules: recursive
      # Fetch the full history and all tags; `git describe --tags` is run
      # by the next step.
      - run: git fetch --prune --unshallow --tags

      # Publishes the `git describe` result and a timestamp as the step
      # outputs `version` and `stamp`. A failure here does not fail the job.
      - name: Get the version
        id: get_version
        continue-on-error: true
        run: |
          $git_info=$(git describe --tags HEAD)
          $stamp=$(date +'%Y-%m-%d_%H%M%S')
          echo "version=${git_info}" >> $env:GITHUB_OUTPUT
          echo "stamp=${stamp}" >> $env:GITHUB_OUTPUT

      # Creates the shared install prefix that the dependency builds use.
      - name: Setup Installation Location
        run: |
          mkdir ${{env.ILOC}}

      #- name: Uninstall Perl
      #  run: |
      #    choco uninstall strawberryperl

      # Static zlib-ng in zlib-compatible mode, installed into ILOC.
      - name: Build and Install zlib-ng
        shell: cmd
        run: |
          git clone --depth 1 https://github.com/zlib-ng/zlib-ng.git
          cd zlib-ng
          cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DBUILD_SHARED_LIBS=OFF -DZLIB_COMPAT=ON -DZLIB_ENABLE_TESTS=OFF -DINSTALL_UTILS=OFF
          cmake --build build --target install
          cd ..

      # Downloads the libpng source archive selected by png_ver and installs
      # a static build into ILOC.
      - name: Build and Install libpng
        shell: cmd
        run: |
          curl -sSL -o lpng${{env.png_ver}}.zip https://download.sourceforge.net/libpng/lpng${{env.png_ver}}.zip
          unzip.exe  -qq lpng${{env.png_ver}}.zip
          cd lpng${{env.png_ver}}
          cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DPNG_TESTS=OFF -DPNG_SHARED=OFF
          cmake --build build --target install
          cd ..

      # Static libjpeg-turbo without the TurboJPEG API, installed into ILOC.
      - name: Build and Install libjpeg
        shell: cmd
        run: |
          git clone --depth 1 https://github.com/libjpeg-turbo/libjpeg-turbo.git
          cd libjpeg-turbo
          cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DWITH_TURBOJPEG=OFF -DENABLE_SHARED=OFF
          cmake --build build --target install
          cd ..

      - name: Build and Install jbigkit
        shell: cmd
        run: |
             git clone --depth 1 https://github.com/zdenop/jbigkit.git
             cd jbigkit
             cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DBUILD_PROGRAMS=OFF -DBUILD_TOOLS=OFF -DCMAKE_WARN_DEPRECATED=OFF
             cmake --build build --target install
             cd ..

      - name: Build and Install libtiff
        shell: cmd
        run: |
             git clone -c advice.detachedHead=false -b "v4.7.1" --depth 1 https://gitlab.com/libtiff/libtiff.git
             cd libtiff
             cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -Dtiff-tools=OFF -Dtiff-tests=OFF -Dtiff-contrib=OFF -Dtiff-docs=OFF
             cmake --build build --target install
             cd ..

      - name: Build and Install leptonica
        shell: cmd
        run: |
             echo "Building leptonica..."
             git clone --depth 1 https://github.com/DanBloomberg/leptonica.git
             cd leptonica
             cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DSW_BUILD=OFF -DBUILD_PROG=OFF -DBUILD_SHARED_LIBS=ON
             cmake --build build --target install

      # Delete the executables that the dependency builds placed in the
      # prefix's bin directory before tesseract is installed there.
      - name: Remove not needed tools Before building tesseract
        shell: cmd
        run: |
          rm -Rf ${{env.ILOC}}/bin/*.exe

      # Configure tesseract with the Ninja generator against the same prefix
      # and install it there.
      - name: Build and Install tesseract
        shell: cmd
        run: |
          cmake -Bbuild -G Ninja -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DSW_BUILD=OFF -DBUILD_SHARED_LIBS=ON -DENABLE_LTO=ON -DBUILD_TRAINING_TOOLS=OFF -DFAST_FLOAT=ON -DGRAPHICS_DISABLED=ON -DOPENMP_BUILD=OFF
          cmake --build build --target install

      # Upload the whole install prefix. The artifact name embeds the
      # `version` and `stamp` outputs of the get_version step.
      # NOTE(review): the name ends in "VS2019" while the job runs on
      # windows-latest - confirm the label is still accurate.
      - name: Upload Build Results
        uses: actions/upload-artifact@v7
        with:
          name: 'tesseract-${{ steps.get_version.outputs.version }}-${{steps.get_version.outputs.stamp}}-VS2019_win64'
          path: ${{env.ILOC}}
          retention-days: 5

      # Smoke test: download the eng and osd models into the prefix, put the
      # installed binaries on PATH, then print the version, list the
      # languages and run OCR on one image from the repository.
      - name: Display Tesseract Version and Test Command Line Usage
        shell: cmd
        run: |
          curl -sSL https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata --output ${{env.ILOC}}/share/tessdata/eng.traineddata
          curl -sSL https://github.com/tesseract-ocr/tessdata/raw/main/osd.traineddata --output ${{env.ILOC}}/share/tessdata/osd.traineddata
          echo "Setting TESSDATA_PREFIX..."
          set TESSDATA_PREFIX=${{env.ILOC}}/share/tessdata
          echo "Setting PATH..."
          set PATH=${{env.ILOC}}/bin;%PATH%
          echo "Checking installed tesseract version..."
          tesseract -v
          echo "Checking installed langs"
          tesseract --list-langs
          echo "Checking OCR process"
          tesseract test/testing/phototest.tif -
codeql-analysis matrix security .github/workflows/codeql-analysis.yml
Triggers
push, pull_request, schedule
Runs on
ubuntu-latest
Jobs
analyze
Matrix
language→ cpp
Actions
github/codeql-action/init, github/codeql-action/analyze
Commands
  • sudo apt-get update sudo apt-get install autoconf libleptonica-dev -y sudo apt-get install libpango1.0-dev -y sudo apt-get install cabextract libarchive-dev -y sudo apt-get install libcurl4-openssl-dev libcurl4 curl -y
  • ./autogen.sh ./configure make all training
View raw YAML
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL"

# Runs on pushes and pull requests against main that touch C++ sources,
# this workflow file or the autotools build files, and on a weekly cron.
on:
  push:
    branches:
      - main
    paths:
      - "**.cpp"
      - "**.h"
      - "**/codeql-analysis.yml"
      - "m4/*.m4"
      - "Makefile.am"
      - "autogen.sh"
      - "configure.ac"
  pull_request:
    # The branches below must be a subset of the branches above
    branches:
      - main
    paths:
      - "**.cpp"
      - "**.h"
      - "**/codeql-analysis.yml"
      - "m4/*.m4"
      - "Makefile.am"
      - "autogen.sh"
      - "configure.ac"
  schedule:
    - cron: "34 23 * * 2"

jobs:
  analyze:
    name: Analyze
    runs-on: ubuntu-latest
    # Token scopes granted to this job.
    permissions:
      actions: read
      contents: read
      security-events: write

    strategy:
      fail-fast: false
      matrix:
        language:
          - cpp
        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ]
        # Learn more:
        # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed

    steps:
    - name: Checkout repository
      uses: actions/checkout@v6

    # Packages installed before the build step below runs.
    - name: Install dependencies
      run: |
        sudo apt-get update
        sudo apt-get install autoconf libleptonica-dev -y
        sudo apt-get install libpango1.0-dev -y
        sudo apt-get install cabextract libarchive-dev -y
        sudo apt-get install libcurl4-openssl-dev libcurl4 curl -y

    # Initializes the CodeQL tools for scanning.
    - name: Initialize CodeQL
      uses: github/codeql-action/init@v4
      with:
        languages: ${{ matrix.language }}
        # If you wish to specify custom queries, you can do so here or in a config file.
        # By default, queries listed here will override any specified in a config file.
        # Prefix the list here with "+" to use these queries and those in the config file.
        # queries: ./path/to/local/query, your-org/your-repo/queries@main

    # Explicit build, placed between the CodeQL init and analyze steps.
    - name: Build
      run: |
        ./autogen.sh
        ./configure
        make all training

    - name: Perform CodeQL Analysis
      uses: github/codeql-action/analyze@v4
installer-for-windows .github/workflows/installer-for-windows.yml
Triggers
workflow_dispatch
Runs on
ubuntu-24.04
Jobs
build64
Commands
  • nsis/build.sh x86_64
View raw YAML
# GitHub actions - Create Tesseract installer for Windows

name: Cross build for Windows

on:
  # Trigger workflow in GitHub web frontend or from API.
  workflow_dispatch:
    inputs:
      # Single-choice input; no step of the job below reads it.
      targets:
        type: choice
        description: "Target operating system"
        required: true
        default: "Windows (64 bit)"
        options:
          - "Windows (64 bit)"

jobs:
  build64:
    runs-on:
      - ubuntu-24.04
    steps:
    - uses: actions/checkout@v6
    # Runs the repository script nsis/build.sh with the argument x86_64.
    - name: Build Tesseract installer (64 bit)
      run: nsis/build.sh x86_64
    # Publishes the `dist` path as a workflow artifact.
    - uses: actions/upload-artifact@v7
      with:
        name: Tesseract Installer for Windows (64 bit)
        path: dist
msys2 matrix .github/workflows/msys2.yml
Triggers
schedule
Runs on
windows-latest
Jobs
windows
Matrix
include, include.mingw_package_prefix, include.msystem→ MINGW64, mingw-w64-x86_64
Actions
msys2/setup-msys2
Commands
  • pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-gcc
  • gcc --version
  • pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-cairo pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-curl pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-gcc-libs pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-icu pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-leptonica pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-libarchive pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-pango pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-pkg-config pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-zlib
  • ./autogen.sh
  • ./configure '--disable-shared' '--disable-openmp' '--disable-doc' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2'
  • make make install
  • make training make training-install
  • tesseract -v text2image -v lstmtraining -v
View raw YAML
name: msys2
# msys2 build for tesseract -head from main branch.
on:
  #push:
  schedule:
    - cron: '0 17 * * *'
jobs:
  windows:
    runs-on: windows-latest
    strategy:
      fail-fast: false
      matrix:
        # A single MSYS2 environment; the prefix is prepended to every
        # pacman package name installed below.
        include:
          - msystem: MINGW64
            mingw_package_prefix: mingw-w64-x86_64
    defaults:
      run:
        # Default shell for the `run` steps of this job.
        shell: msys2 {0}
    steps:
    - uses: actions/checkout@v6
      with:
        submodules: recursive
    - uses: msys2/setup-msys2@v2
      with:
        msystem: ${{ matrix.msystem }}
        install: autoconf automake automake-wrapper git libtool make
    - run: pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-gcc
    - run: gcc --version

    # One pacman invocation per package.
    - name: Install dependencies
      run: |
        pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-cairo
        pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-curl
        pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-gcc-libs
        pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-icu
        pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-leptonica
        pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-libarchive
        pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-pango
        pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-pkg-config
        pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-zlib

    - name: Setup Tesseract
      run: |
        ./autogen.sh

    # Configure a static build without OpenMP and without documentation.
    # No CXX is passed: this workflow's matrix defines only `msystem` and
    # `mingw_package_prefix`, so the former `${{ matrix.config.cxx }}`
    # expanded to an empty string and handed configure an empty `CXX=`.
    # configure now picks the compiler itself (the gcc package is installed
    # by an earlier step).
    - name: Configure Tesseract
      run: |
        ./configure '--disable-shared' '--disable-openmp' '--disable-doc' 'CXXFLAGS=-g -O2'

    - name: Build and install Tesseract
      run: |
        make
        make install

    - name: Make and install training tools
      run: |
        make training
        make training-install

    # Print the version of the main binary and of two training tools.
    - name: Display version
      run: |
        tesseract -v
        text2image -v
        lstmtraining -v

    # Clone the test data repository, copy its fonts into test/testing and
    # move the remaining content one directory above the workspace.
    - name: Download fonts, tessdata and langdata required for tests
      run: |
        git clone https://github.com/egorpugin/tessdata tessdata_unittest
        cp tessdata_unittest/fonts/* test/testing/
        mv tessdata_unittest/* ../

    # OCR two sample images against the tessdata directory moved above.
    - name: Run Tesseract on phototest.tif and devatest.png
      run: |
        tesseract test/testing/phototest.tif -  --tessdata-dir ../tessdata
        tesseract test/testing/devatest.png - -l hin+eng  --tessdata-dir ../tessdata
sw matrix .github/workflows/sw.yml
Triggers
schedule
Runs on
${{ matrix.os }}
Jobs
build
Matrix
include, include.container, include.os, os→ fedora:latest, macos-latest, ubuntu-22.04, windows-2022
Actions
egorpugin/sw-action, mikepenz/action-junit-report
Commands
  • sudo dnf -y install cmake gcc lld which flex bison clang clang-tools-extra git
  • ./sw -static -shared -platform x86,x64 -config d,r build
  • ./sw build
  • ./sw -static -shared -config d,r build -Dwith-tests=1
  • ./sw build -Dwith-tests=1
  • git clone https://github.com/egorpugin/tessdata tessdata_unittest
  • cp tessdata_unittest/fonts/* test/testing/
  • Copy-Item -Path "tessdata_unittest\fonts\*" -Destination "test\testing" -Recurse
View raw YAML
name: sw

on:
  schedule:
    # every 3rd day
    - cron: '0 0 */3 * *'

jobs:
  build:
    runs-on: ${{ matrix.os }}
    container: ${{ matrix.container }}
    strategy:
      fail-fast: false
      matrix:
        os:
          - windows-2022
          - macos-latest
        # Additional combination: ubuntu-22.04 runner with a Fedora container.
        include:
          - os: ubuntu-22.04
            container: fedora:latest

    steps:
    # Only the ubuntu-22.04 entry (the Fedora container) installs packages.
    - name: packages
      if: matrix.os == 'ubuntu-22.04'
      run: sudo dnf -y install cmake gcc lld which flex bison clang clang-tools-extra git

    - uses: actions/checkout@v6
      with:
        submodules: recursive
    - uses: egorpugin/sw-action@master

    # Build steps: one pair for windows-2022, one pair for everything else.
    # NOTE(review): the `pull_request` variants cannot run while `schedule`
    # is the only trigger of this workflow.
    - name: build
      if: github.event_name != 'pull_request' && (matrix.os == 'windows-2022')
      run: ./sw -static -shared -platform x86,x64 -config d,r build
    - name: build-pr
      if: github.event_name == 'pull_request' && (matrix.os == 'windows-2022')
      run: ./sw build

    - name: build
      if: github.event_name != 'pull_request' && (matrix.os != 'windows-2022')
      run: ./sw -static -shared -config d,r build -Dwith-tests=1
    - name: build-pr
      if: github.event_name == 'pull_request' && (matrix.os != 'windows-2022')
      run: ./sw build -Dwith-tests=1

    - name: download test data
      run: git clone https://github.com/egorpugin/tessdata tessdata_unittest

    # Same copy expressed twice: cp off Windows, Copy-Item (pwsh) on Windows.
    - name: copy fonts
      if: matrix.os != 'windows-2022'
      run: cp tessdata_unittest/fonts/* test/testing/
    - name: copy fonts
      if: matrix.os == 'windows-2022'
      shell: pwsh
      run: Copy-Item -Path "tessdata_unittest\fonts\*" -Destination "test\testing" -Recurse

    # Test steps never fail the job (continue-on-error) and skip the
    # lstm and lstm_recode tests.
    - name: test
      if: github.event_name != 'pull_request' && (matrix.os != 'windows-2022' && matrix.os != 'macos-latest')
      continue-on-error: true
      run: ./sw -static -shared -config "d,r" test -Dwith-tests=1 "-Dskip-tests=lstm,lstm_recode"
    - name: test
      if: github.event_name == 'pull_request' && (matrix.os != 'windows-2022')
      continue-on-error: true
      run: ./sw test -Dwith-tests=1 "-Dskip-tests=lstm,lstm_recode"

    # Full test run (no skipped tests) for the scheduled trigger; Linux only.
    # `github.event.schedule` carries the cron expression that fired, so the
    # comparison must use the exact cron string declared under `on.schedule`.
    # The former literal '0 0 * * *' matched no declared schedule, which
    # meant this step could never run.
    - name: test-nightly
      if: matrix.os != 'windows-2022' && matrix.os != 'macos-latest' && github.event.schedule == '0 0 */3 * *'
      run: ./sw -static -shared -config "d,r" test -Dwith-tests=1
      continue-on-error: true

    # windows and macos-latest tests hang here for some reason, investigate
    #- name: test
      #if: matrix.os == 'windows-2022' || matrix.os == 'macos-latest'
      #run: ./sw test -Dwith-tests=1 "-Dskip-tests=lstm,lstm_recode"
      #continue-on-error: true

    # Both reporting steps use always(), so they also run after a failed
    # step; windows-2022 is excluded.
    - name: Upload Unit Test Results
      uses: actions/upload-artifact@v7
      if: always() && matrix.os != 'windows-2022'
      with:
        name: 'Test Results (${{ matrix.os }})'
        path: .sw/test/results.xml

    - name: Publish Test Report
      uses: mikepenz/action-junit-report@v6
      if: always() && matrix.os != 'windows-2022'
      with:
        check_name: 'test (${{ matrix.os }})'
        report_paths: .sw/test/results.xml
        github_token: ${{ secrets.GITHUB_TOKEN }}
unittest matrix .github/workflows/unittest.yml
Triggers
pull_request, schedule, workflow_dispatch
Runs on
${{ matrix.config.os }}
Jobs
sanitizers
Matrix
config, config.cxx, config.cxxflags, config.name, config.os→ -g -O2 -fsanitize=address,undefined, -g -O2 -fsanitize=address,undefined -stdlib=libc++, clang++, g++, ubuntu-22.04, ubuntu-22.04-clang-unittest, ubuntu-24.04, ubuntu-24.04-gcc-unittest
Commands
  • # https://github.com/actions/virtual-environments/issues/2606#issuecomment-772683150 sudo rm -rf /home/linuxbrew # will release Homebrew sudo rm -rf /usr/local/lib/android # will release about 10 GB if you don't need Android sudo rm -rf /usr/share/dotnet # will release about 20GB if you don't need .NET
  • sudo apt-get update sudo apt-get install autoconf libleptonica-dev libpango1.0-dev -y sudo apt-get install cabextract -y
  • ./autogen.sh
  • ./configure '--disable-shared' 'CXX=${{ matrix.config.cxx }}' \ 'CXXFLAGS=${{ matrix.config.cxxflags }}'
  • ${{ matrix.config.cxx }} --version make sudo make install
  • make training sudo make training-install
  • tesseract -v lstmtraining -v text2image -v
  • git clone https://github.com/egorpugin/tessdata tessdata_unittest cp tessdata_unittest/fonts/* test/testing/ mv tessdata_unittest/* ../
View raw YAML
name: unittest
# autotools build on ubuntu. unittests with address sanitizers. with openmp.
# ubuntu-20.04-gcc-unittest - CI runs out of diskspace.
# Triggers: pull requests touching sources or build files, a daily cron,
# and manual dispatch.
on:
  #push:
  pull_request:
    paths:
      - '**.cpp'
      - '**.h'
      - '**Makefile.am'
      # Path filters are matched against repository-relative paths, which
      # never begin with a slash; the former '/configure.ac' therefore never
      # matched the top-level configure.ac.
      - 'configure.ac'
      - 'unittest/**.c'
      - 'unittest/**.cc'
  schedule:
    - cron: 0 0 * * *
  workflow_dispatch:

jobs:
  sanitizers:
    name: ${{ matrix.config.name }}
    runs-on: ${{ matrix.config.os }}
    strategy:
      fail-fast: false
      matrix:
        # Each entry bundles a display name, a runner image, a compiler and
        # the CXXFLAGS handed to configure.
        config:
          - name: ubuntu-24.04-gcc-unittest
            os: ubuntu-24.04
            cxx: g++
            cxxflags: "-g -O2 -fsanitize=address,undefined"
          - name: ubuntu-22.04-clang-unittest
            os: ubuntu-22.04
            cxx: clang++
            cxxflags: "-g -O2 -fsanitize=address,undefined -stdlib=libc++"
    steps:
    - uses: actions/checkout@v6
      with:
        submodules: recursive

    # Free disk space on the runner by deleting three preinstalled trees.
    - name: Remove Homebrew, Android and .NET to provide more disk space
      run: |
        # https://github.com/actions/virtual-environments/issues/2606#issuecomment-772683150
        sudo rm -rf /home/linuxbrew # will release Homebrew
        sudo rm -rf /usr/local/lib/android # will release about 10 GB if you don't need Android
        sudo rm -rf /usr/share/dotnet # will release about 20GB if you don't need .NET

    - name: Install dependencies (Linux)
      run: |
        sudo apt-get update
        sudo apt-get install autoconf libleptonica-dev libpango1.0-dev -y
        sudo apt-get install cabextract -y

    - name: Setup
      run: |
        ./autogen.sh

    # Static build with the compiler and flags of the current matrix entry.
    - name: Configure (Linux)
      run: |
        ./configure '--disable-shared' 'CXX=${{ matrix.config.cxx }}' \
            'CXXFLAGS=${{ matrix.config.cxxflags }}'

    - name: Make and Install Tesseract
      run: |
        ${{ matrix.config.cxx }} --version
        make
        sudo make install

    - name: Make and Install Training Tools
      run: |
        make training
        sudo make training-install

    # Runs whether the previous steps succeeded or failed.
    - name: Display Tesseract and Training Tools Version
      if: success() || failure()
      run: |
        tesseract -v
        lstmtraining -v
        text2image -v

    # Clone the test data repository, copy its fonts into test/testing and
    # move the remaining content one directory above the workspace.
    - name: Download fonts, tessdata and langdata required for tests
      run: |
        git clone https://github.com/egorpugin/tessdata tessdata_unittest
        cp tessdata_unittest/fonts/* test/testing/
        mv tessdata_unittest/* ../

    - name: Run Tesseract on phototest.tif and devatest.png
      run: |
        tesseract test/testing/phototest.tif -  --tessdata-dir ../tessdata
        tesseract test/testing/devatest.png - -l hin+eng  --tessdata-dir ../tessdata

    - name: Make and run Unit Tests
      run: |
        make check

    # always(): print the test log even when `make check` failed.
    - name: Display Unit Tests Report and Compiler Version
      if: always()
      run: |
        cat test-suite.log
        ${{ matrix.config.cxx }} --version
        git log -3 --pretty=format:'%h %ad %s | %an'
unittest-disablelegacy matrix .github/workflows/unittest-disablelegacy.yml
Triggers
schedule
Runs on
${{ matrix.os }}
Jobs
linux
Matrix
compiler, os→ clang++-18, g++, ubuntu-24.04
Commands
  • sudo apt-get update sudo apt-get install autoconf libleptonica-dev libpango1.0-dev -y sudo apt-get install cabextract -y #sudo apt-get install libc++-7-dev libc++abi-7-dev -y
  • ./autogen.sh
  • ./configure '--disable-shared' '--disable-legacy' 'CXX=${{ matrix.compiler }}'
  • make sudo make install install
  • make training sudo make install training-install
  • ${{ matrix.compiler }} --version tesseract -v lstmtraining -v text2image -v
  • git clone https://github.com/egorpugin/tessdata tessdata_unittest cp tessdata_unittest/fonts/* test/testing/ mv tessdata_unittest/* ../
  • tesseract test/testing/phototest.tif - --tessdata-dir ../tessdata tesseract test/testing/devatest.png - -l hin+eng --tessdata-dir ../tessdata
View raw YAML
name: unittest-disablelegacy
# autotools build on ubuntu, unittests with disabled legacy engine.
# currently some unittests are failing with disabled legacy engine.

on:
  #push:
  schedule:
    - cron: '0 10 * * *'

jobs:
  linux:
    runs-on: ${{ matrix.os }}
    timeout-minutes: 150
    strategy:
      fail-fast: false
      matrix:
        compiler:
          - g++
          - clang++-18
        os:
          - ubuntu-24.04

    steps:
    - uses: actions/checkout@v6
      with:
        submodules: recursive

    - name: Install dependencies
      run: |
        sudo apt-get update
        sudo apt-get install autoconf libleptonica-dev libpango1.0-dev -y
        sudo apt-get install cabextract -y
        #sudo apt-get install libc++-7-dev libc++abi-7-dev -y

    - name: Setup
      run: |
        ./autogen.sh

    # Static build with the legacy engine disabled, using the matrix compiler.
    - name: Configure
      run: |
        ./configure '--disable-shared' '--disable-legacy' 'CXX=${{ matrix.compiler }}'

    # NOTE(review): `install` is named twice on the make command line below,
    # and again alongside `training-install` in the following step.
    - name: Make and Install Tesseract
      run: |
        make
        sudo make install install

    - name: Make and Install Training Tools
      run: |
        make training
        sudo make install training-install

    # Runs whether the previous steps succeeded or failed.
    - name: Display Version
      if: success() || failure()
      run: |
        ${{ matrix.compiler }} --version
        tesseract -v
        lstmtraining -v
        text2image -v

    # Clone the test data repository, copy its fonts into test/testing and
    # move the remaining content one directory above the workspace.
    - name: Download fonts, tessdata and langdata required for tests
      run: |
        git clone https://github.com/egorpugin/tessdata tessdata_unittest
        cp tessdata_unittest/fonts/* test/testing/
        mv tessdata_unittest/* ../

    - name: Run Tesseract on phototest.tif and devatest.png
      run: |
        tesseract test/testing/phototest.tif -  --tessdata-dir ../tessdata
        tesseract test/testing/devatest.png - -l hin+eng  --tessdata-dir ../tessdata

    - name: Make and run Unit Tests
      run: |
        make check

    # always(): print the test log even when `make check` failed.
    - name: Display Unit Tests Report
      if: always()
      run: |
        git log -3
        ${{ matrix.compiler }} --version
        cat test-suite.log
unittest-macos matrix .github/workflows/unittest-macos.yml
Triggers
schedule
Runs on
${{ matrix.config.os }}
Jobs
sanitizers
Matrix
config, config.cxx, config.name, config.os→ clang++, g++, macos-14, macos-arm-14-clang-unittest, macos-latest, macos-latest-clang-unittest, macos-latest-gcc-unittest
Commands
  • brew install autoconf automake cabextract libtool brew install leptonica libarchive pango if ! brew list icu4c &>/dev/null; then brew install icu4c fi if ! brew list curl &>/dev/null; then brew install curl fi
  • ./autogen.sh
  • ./configure '--disable-shared' '--with-pic' \ 'CXX=${{ matrix.config.cxx }}' \ 'CXXFLAGS=-g -O2 -fsanitize=address,undefined'
  • make sudo make install
  • make training sudo make training-install
  • tesseract -v lstmtraining -v text2image -v
  • git clone https://github.com/egorpugin/tessdata tessdata_unittest cp tessdata_unittest/fonts/* test/testing/ mv tessdata_unittest/* ../
  • tesseract test/testing/phototest.tif - --tessdata-dir ../tessdata tesseract test/testing/devatest.png - -l hin+eng --tessdata-dir ../tessdata
View raw YAML
name: unittest-macos
# autotools build on homebrew. unittests with address sanitizers. with openmp.
on:
  #push:
  schedule:
    - cron: '0 0 * * *'

jobs:
  sanitizers:
    name: ${{ matrix.config.name }}
    runs-on: ${{ matrix.config.os }}
    strategy:
      fail-fast: false
      matrix:
        # Each entry bundles a display name, a runner image and a compiler.
        config:
          # Apple silicon
          - name: macos-arm-14-clang-unittest
            os: macos-14
            cxx: clang++
          - name: macos-latest-clang-unittest
            os: macos-latest
            cxx: clang++
          - name: macos-latest-gcc-unittest
            os: macos-latest
            cxx: g++

    steps:
    - uses: actions/checkout@v6
      with:
        submodules: recursive

    # icu4c and curl are installed only when `brew list` does not report them.
    - name: Install dependencies (macOS Homebrew)
      run: |
        brew install autoconf automake cabextract libtool
        brew install leptonica libarchive pango
        if ! brew list icu4c &>/dev/null; then
          brew install icu4c
        fi
        if ! brew list curl &>/dev/null; then
          brew install curl
        fi

    - name: Setup
      run: |
        ./autogen.sh

    # Static, PIC build with the address and undefined-behaviour sanitizers.
    - name: Configure (macOS Homebrew)
      run: |
        ./configure '--disable-shared' '--with-pic' \
             'CXX=${{ matrix.config.cxx }}' \
             'CXXFLAGS=-g -O2 -fsanitize=address,undefined'

    - name: Make and Install Tesseract
      run: |
        make
        sudo make install

    - name: Make and Install Training Tools
      run: |
        make training
        sudo make training-install

    # Runs whether the previous steps succeeded or failed.
    - name: Display Tesseract and Training Tools Version
      if: success() || failure()
      run: |
        tesseract -v
        lstmtraining -v
        text2image -v

    # Clone the test data repository, copy its fonts into test/testing and
    # move the remaining content one directory above the workspace.
    - name: Download fonts, tessdata and langdata required for tests
      run: |
        git clone https://github.com/egorpugin/tessdata tessdata_unittest
        cp tessdata_unittest/fonts/* test/testing/
        mv tessdata_unittest/* ../

    - name: Run Tesseract on phototest.tif and devatest.png
      run: |
        tesseract test/testing/phototest.tif -  --tessdata-dir ../tessdata
        tesseract test/testing/devatest.png - -l hin+eng  --tessdata-dir ../tessdata

    - name: Make and run Unit Tests
      run: |
        make check

    # always(): print the test log even when `make check` failed.
    - name: Display Unit Tests Report and compiler version
      if: always()
      run: |
        cat test-suite.log
        ${{ matrix.config.cxx }} --version
        git log -3 --pretty=format:'%h %ad %s | %an'
vcpkg matrix .github/workflows/vcpkg.yml
Triggers
schedule, workflow_dispatch
Runs on
${{ matrix.os }}
Jobs
build
Matrix
os→ windows-latest
Commands
  • git clone https://github.com/microsoft/vcpkg vcpkg/bootstrap-vcpkg.bat vcpkg/vcpkg integrate install
  • vcpkg/vcpkg install leptonica:x64-windows
  • cmake . -B build -DCMAKE_BUILD_TYPE=Release -DSW_BUILD=OFF -DOPENMP_BUILD=OFF -DBUILD_TRAINING_TOOLS=OFF "-DCMAKE_TOOLCHAIN_FILE=${env:GITHUB_WORKSPACE}/vcpkg/scripts/buildsystems/vcpkg.cmake" cmake --build build --config Release --target install
  • D:\a\tesseract\tesseract\build\bin\Release\tesseract.exe --version
  • cd test cat << "EOF" > CMakeLists.txt cmake_minimum_required(VERSION 3.19) project( basicapitest ) find_package( Tesseract REQUIRED ) find_package( Leptonica REQUIRED ) include_directories(${Tesseract_INCLUDE_DIRS}) include_directories(${Leptonica_INCLUDE_DIRS}) add_executable( basicapitest testing/basicapitest.cpp ) target_link_libraries(basicapitest ${Leptonica_LIBRARIES}) target_link_libraries(basicapitest Tesseract::libtesseract) add_library(libtesseract UNKNOWN IMPORTED) set_property(TARGET libtesseract PROPERTY IMPORTED_LOCATION D:/a/tesseract/tesseract/build/Release/tesseract50.lib) target_link_libraries(basicapitest Tesseract::libtesseract) EOF cat CMakeLists.txt
  • cd test cmake . "-DCMAKE_TOOLCHAIN_FILE=${env:GITHUB_WORKSPACE}/vcpkg/scripts/buildsystems/vcpkg.cmake"
  • cd test cmake --build . --config Release
  • git clone https://github.com/egorpugin/tessdata tessdata_unittest mv tessdata_unittest/* ../
View raw YAML
name: vcpkg
# build and test of tesseract on windows using vcpkg and cmake.
# vcpkg with -head does not work. https://github.com/microsoft/vcpkg/issues/16019
on:
  #push:
  schedule:
    - cron: '0 23 * * *'
  workflow_dispatch:

jobs:
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os:
          - windows-latest

    steps:
      - name: Checkout Tesseract Source (--head from main branch)
        uses: actions/checkout@v6
        with:
          submodules: recursive

     # - name: Visual Studio Setup
     #   shell: cmd
     #   run: |
     #        call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"

      # Clone vcpkg into the workspace, bootstrap it and run `integrate install`.
      - name: Install vcpkg
        run: |
          git clone https://github.com/microsoft/vcpkg
          vcpkg/bootstrap-vcpkg.bat
          vcpkg/vcpkg integrate install

      - name: Build and Install Leptonica and image libraries using vcpkg
        run: |
          vcpkg/vcpkg install leptonica:x64-windows

      # Configure with the vcpkg toolchain file, then build the install target.
      - name: Configure and Build Tesseract (--head from main branch) with cmake
        run: |
          cmake . -B build -DCMAKE_BUILD_TYPE=Release -DSW_BUILD=OFF -DOPENMP_BUILD=OFF -DBUILD_TRAINING_TOOLS=OFF "-DCMAKE_TOOLCHAIN_FILE=${env:GITHUB_WORKSPACE}/vcpkg/scripts/buildsystems/vcpkg.cmake"
          cmake --build build --config Release --target install

      # NOTE(review): this and later steps hard-code the workspace as
      # D:\a\tesseract\tesseract, while the cmake steps use
      # ${env:GITHUB_WORKSPACE} - confirm the two always agree.
      - name: Display Tesseract Version
        run: |
          D:\a\tesseract\tesseract\build\bin\Release\tesseract.exe --version

      # Writes test/CMakeLists.txt from a here-document (bash shell).
      # NOTE(review): the generated file links Tesseract::libtesseract twice
      # and declares an imported `libtesseract` target that nothing in the
      # generated file links against - confirm whether those lines are needed.
      - name: Create CMakeLists.txt file for basicapitest
        shell: bash
        run: |
          cd test
          cat << "EOF" > CMakeLists.txt
          cmake_minimum_required(VERSION 3.19)
          project( basicapitest )
          find_package( Tesseract REQUIRED )
          find_package( Leptonica REQUIRED )
          include_directories(${Tesseract_INCLUDE_DIRS})
          include_directories(${Leptonica_INCLUDE_DIRS})
          add_executable( basicapitest testing/basicapitest.cpp )
          target_link_libraries(basicapitest ${Leptonica_LIBRARIES})
          target_link_libraries(basicapitest Tesseract::libtesseract)
          add_library(libtesseract UNKNOWN IMPORTED)
          set_property(TARGET libtesseract PROPERTY IMPORTED_LOCATION D:/a/tesseract/tesseract/build/Release/tesseract50.lib)
          target_link_libraries(basicapitest Tesseract::libtesseract)
          EOF
          cat CMakeLists.txt

      - name: Configure basicapitest
        run: |
          cd test
          cmake . "-DCMAKE_TOOLCHAIN_FILE=${env:GITHUB_WORKSPACE}/vcpkg/scripts/buildsystems/vcpkg.cmake"

      - name: Build basicapitest
        run: |
          cd test
          cmake --build .  --config Release

      # Clone the test data repository and move its content one directory
      # above the workspace, where the --tessdata-dir arguments below point.
      - name: Download tessdata and image files used for tests
        run: |
          git clone https://github.com/egorpugin/tessdata tessdata_unittest
          mv tessdata_unittest/* ../

      - name: Run basicapitest
        run: |
          cd test
          D:\a\tesseract\tesseract\test\Release\basicapitest.exe

      # OCR one sample image per language with --oem 1.
      - name: Run Tesseract CLI on test images in different languages
        run: |
          D:\a\tesseract\tesseract\build\bin\Release\tesseract.exe  test\testing\phototest.tif - --oem 1  --tessdata-dir ..\tessdata
          D:\a\tesseract\tesseract\build\bin\Release\tesseract.exe  test\testing\raaj.tif - -l hin --oem 1   --tessdata-dir ..\tessdata
          D:\a\tesseract\tesseract\build\bin\Release\tesseract.exe  test\testing\viet.tif - -l vie --oem 1   --tessdata-dir ..\tessdata
          D:\a\tesseract\tesseract\build\bin\Release\tesseract.exe  test\testing\hebrew.png - -l heb --oem 1   --tessdata-dir ..\tessdata
          D:\a\tesseract\tesseract\build\bin\Release\tesseract.exe  test\testing\eurotext.tif - -l fra --oem 1 --tessdata-dir ..\tessdata_best
          D:\a\tesseract\tesseract\build\bin\Release\tesseract.exe  test\testing\arabic.tif - -l ara --oem 1 --psm 6  --tessdata-dir ..\tessdata

      - name: List languages in different test tessdata-dir
        run: |
          D:\a\tesseract\tesseract\build\bin\Release\tesseract.exe  --list-langs --tessdata-dir ..\tessdata
          D:\a\tesseract\tesseract\build\bin\Release\tesseract.exe  --list-langs --tessdata-dir ..\tessdata_best
          D:\a\tesseract\tesseract\build\bin\Release\tesseract.exe  --list-langs --tessdata-dir ..\tessdata_fast