Skip to content

Commit

Permalink
fix!: Replace old links and setup for libraries
Browse files Browse the repository at this point in the history
What I did:
- Replace non-working links
- Replace non-working or irrelevant commands
- All libraries now install correctly
- To compile ccextractor with all libraries we need a correct configure file and Makefile; without them this bash script currently breaks

Goal for now:
- Compile and build the `ccextractor` binary (not yet achieved)

Code breaks at:
After cloning ccextractor, the script breaks when it tries to run the `make` command
  • Loading branch information
IshanGrover2004 committed Jan 27, 2024
1 parent c550726 commit 6f70cdb
Showing 1 changed file with 118 additions and 84 deletions.
202 changes: 118 additions & 84 deletions linux/build-static.sh
Original file line number Diff line number Diff line change
@@ -1,107 +1,141 @@
#!/usr/bin/env -S sh -ex

####################################################################
# setup by tracey apr 2012
# updated version dec 2016
# see: http://www.ccextractor.org/doku.php
####################################################################


# build it static!
# simplest way is with linux alpine
# hop onto box with docker on it and cd to dir of the file you are staring at
# You will get a static-compiled binary and english language library file in the end.
if [ ! -e /tmp/cc/ccextractor-README.txt ]; then
rm -rf /tmp/cc;
mkdir -p -m777 /tmp/cc;
mkdir -p -m777 ../lib/tessdata/;
cp ccextractor-README.txt /tmp/cc/;
sudo docker run -v /tmp/cc:/tmp/cc --rm -it alpine:latest /tmp/cc/ccextractor-README.txt;
# NOTE: _AFTER_ testing/validating, you can promote it from "ccextractor.next" to "ccextractor"... ;-)
cp /tmp/cc/*traineddata ../lib/tessdata/;
chmod go-w ../lib/tessdata/;
exit 0;
#!/bin/sh
# !/usr/bin/env -S sh -ex

# Host-side launcher: when the script has not yet been staged in /tmp/cc,
# copy it there, re-run it inside a throwaway Alpine container, and collect
# the trained-data files the container leaves behind in /tmp/cc.
# NOTE(review): /tmp/cc/build-static.sh is left in place after a run, so a
# second host-side invocation skips this branch -- confirm that is intended.
if [ ! -e /tmp/cc/build-static.sh ]; then
  # Stop at the first failing step so that a failed container run or copy is
  # not reported as success by the final "exit 0".
  set -e
  rm -rf /tmp/cc
  mkdir -p -m777 /tmp/cc
  mkdir -p -m777 ../lib/tessdata/
  cp build-static.sh /tmp/cc/
  echo 'I am making docker container'
  # Run the staged copy through `sh` explicitly so it does not depend on the
  # copied file carrying an execute bit.
  sudo docker run -v /tmp/cc:/tmp/cc --rm -it alpine:latest sh /tmp/cc/build-static.sh
  cp /tmp/cc/*traineddata ../lib/tessdata/
  chmod go-w ../lib/tessdata/
  exit 0
fi

# NOW we are inside docker container...
cd /tmp/cc;

cd /tmp/cc

# we want tesseract (for OCR)
echo '
http://dl-cdn.alpinelinux.org/alpine/v3.5/main
http://dl-cdn.alpinelinux.org/alpine/v3.5/community
' >| /etc/apk/repositories;
apk update; apk upgrade;
http://dl-cdn.alpinelinux.org/alpine/latest-stable/main/
http://dl-cdn.alpinelinux.org/alpine/latest-stable/community/
' >|/etc/apk/repositories
apk update
apk upgrade

apk add --update bash zsh alpine-sdk perl;
apk add --update bash zsh alpine-sdk perl

# (needed by various static builds below)
# Even though we're going to (re)build tesseract from source statically, get its dependencies set up by
# installing it now, too.
apk add autoconf automake libtool tesseract-ocr-dev;


# Now comes the not-so-fun parts... Many packages _only_ provide .so files in their distros -- not the .a
# needed files for building something with it statically. Step through them now...

apk add autoconf automake libtool tesseract-ocr-dev zlib-dev cmake

# libgif
wget https://sourceforge.net/projects/giflib/files/giflib-5.1.4.tar.gz;
zcat giflib*tar.gz | tar xf -;
cd giflib*/;
./configure --disable-shared --enable-static; make; make install;
hash -r;
cd -;

wget https://sourceforge.net/projects/giflib/files/giflib-5.1.4.tar.gz
zcat giflib*tar.gz | tar xf -
cd giflib*/
./configure --disable-shared --enable-static
make
make install
hash -r
cd -
rm -rf giflib*

# libwebp
git clone https://github.com/webmproject/libwebp;
cd libwebp;
./autogen.sh;
./configure --disable-shared --enable-static; make; make install;
cd -;

git clone https://github.com/webmproject/libwebp
cd libwebp
./autogen.sh
./configure --disable-shared --enable-static
make
make install
cd -
rm -r libwebp/

# leptonica
wget http://www.leptonica.org/source/leptonica-1.73.tar.gz;
zcat leptonica*tar.gz | tar xf -;
cd leptonica*/;
./configure --disable-shared --enable-static; make; make install;
hash -r;
cd -;

wget http://www.leptonica.org/source/leptonica-1.83.0.tar.gz
zcat leptonica*tar.gz | tar xf -
cd leptonica*/
./configure --disable-shared --enable-static
make
make install
hash -r
cd -
rm -rf leptonica*

# tesseract
git clone https://github.com/tesseract-ocr/tesseract;
cd tesseract;
./autogen.sh;
./configure --disable-shared --enable-static; make; make install;
cd -;

git clone https://github.com/tesseract-ocr/tesseract
cd tesseract
./autogen.sh
./configure --disable-shared --enable-static
make
make install
cd -
rm -r tesseract/

# gpac
git clone https://github.com/gpac/gpac.git
cd gpac/
./configure
make
make install # This may fail but its ok, we got what the binary we needed
cd -
rm -r gpac

# libjpeg
git clone https://github.com/winlibs/libjpeg
cd libjpeg
mkdir build/
cd build
cmake -G"Unix Makefiles" ..
make
cp libjpeg.a /usr/lib/
cd /tmp/cc
rm -r libjpeg

# libtiff
git clone https://github.com/libsdl-org/libtiff
cd libtiff
autoreconf -i
./configure --disable-shared --enable-static
make
cp libtiff/.libs/libtiff.a /usr/lib/
cd -
rm -r libtiff

# All binary files are installed successfully

# ccextractor -- build static
git clone https://github.com/CCExtractor/ccextractor;
cd ccextractor/linux/;
perl -i -pe 's/O3 /O3 -static /' Makefile;
set +e; # this _will_ FAIL at the end..
make ENABLE_OCR=yes;
set -e;
# I confess hand-compiling (cherrypicking which .a to use when there are 2, etc.) is fragile...
# But it was the _only_ way I could get a fully static build after hours of thrashing...
gcc -Wno-write-strings -Wno-pointer-sign -D_FILE_OFFSET_BITS=64 -DVERSION_FILE_PRESENT -O3 -std=gnu99 -s -DENABLE_OCR -DPNG_NO_CONFIG_H -I/usr/local/include/tesseract -I/usr/local/include/leptonica objs/*.o -o ccextractor \
--static -lm -lgpac \
/usr/local/lib/libtesseract.a \
/usr/local/lib/liblept.a \
/usr/local/lib/libgif.a \
/usr/local/lib/libwebp.a \
/usr/lib/libjpeg.a \
/usr/lib/libtiff.a \
/usr/lib/libgomp.a \
-lstdc++;

cp ccextractor /tmp/cc/ccextractor.next;
cd -;
git clone https://github.com/CCExtractor/ccextractor
cd ccextractor/linux/
perl -i -pe 's/O3 /O3 -static /' Makefile.am
set +e
# I guess this is where the problem occurs
autoreconf --install
./configure --without-rust
make ENABLE_OCR=yes
set -e

# Link the final `ccextractor` binary by hand with gcc, fully static.
# The output is named `ccextractor` because the `cp` that follows copies a
# file of that name. For static archives the linker resolves symbols from
# left to right, so -lstdc++ and -lm are placed after the archives that
# depend on them.
# NOTE(review): every library archive exists at this point, but objs/*.o does
# not, so this step still fails until the object files are produced.
gcc -Wno-write-strings -Wno-pointer-sign -D_FILE_OFFSET_BITS=64 -DVERSION_FILE_PRESENT \
  -O3 -std=gnu99 -s -DENABLE_OCR -DPNG_NO_CONFIG_H -I/usr/local/include/tesseract -I/usr/local/include/leptonica \
  objs/*.o \
  -o ccextractor \
  --static \
  /usr/local/lib/libtesseract.a \
  /usr/local/lib/libleptonica.a \
  /usr/local/lib/libgif.a \
  /usr/local/lib/libwebp.a \
  /usr/lib/libgomp.a \
  /usr/local/lib/libgpac_static.a \
  /usr/lib/libjpeg.a \
  /usr/lib/libtiff.a \
  -lstdc++ -lm

cp ccextractor /tmp/cc/ccextractor.next
cd -

# get english lang trained data
wget https://github.com/tesseract-ocr/tessdata/raw/master/eng.traineddata;
# Fetch the raw file: a ".../blob/..." URL returns GitHub's HTML viewer page
# rather than the traineddata binary.
wget https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata

0 comments on commit 6f70cdb

Please sign in to comment.