-
Notifications
You must be signed in to change notification settings - Fork 446
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix!: Replace old links and setup for libraries
What I did: - Replace non-working link - Replace non-working or irrelevant commands - All libraries are installed correctly - To compile ccextractor with all libraries we need a correct configure file and Makefile, which is why this bash script is currently broken Goal for now: - to compile and make the `ccextractor` binary (which is not yet fulfilled) Code breaks at: When it clones ccextractor and tries to run the `make` command, it breaks
- Loading branch information
1 parent
c550726
commit 6f70cdb
Showing
1 changed file
with
118 additions
and
84 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,107 +1,141 @@ | ||
#!/usr/bin/env -S sh -ex | ||
|
||
#################################################################### | ||
# setup by tracey apr 2012 | ||
# updated version dec 2016 | ||
# see: http://www.ccextractor.org/doku.php | ||
#################################################################### | ||
|
||
|
||
# build it static! | ||
# simplest way is with linux alpine | ||
# hop onto box with docker on it and cd to dir of the file you are staring at | ||
# You will get a static-compiled binary and english language library file in the end. | ||
if [ ! -e /tmp/cc/ccextractor-README.txt ]; then | ||
rm -rf /tmp/cc; | ||
mkdir -p -m777 /tmp/cc; | ||
mkdir -p -m777 ../lib/tessdata/; | ||
cp ccextractor-README.txt /tmp/cc/; | ||
sudo docker run -v /tmp/cc:/tmp/cc --rm -it alpine:latest /tmp/cc/ccextractor-README.txt; | ||
# NOTE: _AFTER_ testing/validating, you can promote it from "ccextractor.next" to "ccextractor"... ;-) | ||
cp /tmp/cc/*traineddata ../lib/tessdata/; | ||
chmod go-w ../lib/tessdata/; | ||
exit 0; | ||
#!/bin/sh | ||
# !/usr/bin/env -S sh -ex | ||
|
||
if [ ! -e /tmp/cc/build-static.sh ]; then | ||
rm -rf /tmp/cc | ||
mkdir -p -m777 /tmp/cc | ||
mkdir -p -m777 ../lib/tessdata/ | ||
cp build-static.sh /tmp/cc/ | ||
echo 'I am making docker container' | ||
sudo docker run -v /tmp/cc:/tmp/cc --rm -it alpine:latest sh -c /tmp/cc/build-static.sh | ||
# sudo docker run -v /tmp/cc:/tmp/cc --rm -it alpine:latest sh -c /tmp/cc/script.sh | ||
cp /tmp/cc/*traineddata ../lib/tessdata/ | ||
chmod go-w ../lib/tessdata/ | ||
exit 0 | ||
fi | ||
|
||
# NOW we are inside docker container... | ||
cd /tmp/cc; | ||
|
||
cd /tmp/cc | ||
|
||
# we want tesseract (for OCR) | ||
echo ' | ||
http://dl-cdn.alpinelinux.org/alpine/v3.5/main | ||
http://dl-cdn.alpinelinux.org/alpine/v3.5/community | ||
' >| /etc/apk/repositories; | ||
apk update; apk upgrade; | ||
http://dl-cdn.alpinelinux.org/alpine/latest-stable/main/ | ||
http://dl-cdn.alpinelinux.org/alpine/latest-stable/community/ | ||
' >|/etc/apk/repositories | ||
apk update | ||
apk upgrade | ||
|
||
apk add --update bash zsh alpine-sdk perl; | ||
apk add --update bash zsh alpine-sdk perl | ||
|
||
# (needed by various static builds below) | ||
# Even though we're going to (re)build tesseract from source statically, get its dependencies set up by | ||
# installing it now, too. | ||
apk add autoconf automake libtool tesseract-ocr-dev; | ||
|
||
|
||
# Now comes the not-so-fun parts... Many packages _only_ provide .so files in their distros -- not the .a | ||
# needed files for building something with it statically. Step through them now... | ||
|
||
apk add autoconf automake libtool tesseract-ocr-dev zlib-dev cmake | ||
|
||
# libgif | ||
wget https://sourceforge.net/projects/giflib/files/giflib-5.1.4.tar.gz; | ||
zcat giflib*tar.gz | tar xf -; | ||
cd giflib*/; | ||
./configure --disable-shared --enable-static; make; make install; | ||
hash -r; | ||
cd -; | ||
|
||
wget https://sourceforge.net/projects/giflib/files/giflib-5.1.4.tar.gz | ||
zcat giflib*tar.gz | tar xf - | ||
cd giflib*/ | ||
./configure --disable-shared --enable-static | ||
make | ||
make install | ||
hash -r | ||
cd - | ||
rm -rf giflib* | ||
|
||
# libwebp | ||
git clone https://github.com/webmproject/libwebp; | ||
cd libwebp; | ||
./autogen.sh; | ||
./configure --disable-shared --enable-static; make; make install; | ||
cd -; | ||
|
||
git clone https://github.com/webmproject/libwebp | ||
cd libwebp | ||
./autogen.sh | ||
./configure --disable-shared --enable-static | ||
make | ||
make install | ||
cd - | ||
rm -r libwebp/ | ||
|
||
# leptonica | ||
wget http://www.leptonica.org/source/leptonica-1.73.tar.gz; | ||
zcat leptonica*tar.gz | tar xf -; | ||
cd leptonica*/; | ||
./configure --disable-shared --enable-static; make; make install; | ||
hash -r; | ||
cd -; | ||
|
||
wget http://www.leptonica.org/source/leptonica-1.83.0.tar.gz | ||
zcat leptonica*tar.gz | tar xf - | ||
cd leptonica*/ | ||
./configure --disable-shared --enable-static | ||
make | ||
make install | ||
hash -r | ||
cd - | ||
rm -rf leptonica* | ||
|
||
# tesseract | ||
git clone https://github.com/tesseract-ocr/tesseract; | ||
cd tesseract; | ||
./autogen.sh; | ||
./configure --disable-shared --enable-static; make; make install; | ||
cd -; | ||
|
||
git clone https://github.com/tesseract-ocr/tesseract | ||
cd tesseract | ||
./autogen.sh | ||
./configure --disable-shared --enable-static | ||
make | ||
make install | ||
cd - | ||
rm -r tesseract/ | ||
|
||
# gpac | ||
git clone https://github.com/gpac/gpac.git | ||
cd gpac/ | ||
./configure | ||
make | ||
make install # This may fail, but it's OK: we already got the binary we needed | ||
cd - | ||
rm -r gpac | ||
|
||
# libjpeg | ||
git clone https://github.com/winlibs/libjpeg | ||
cd libjpeg | ||
mkdir build/ | ||
cd build | ||
cmake -G"Unix Makefiles" .. | ||
make | ||
cp libjpeg.a /usr/lib/ | ||
cd /tmp/cc | ||
rm -r libjpeg | ||
|
||
# libtiff | ||
git clone https://github.com/libsdl-org/libtiff | ||
cd libtiff | ||
autoreconf -i | ||
./configure --disable-shared --enable-static | ||
make | ||
cp libtiff/.libs/libtiff.a /usr/lib/ | ||
cd - | ||
rm -r libtiff | ||
|
||
# All binary files are installed successfully | ||
|
||
# ccextractor -- build static | ||
git clone https://github.com/CCExtractor/ccextractor; | ||
cd ccextractor/linux/; | ||
perl -i -pe 's/O3 /O3 -static /' Makefile; | ||
set +e; # this _will_ FAIL at the end.. | ||
make ENABLE_OCR=yes; | ||
set -e; | ||
# I confess hand-compiling (cherrypicking which .a to use when there are 2, etc.) is fragile... | ||
# But it was the _only_ way I could get a fully static build after hours of thrashing... | ||
gcc -Wno-write-strings -Wno-pointer-sign -D_FILE_OFFSET_BITS=64 -DVERSION_FILE_PRESENT -O3 -std=gnu99 -s -DENABLE_OCR -DPNG_NO_CONFIG_H -I/usr/local/include/tesseract -I/usr/local/include/leptonica objs/*.o -o ccextractor \ | ||
--static -lm -lgpac \ | ||
/usr/local/lib/libtesseract.a \ | ||
/usr/local/lib/liblept.a \ | ||
/usr/local/lib/libgif.a \ | ||
/usr/local/lib/libwebp.a \ | ||
/usr/lib/libjpeg.a \ | ||
/usr/lib/libtiff.a \ | ||
/usr/lib/libgomp.a \ | ||
-lstdc++; | ||
|
||
cp ccextractor /tmp/cc/ccextractor.next; | ||
cd -; | ||
git clone https://github.com/CCExtractor/ccextractor | ||
cd ccextractor/linux/ | ||
perl -i -pe 's/O3 /O3 -static /' Makefile.am | ||
set +e | ||
# I guess this is where the problem occurs | ||
autoreconf --install | ||
./configure --without-rust | ||
make ENABLE_OCR=yes | ||
set -e | ||
|
||
# Now we will build the `ccextractor` binary using the gcc compiler | ||
gcc -Wno-write-strings -Wno-pointer-sign -D_FILE_OFFSET_BITS=64 -DVERSION_FILE_PRESENT \ | ||
-O3 -std=gnu99 -s -DENABLE_OCR -DPNG_NO_CONFIG_H -I/usr/local/include/tesseract -I/usr/local/include/leptonica \ | ||
objs/*.o \ | ||
temp.c -o temp \ | ||
-lstdc++ --static -lm \ | ||
/usr/local/lib/libtesseract.a \ | ||
/usr/local/lib/libleptonica.a \ | ||
/usr/local/lib/libgif.a \ | ||
/usr/local/lib/libwebp.a \ | ||
/usr/lib/libgomp.a \ | ||
/usr/local/lib/libgpac_static.a \ | ||
/usr/lib/libjpeg.a \ | ||
/usr/lib/libtiff.a | ||
# Every library file exists, but | ||
# objs/*.o is not there | ||
|
||
cp ccextractor /tmp/cc/ccextractor.next | ||
cd - | ||
|
||
# get english lang trained data | ||
wget https://github.com/tesseract-ocr/tessdata/raw/master/eng.traineddata; | ||
wget https://github.com/tesseract-ocr/tessdata/blob/main/eng.traineddata |