Commits

Guan-Zhong Huang committed 7889488

tesseract: Added

  • Participants
  • Parent commits 4bf5822

Comments (0)

Files changed (5)

File tesseract/README

+Tesseract is a commercial quality OCR engine originally developed at HP 
+between 1985 and 1995. In 1995, this engine was among the top 3 evaluated 
+by UNLV. It was open-sourced by HP and UNLV in 2005.
+
+You will need to get one of the language packs in order to do anything
+useful with tesseract, and that language pack tarball should be present
+in the same directory as the SlackBuild script when the package is created.
+See http://code.google.com/p/tesseract-ocr/downloads/list for a list of
+all available language packs.  Note that you can install more than one
+(or even all) of the language packs, as they do not conflict with each
+other.  The build script defaults to use English, but this is easily 
+changed by passing an alternate value on the command line.
+
+Here is the relevant code from the build script:
+  # Language pack(s) to use
+  # We'll install English by default, but you can pass another one (or all) 
+  # of them on the command line (space delimited).  If you pass more than one
+  # (again, space delimited), you must enclose the string in quotes.  Examples:
+  # TESSLANG=fra ./tesseract.SlackBuild
+  # TESSLANG="deu deu-f eng fra ita nld por spa vie" ./tesseract.SlackBuild
+  TESSLANG=${TESSLANG:-eng}       # Default to English
+
+It requires leptonica.

File tesseract/include_fix.patch

+diff --git a/viewer/svutil.cpp b/viewer/svutil.cpp
+index b7fa31f..2bc1cc9 100644
+--- a/viewer/svutil.cpp
++++ b/viewer/svutil.cpp
+@@ -30,6 +30,7 @@ struct addrinfo {
+   int ai_protocol;
+ };
+ #else
++#include <unistd.h>
+ #include <arpa/inet.h>
+ #include <netinet/in.h>
+ #include <pthread.h>

File tesseract/slack-desc

+# HOW TO EDIT THIS FILE:
+# The "handy ruler" below makes it easier to edit a package description.  Line
+# up the first '|' above the ':' following the base package name, and the '|' on
+# the right side marks the last column you can put a character in.  You must make
+# exactly 11 lines for the formatting to be correct.  It's also customary to
+# leave one space after the ':'.
+
+         |-----handy-ruler--------------------------------------------------|
+tesseract: Tesseract (OCR Engine)
+tesseract:
+tesseract: Tesseract is a commercial quality OCR engine originally developed
+tesseract: at HP between 1985 and 1995. In 1995, this engine was among the 
+tesseract: top 3 evaluated by UNLV. It was open-sourced by HP and UNLV in 
+tesseract: 2005.
+tesseract:
+tesseract: http://code.google.com/p/tesseract-ocr/
+tesseract:
+tesseract:
+tesseract:

File tesseract/tesseract.SlackBuild

+#!/bin/sh
+
+# Pierre Cazenave 10/11/2007.
+# Updated 25/01/2009.
+# Updated 08/04/2009.
+# Updated 28/05/2009 for Slackware64.
+# Updated 08/09/2010 Fixed language packs logic (Thanks to Ed Rozenberg)
+# 
+# Modified by Robby Workman <rworkman@slackbuilds.org> for better
+#   consistency with our other scripts
+# Thanks to S+*n_Pe*rm*n for a bug report from OCRopus.
+
+# Copyright 2009-2010 Pierre Cazenave <pwcazenave {at} gmail [dot] com>
+# Copyright 2012  Guan-Zhong Huang <imprazaguy@gmail.com>
+# All rights reserved.
+#
+# Redistribution and use of this script, with or without modification, is
+# permitted provided that the following conditions are met:
+#
+# 1. Redistributions of this script must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ''AS IS'' AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+PRGNAM=tesseract
+VERSION=${VERSION:-3.01}
+BUILD=${BUILD:-1}
+TAG=${TAG:-_SBo}
+
+# Work out the package architecture unless the caller already set ARCH:
+# any i?86 machine name becomes i486, any arm* name becomes arm, and
+# everything else is used exactly as uname reports it.
+if [ -z "$ARCH" ]; then
+  ARCH=$( uname -m )
+  case "$ARCH" in
+    i?86) ARCH=i486 ;;
+    arm*) ARCH=arm ;;
+  esac
+fi
+
+# CWD is the directory the script was started from (sources are read from
+# it), TMP is the build area, PKG is the staging root handed to DESTDIR,
+# and OUTPUT is where the finished package is written.
+CWD=$(pwd)
+TMP=${TMP:-/tmp/SBo}
+PKG=$TMP/package-$PRGNAM
+OUTPUT=${OUTPUT:-/tmp}
+
+# Language pack(s) to bundle into the package.
+# English is used when TESSLANG is unset or empty.  To pick other packs,
+# set TESSLANG when invoking the script; several packs are given as one
+# space-delimited string, which must be enclosed in quotes.  Examples:
+# TESSLANG=fra ./tesseract.SlackBuild
+# TESSLANG="deu deu-f eng eus fra ita nld por spa vie" ./tesseract.SlackBuild
+TESSLANG=${TESSLANG:-eng}       # Default to English
+
+if [ "$ARCH" = "i486" ]; then
+  SLKCFLAGS="-O2 -march=i486 -mtune=i686"
+  LIBDIRSUFFIX=""
+elif [ "$ARCH" = "i686" ]; then
+  SLKCFLAGS="-O2 -march=i686 -mtune=i686"
+  LIBDIRSUFFIX=""
+elif [ "$ARCH" = "x86_64" ]; then
+  SLKCFLAGS="-O2 -fPIC"
+  LIBDIRSUFFIX="64"
+else
+  SLKCFLAGS="-O2"
+  LIBDIRSUFFIX=""
+fi
+
+# Abort on the first failing command from here on.
+set -e
+
+# Start from a clean source tree and staging directory.  Paths are quoted
+# so that a CWD, TMP or OUTPUT containing whitespace does not break the build.
+rm -rf "$TMP/$PRGNAM-$VERSION" "$PKG"
+mkdir -p "$TMP" "$PKG" "$OUTPUT"
+cd "$TMP"
+tar xvf "$CWD/$PRGNAM-$VERSION.tar.gz"
+cd "$PRGNAM-$VERSION"
+
+# Unpack language data files
+# The data is collected under tesseract-ocr/tessdata inside the source tree
+# and moved into the package after "make install".
+mkdir -p tesseract-ocr/tessdata
+# $TESSLANG is left unquoted on purpose: it is a space-delimited list.
+for lang in $TESSLANG ; do
+  # Version 3.01 language data file is preferred
+  # (the tarball presumably unpacks into tesseract-ocr/ -- confirm)
+  if [ -r "$CWD/tesseract-ocr-3.01.$lang.tar.gz" ]; then
+    tar xvf "$CWD/tesseract-ocr-3.01.$lang.tar.gz"
+  elif [ -r "$CWD/$lang.traineddata.gz" ]; then
+    gunzip -c "$CWD/$lang.traineddata.gz" > "tesseract-ocr/tessdata/$lang.traineddata"
+  else
+    # Report the missing pack on stderr and stop the build.
+    echo "Couldn't find language data file for '$lang'" >&2
+    exit 1
+  fi
+done
+
+# Normalize ownership and permissions of the unpacked tree: the listed
+# executable-style modes become 755, the listed plain-file modes become 644.
+chown -R root:root .
+find . \
+ \( -perm 777 -o -perm 775 -o -perm 711 -o -perm 555 -o -perm 511 \) \
+ -exec chmod 755 {} \; -o \
+ \( -perm 666 -o -perm 664 -o -perm 600 -o -perm 444 -o -perm 440 -o -perm 400 \) \
+ -exec chmod 644 {} \;
+
+# Fix missing include file
+patch -p1 < "$CWD/include_fix.patch"
+
+# Generate the configure script, then configure for the Slackware layout
+# using the flags and lib directory suffix chosen for $ARCH.
+./autogen.sh
+CFLAGS="$SLKCFLAGS" \
+CXXFLAGS="$SLKCFLAGS" \
+./configure \
+  --prefix=/usr \
+  --libdir="/usr/lib${LIBDIRSUFFIX}" \
+  --sysconfdir=/etc \
+  --localstatedir=/var \
+  --mandir=/usr/man \
+  --disable-static \
+  --build="$ARCH-slackware-linux"
+
+make
+make install DESTDIR="$PKG"
+# Install language data
+# Create the target directory first: without it, moving a single data file
+# would rename that file to "tessdata" instead of placing it inside.
+mkdir -p "$PKG/usr/share/tessdata"
+mv tesseract-ocr/tessdata/* "$PKG/usr/share/tessdata/"
+
+find $PKG -print0 | xargs -0 file | grep -e "executable" -e "shared object" | grep ELF \
+  | cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null || true
+
+mkdir -p $PKG/usr/doc/$PRGNAM-$VERSION
+cp -a AUTHORS COPYING ChangeLog INSTALL NEWS README ReleaseNotes \
+  $PKG/usr/doc/$PRGNAM-$VERSION
+cat $CWD/$PRGNAM.SlackBuild > $PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild
+
+mkdir -p $PKG/install
+cat $CWD/slack-desc > $PKG/install/slack-desc
+
+cd $PKG
+/sbin/makepkg -l y -c n $OUTPUT/$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.${PKGTYPE:-tgz}

File tesseract/tesseract.info

+PRGNAM="tesseract"
+VERSION="3.01"
+HOMEPAGE="http://code.google.com/p/tesseract-ocr/"
+DOWNLOAD="http://tesseract-ocr.googlecode.com/files/tesseract-3.01.tar.gz \
+          http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.01.eng.tar.gz"
+MD5SUM="1ba496e51a42358fb9d3ffe781b2d20a \
+        89c139a73e0e7b1225809fc7b226b6c9"
+DOWNLOAD_x86_64=""
+MD5SUM_x86_64=""
+MAINTAINER="Guan-Zhong Huang"
+EMAIL="imprazaguy@gmail.com"
+APPROVED=""