Skip to content
Snippets Groups Projects
Commit 3136e706 authored by Ossi Laine
Browse files

Added tika Dockerfile

parent 14d7de53
No related branches found
No related tags found
No related merge requests found
# Base image for the Tika server. Pinned to a specific Ubuntu release so that
# rebuilding the image does not silently pick up a different distribution
# (and different package versions) whenever the "latest" tag moves.
FROM ubuntu:22.04

# MAINTAINER is deprecated; the contact address is recorded as an OCI label.
LABEL org.opencontainers.image.authors="david@logicalspark.com"
# Version of the Tika server jar to download. Also read at container start to
# locate the jar, so it must remain an ENV (not a build-only ARG).
ENV TIKA_VERSION=1.22

# Canonical download location of the jar; the detached signature is expected
# at the same URL with ".asc" appended. This is kept as a separate ENV
# instruction because a variable is only substituted with values set by
# earlier instructions, not by the same one.
# NOTE(review): Apache moves superseded releases off www.apache.org/dist;
# confirm this URL still serves the pinned version.
ENV TIKA_SERVER_URL=https://www.apache.org/dist/tika/tika-server-${TIKA_VERSION}.jar
# Run shell-form instructions under bash with pipefail so that a failing curl
# in the mirror-lookup pipeline aborts the build instead of being masked by
# the exit status of the trailing awk/sed.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install the Java runtime plus the external tools used alongside Tika
# (GDAL, Tesseract OCR with English, Italian, French, Spanish and German
# language data), then download the Tika server jar and verify it.
#
# Steps, all in one layer so temporary files never persist in the image:
#   1. Install packages non-interactively and without recommended extras.
#      ca-certificates is listed explicitly because the HTTPS downloads below
#      depend on it.
#   2. Import the Tika release signing keys into a throw-away GNUPGHOME.
#   3. Fetch the detached signature from the canonical URL.
#   4. Ask Apache's closer.cgi for the preferred mirror; awk picks the
#      "preferred" and "path_info" fields from the JSON and sed strips the
#      quoting to form the mirror URL.
#   5. Download the jar from that mirror and verify it against the signature.
#      The build fails if the signature does not match, so an untrusted
#      mirror cannot substitute the jar.
#   6. Remove the keyring, apt lists and temporary files.
# curl uses -f throughout so an HTTP error fails the build rather than saving
# an error page in place of the expected file.
RUN export GNUPGHOME="$(mktemp -d)" \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        gdal-bin \
        gnupg \
        openjdk-11-jre-headless \
        tesseract-ocr \
        tesseract-ocr-deu \
        tesseract-ocr-eng \
        tesseract-ocr-fra \
        tesseract-ocr-ita \
        tesseract-ocr-spa \
    && curl -fsSL https://people.apache.org/keys/group/tika.asc -o /tmp/tika.asc \
    && gpg --batch --import /tmp/tika.asc \
    && curl -fsSL "$TIKA_SERVER_URL.asc" -o "/tmp/tika-server-${TIKA_VERSION}.jar.asc" \
    && NEAREST_TIKA_SERVER_URL=$(curl -fsSL "https://www.apache.org/dyn/closer.cgi/${TIKA_SERVER_URL#https://www.apache.org/dist/}?asjson=1" \
        | awk '/"path_info": / { pi=$2; }; /"preferred":/ { pref=$2; }; END { print pref " " pi; };' \
        | sed -r -e 's/^"//; s/",$//; s/" "//') \
    && echo "Nearest mirror: $NEAREST_TIKA_SERVER_URL" \
    && curl -fsSL "$NEAREST_TIKA_SERVER_URL" -o "/tika-server-${TIKA_VERSION}.jar" \
    && gpg --batch --verify "/tmp/tika-server-${TIKA_VERSION}.jar.asc" "/tika-server-${TIKA_VERSION}.jar" \
    && (gpgconf --kill all || true) \
    && apt-get clean -y \
    && rm -rf "$GNUPGHOME" /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Run the server as a dedicated unprivileged account instead of root. The
# UID/GID are fixed and USER is numeric so that runtimes which check for a
# non-root user (e.g. Kubernetes runAsNonRoot) can verify it.
RUN groupadd --gid 10001 tika \
    && useradd --uid 10001 --gid tika --no-create-home --shell /usr/sbin/nologin tika
USER 10001:10001

# Port passed to no flag below; documents the port the server is expected to
# listen on (EXPOSE does not publish it).
EXPOSE 9998

# Exec (JSON) form with an explicit shell: the shell is still needed to expand
# TIKA_VERSION at container start, and "exec" replaces it with the JVM so that
# java runs as PID 1 and receives SIGTERM from "docker stop" directly.
# "-h 0.0.0.0" binds the server to all interfaces so it is reachable from
# outside the container.
ENTRYPOINT ["/bin/sh", "-c", "exec java -jar \"/tika-server-${TIKA_VERSION}.jar\" -h 0.0.0.0"]
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment