Skip to content

Commit

Permalink
Add Apache Atlas version 2.2.0
Browse files Browse the repository at this point in the history
  • Loading branch information
sburn committed Dec 15, 2022
1 parent e15d680 commit 7cfa4f2
Show file tree
Hide file tree
Showing 9 changed files with 116 additions and 104 deletions.
12 changes: 0 additions & 12 deletions .github/FUNDING.yml

This file was deleted.

86 changes: 57 additions & 29 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,59 +1,87 @@
FROM scratch
FROM ubuntu:18.04
FROM ubuntu:20.04 as build
LABEL maintainer="[email protected]"
ARG VERSION=2.1.0

ARG VERSION=2.2.0
ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=Etc/UTC
ENV MAVEN_OPTS="-Xms2g -Xmx2g"
ENV JAVA_HOME="/usr/lib/jvm/java-8-openjdk-amd64"

RUN mkdir -p /tmp/atlas-src \
&& mkdir -p /apache-atlas \
&& mkdir -p /gremlin

COPY pom.xml.patch /tmp/atlas-src/

RUN apt-get update \
&& apt-get -y upgrade \
&& apt-get -y install apt-utils \
&& apt-get -y install \
maven \
wget \
git \
python \
openjdk-8-jdk-headless \
patch \
unzip \
unzip \
&& cd /tmp \
&& wget http://mirror.linux-ia64.org/apache/atlas/${VERSION}/apache-atlas-${VERSION}-sources.tar.gz \
&& mkdir -p /opt/gremlin \
&& mkdir -p /tmp/atlas-src \
&& wget https://archive.apache.org/dist/atlas/${VERSION}/apache-atlas-${VERSION}-sources.tar.gz \
&& tar --strip 1 -xzvf apache-atlas-${VERSION}-sources.tar.gz -C /tmp/atlas-src \
&& rm apache-atlas-${VERSION}-sources.tar.gz \
&& cd /tmp/atlas-src \
&& sed -i 's/http:\/\/repo1.maven.org\/maven2/https:\/\/repo1.maven.org\/maven2/g' pom.xml \
&& export MAVEN_OPTS="-Xms2g -Xmx2g" \
&& export JAVA_HOME="/usr/lib/jvm/java-8-openjdk-amd64" \
&& mvn clean -Dmaven.repo.local=/tmp/.mvn-repo -Dhttps.protocols=TLSv1.2 -DskipTests package -Pdist,embedded-hbase-solr \
&& tar -xzvf /tmp/atlas-src/distro/target/apache-atlas-${VERSION}-server.tar.gz -C /opt \
&& patch -b -f < pom.xml.patch \
&& mvn clean \
-Dmaven.repo.local=/tmp/atlas-src/.mvn-repo \
-Dhttps.protocols=TLSv1.2 \
-DskipTests \
-Drat.skip=true \
package -Pdist,embedded-hbase-solr \
&& tar --strip 1 -xzvf /tmp/atlas-src/distro/target/apache-atlas-${VERSION}-server.tar.gz -C /apache-atlas \
&& rm -Rf /tmp/atlas-src \
&& rm -Rf /tmp/.mvn-repo \
&& apt-get -y --purge remove \
maven \
git \
&& apt-get -y remove openjdk-11-jre-headless \
unzip \
&& apt-get -y autoremove \
&& apt-get -y clean

VOLUME ["/opt/apache-atlas-${VERSION}/conf", "/opt/apache-atlas-${VERSION}/logs"]
FROM ubuntu:20.04
LABEL maintainer="[email protected]"
ARG VERSION=2.2.0
ENV JAVA_HOME="/usr/lib/jvm/java-8-openjdk-amd64"

COPY atlas_start.py.patch atlas_config.py.patch /opt/apache-atlas-${VERSION}/bin/
COPY --from=build /apache-atlas /apache-atlas

RUN cd /opt/apache-atlas-${VERSION}/bin \
&& patch -b -f < atlas_start.py.patch \
&& patch -b -f < atlas_config.py.patch
# Install the packages the final image needs at run time.
# apt-utils is installed in its own step before the remaining packages
# (presumably to silence debconf's "apt-utils is not installed" warning
# while they are configured - confirm).
# The package index fetched by `apt-get update` is deleted in this same layer;
# deleting it in a later instruction would not make the image any smaller.
# NOTE(review): anything that runs `apt-get install` inside a container started
# from this image must run `apt-get update` first, since the index is not shipped.
RUN apt-get update \
&& apt-get -y upgrade \
&& apt-get -y install apt-utils \
&& apt-get -y install \
maven \
wget \
python \
openjdk-8-jdk-headless \
patch \
&& rm -rf /var/lib/apt/lists/*

COPY conf/hbase/hbase-site.xml.template /opt/apache-atlas-${VERSION}/conf/hbase/hbase-site.xml.template
COPY conf/atlas-env.sh /opt/apache-atlas-${VERSION}/conf/atlas-env.sh
COPY conf/hbase/hbase-site.xml.template /apache-atlas/conf/hbase/hbase-site.xml.template
COPY atlas_start.py.patch atlas_config.py.patch /apache-atlas/bin/
COPY conf/atlas-env.sh /apache-atlas/conf/atlas-env.sh
COPY conf/gremlin /gremlin/

COPY conf/gremlin /opt/gremlin/
# Apply the Atlas start-up script patches from inside the bin directory.
# -b keeps a backup of every file that gets patched, -f stops patch from
# prompting. The build fails as soon as one patch does not apply.
WORKDIR /apache-atlas/bin
RUN set -e; \
    for patch_file in atlas_start.py.patch atlas_config.py.patch; do \
        patch -b -f < "${patch_file}"; \
    done

RUN cd /opt/apache-atlas-${VERSION} \
&& ./bin/atlas_start.py -setup || true
# Replace the ${atlas.log.dir} and ${atlas.log.file} placeholders in the
# log4j configuration with a fixed directory (/apache-atlas/logs) and a fixed
# file name (application.log). Both substitutions run in one sed invocation,
# in the same order as before.
WORKDIR /apache-atlas/conf
RUN sed -i \
        -e 's/\${atlas.log.dir}/\/apache-atlas\/logs/g' \
        -e 's/\${atlas.log.file}/application.log/g' \
        atlas-log4j.xml

RUN cd /opt/apache-atlas-${VERSION} \
&& ./bin/atlas_start.py & \
touch /opt/apache-atlas-${VERSION}/logs/application.log \
&& tail -f /opt/apache-atlas-${VERSION}/logs/application.log | sed '/AtlasAuthenticationFilter.init(filterConfig=null)/ q' \
WORKDIR /apache-atlas/bin
RUN ./atlas_start.py -setup || true
RUN ./atlas_start.py & \
touch /apache-atlas/logs/application.log \
&& tail -f /apache-atlas/logs/application.log | sed '/Defaulting to local host name/ q' \
&& sleep 10 \
&& /opt/apache-atlas-${VERSION}/bin/atlas_stop.py
&& ./atlas_stop.py \
&& truncate -s0 /apache-atlas/logs/application.log

# Start Atlas, then keep the container in the foreground by following the
# application log.
# bash is PID 1 here, and PID 1 only reacts to signals it has a handler for.
# The trap makes `docker stop` (SIGTERM) and Ctrl-C (SIGINT) run atlas_stop.py
# and exit, instead of the signal being ignored until Docker sends SIGKILL.
# tail runs in the background and is waited on because bash defers traps while
# a foreground command is running; `wait` is interrupted immediately.
# NOTE(review): if atlas_stop.py needs longer than Docker's stop grace period,
# stop the container with `docker stop -t <seconds>`.
ENTRYPOINT ["/bin/bash", "-c", "trap '/apache-atlas/bin/atlas_stop.py; exit 0' TERM INT; /apache-atlas/bin/atlas_start.py; tail -fF /apache-atlas/logs/application.log & wait $!"]
55 changes: 24 additions & 31 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,32 +1,26 @@
[![Atlas version](https://img.shields.io/badge/Atlas-2.1.0-brightgreen.svg)](https://github.com/sburn/docker-apache-atlas)
[![Atlas version](https://img.shields.io/badge/Atlas-2.2.0-brightgreen.svg)](https://github.com/sburn/docker-apache-atlas)
[![License: Apache 2.0](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://www.apache.org/licenses/LICENSE-2.0.html)
[![Docker Pulls](https://img.shields.io/docker/pulls/sburn/apache-atlas.svg)](https://hub.docker.com/repository/docker/sburn/apache-atlas)

Apache Atlas Docker image
=======================================

This `Apache Atlas` is built from the 2.1.0-release source tarball and patched to be run in a Docker container.
This `Apache Atlas` is built from the 2.2.0-release source tarball and patched to be run in a Docker container.

Atlas is built with `embedded HBase + Solr` and it is pre-initialized, so you can use it right after image download without additional steps.

If you want to use external Atlas backends, set them up according to [the documentation](https://atlas.apache.org/#/Configuration).

Basic usage
-----------
1. Pull the latest release image:

```bash
docker pull sburn/apache-atlas
```

2. Start Apache Atlas in a container exposing Web-UI port 21000:
1. Start Apache Atlas in a container exposing Web-UI port 21000:

```bash
docker run -d \
-p 21000:21000 \
--name atlas \
sburn/apache-atlas \
/opt/apache-atlas-2.1.0/bin/atlas_start.py
sburn/apache-atlas
```

Please take into account that the first startup of Atlas may take up to a few minutes, depending on host machine performance, before the web interface becomes available at `http://localhost:21000/`
Expand All @@ -39,7 +33,7 @@ Usage options
Gracefully stop Atlas:

```bash
docker exec -ti atlas /opt/apache-atlas-2.1.0/bin/atlas_stop.py
docker exec -ti atlas /apache-atlas/bin/atlas_stop.py
```

Check Atlas startup script output:
Expand All @@ -48,16 +42,16 @@ Check Atlas startup script output:
docker logs atlas
```

Check interactively Atlas application.log (useful at the first run and for debugging during workload):
Check Atlas application.log (useful at the first run and for debugging during workload):

```bash
docker exec -ti atlas tail -f /opt/apache-atlas-2.1.0/logs/application.log
docker exec -ti atlas tail -f /apache-atlas/logs/application.log
```

Run the example (this will add sample types and instances along with traits):

```bash
docker exec -ti atlas /opt/apache-atlas-2.1.0/bin/quick_start.py
docker exec -ti atlas /apache-atlas/bin/quick_start.py
```

Start Atlas overriding settings by environment variables
Expand All @@ -74,39 +68,37 @@ docker run --detach \
-XX:+PrintHeapAtGC -XX:+PrintGCTimeStamps" \
-p 21000:21000 \
--name atlas \
sburn/apache-atlas \
/opt/apache-atlas-2.1.0/bin/atlas_start.py
sburn/apache-atlas
```

Start Atlas exposing logs directory on the host to view them directly:
Explore logs: start Atlas exposing logs directory on the host

```bash
docker run --detach \
-v ${PWD}/atlas-logs:/opt/apache-atlas-2.1.0/logs \
-v ${PWD}/atlas-logs:/apache-atlas/logs \
-p 21000:21000 \
--name atlas \
sburn/apache-atlas \
/opt/apache-atlas-2.1.0/bin/atlas_start.py
sburn/apache-atlas
```

Start Atlas exposing conf directory on the host to place and edit configuration files directly:
Custom configuration: start Atlas exposing conf directory on the host

```bash
docker run --detach \
-v ${PWD}/pre-conf:/opt/apache-atlas-2.1.0/conf \
-v ${PWD}/pre-conf:/apache-atlas/conf \
-p 21000:21000 \
--name atlas \
sburn/apache-atlas \
/opt/apache-atlas-2.1.0/bin/atlas_start.py
sburn/apache-atlas
```
Start Atlas with data directory mounted on the host to provide its persistency:

Data persistence: start Atlas with data directory mounted on the host

```bash
docker run --detach \
-v ${PWD}/data:/opt/apache-atlas-2.1.0/data \
-v ${PWD}/data:/apache-atlas/data \
-p 21000:21000 \
--name atlas \
sburn/apache-atlas \
/opt/apache-atlas-2.1.0/bin/atlas_start.py
sburn/apache-atlas
```

Tinkerpop Gremlin support
Expand All @@ -118,15 +110,15 @@ Image contains build-in extras for those who want to play with Janusgraph, and A

2. Install `gremlin-server` and `gremlin-console` into the container by running included automation script:
```bash
docker exec -ti atlas /opt/gremlin/install-gremlin.sh
docker exec -ti atlas /gremlin/install-gremlin.sh
```
3. Start `gremlin-server` in the same container:
```bash
docker exec -d atlas /opt/gremlin/start-gremlin-server.sh
docker exec -d atlas /gremlin/start-gremlin-server.sh
```
4. Finally, run `gremlin-console` interactively:
```bash
docker exec -ti atlas /opt/gremlin/run-gremlin-console.sh
docker exec -ti atlas /gremlin/run-gremlin-console.sh
```
Gremlin-console usage example:
```bash
Expand Down Expand Up @@ -161,6 +153,7 @@ The following environment variables are available for configuration:
| ATLAS_PID_DIR | <none> | Where pid files are stored. Default is the logs directory under the base install location
| ATLAS_EXPANDED_WEBAPP_DIR | <none> | Where the war file is expanded. By default it is the /server/webapp dir under the base install dir.
For additional information about configurable options, check the official Apache Atlas documentation.
Bug Tracker
-----------
Expand Down
2 changes: 1 addition & 1 deletion atlas_config.py.patch
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
+ break
+
+ if not os.path.exists(pid_file):
+ sys.stdout.write('\nApache Atlas startup failed!\nCheck logs: /opt/apache-atlas-2.1.0/logs/application.log')
+ sys.stdout.write('\nApache Atlas startup failed!\nCheck logs: /apache-atlas/logs/application.log')
+ sys.stdout.flush()
+ exit()
+ break
Expand Down
43 changes: 14 additions & 29 deletions atlas_start.py.patch
Original file line number Diff line number Diff line change
@@ -1,41 +1,32 @@
--- atlas_start.py.orig 2019-05-03 08:22:00.000000000 +0300
+++ atlas_start.py 2020-01-16 01:37:16.147611498 +0300
@@ -18,6 +18,9 @@
import os
import sys
import traceback
+import os.path
+import time
+from time import sleep

import atlas_config as mc

@@ -114,6 +117,9 @@
mc.configure_hbase(atlas_home)
--- atlas_start.py.orig 2022-12-14 14:49:45.000000000 +0000
+++ atlas_start.py 2022-12-15 15:37:31.143089911 +0000
@@ -117,6 +117,9 @@
mc.run_hbase_action(mc.hbaseBinDir(atlas_home), "start", hbase_conf_dir, logdir)
print "hbase started."

print("Local HBase started!")
+ if is_setup:
+ print ("Sleeping 60s due too setup (init run)...")
+ sleep(60)

#solr setup
if mc.is_solr_local(confdir):
@@ -128,6 +134,9 @@
@@ -135,6 +138,9 @@
mc.run_solr(mc.solrBinDir(atlas_home), "start", mc.get_solr_zk_url(confdir), mc.solrPort(), logdir, True, mc.solrHomeDir(atlas_home))

mc.run_solr(mc.solrBinDir(atlas_home), "start", mc.get_solr_zk_url(confdir), mc.solrPort(), logdir)
print "solr started."
print("Local Solr started!")
+ if is_setup:
+ print ("Sleeping 60s due too setup (init run)...")
+ sleep(60)

print "setting up solr collections..."
mc.create_solr_collection(mc.solrBinDir(atlas_home), mc.solrConfDir(atlas_home), "vertex_index", logdir)
@@ -145,8 +154,27 @@
print("\nCreating Solr collections for Atlas using config: " + mc.solrConfDir(atlas_home))

@@ -155,8 +161,22 @@
web_app_path = mc.convertCygwinPath(web_app_path)
if not is_setup:
start_atlas_server(atlas_classpath, atlas_pid_file, jvm_logdir, jvm_opts_list, web_app_path)
- mc.wait_for_startup(confdir, 300)
- print "Apache Atlas Server started!!!\n"
- print("Apache Atlas Server started!!!\n")
+
+ mc.wait_for_startup(confdir, 600)
+ print ("Apache Atlas Server process started!\n")
+
Expand All @@ -48,14 +39,8 @@
+ except:
+ pid = None
+ if not pid:
+ sys.stderr.write("No PID file found! Server is not running?\nCheck logs: /opt/apache-atlas-2.1.0/logs/application.log\n\n")
+ sys.stderr.write("No PID file found! Server is not running?\nCheck logs: /apache-atlas/logs/application.log\n\n")
+ return
+
+
+ while os.path.exists(atlas_pid_file):
+ time.sleep(1)
+
+ print ("Apache Atlas stopped!\n")
+
else:
process = mc.java("org.apache.atlas.web.setup.AtlasSetup", [], atlas_classpath, jvm_opts_list, jvm_logdir)
Expand Down
4 changes: 4 additions & 0 deletions conf/hbase/hbase-site.xml.template
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,8 @@
<name>hbase.regionserver.port</name>
<value>61520</value>
</property>
<property>
<name>hbase.unsafe.stream.capability.enforce</name>
<value>false</value>
</property>
</configuration>
14 changes: 14 additions & 0 deletions pom.xml.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
--- pom.xml.orig 2022-12-14 10:46:31.615039877 +0000
+++ pom.xml 2022-12-14 10:33:21.019600529 +0000
@@ -834,6 +834,11 @@
<name>Typesafe Repository</name>
<url>http://repo.typesafe.com/typesafe/releases/</url>
</repository>
+ <repository>
+ <id>maven-restlet</id>
+ <name>Public online Restlet repository</name>
+ <url>https://maven.restlet.talend.com</url>
+ </repository>
</repositories>

<dependencyManagement>
2 changes: 1 addition & 1 deletion start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ docker run --detach \
-p 21000:21000 \
--name atlas \
sburn/apache-atlas \
/opt/apache-atlas-2.1.0/bin/atlas_start.py
/apache-atlas/bin/atlas_start.py
2 changes: 1 addition & 1 deletion stop.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#!/bin/sh

docker exec -ti atlas /opt/apache-atlas-2.1.0/bin/atlas_stop.py
docker exec -ti atlas /apache-atlas/bin/atlas_stop.py

0 comments on commit 7cfa4f2

Please sign in to comment.