compose-controller-spark-sql-single.yaml
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This docker-compose configuration is for bringing up a pipeline controller
# along with a single-process Spark environment with a JDBC endpoint.
# Environment variables:
#
# PIPELINE_CONFIG: The directory that contains pipeline configurations, namely
# application.yaml and flink-conf.yaml files.
#
# DWH_ROOT: The directory where Parquet files are written. This is shared
# between all containers; the pipeline writes to it and the Spark container
# reads from it.
#
# Note that if local paths are used, they should start with `./` or `../`. Also,
# the mounted files should be readable by the containers, e.g., world-readable.
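#
# As an illustration (the paths below are placeholders for your own setup, not
# files shipped with this repo), both variables can be set inline when bringing
# the services up:
#
#   PIPELINE_CONFIG=./config DWH_ROOT=./dwh \
#     docker-compose -f compose-controller-spark-sql-single.yaml up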
#
# NOTES ON SPARK:
# This is a very simple single-process Spark configuration for running SQL
# queries against the Parquet files generated by the pipeline. It exposes an
# endpoint on port 10001 which can be used for JDBC connections from any SQL
# client.
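#
# As a client-side sketch (the hostname and database name below are assumptions
# for a local setup): the thriftserver speaks the HiveServer2 protocol, so a
# JDBC URL like
#   jdbc:hive2://localhost:10001/default
# works with tools such as beeline, e.g.:
#   beeline -u jdbc:hive2://localhost:10001/default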
#
# For a more complete configuration that shows the different pieces needed for
# a cluster environment, please see `compose-controller-spark-sql.yaml`.
# NOTES ON METASTORE:
# This configuration uses the default embedded Derby database as Metastore for
# the thriftserver. Example config lines are provided (but commented out) that
# show how to use an external DB instead.
# OTHER CONFIGS:
# If you want to change Spark default configs, you can mount your config files
# to /opt/bitnami/spark/conf/
# https://spark.apache.org/docs/latest/configuration.html
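#
# For example (a sketch only; `spark-defaults.conf` here is a hypothetical local
# file, not part of this repo), an extra volume entry under the `spark` service
# such as
#   - ./spark-defaults.conf:/opt/bitnami/spark/conf/spark-defaults.conf
# would override the Spark defaults listed in the page linked above.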
version: '2'

services:
  pipeline-controller:
    # To force a build, use the `--build` option of `docker-compose up`.
    build:
      context: ..
    container_name: pipeline-controller
    volumes:
      - ${PIPELINE_CONFIG}:/app/config:ro
      - ${DWH_ROOT}:/dwh
    ports:
      - '8090:8080'

  spark:
    image: docker.io/bitnami/spark:3.3
    container_name: spark-thriftserver
    command:
      - sbin/start-thriftserver.sh
    environment:
      - HIVE_SERVER2_THRIFT_PORT=10000
    ports:
      - '10001:10000'
      - '4041:4040'
    volumes:
      - ${DWH_ROOT}:/dwh
      # NON-EMBEDDED METASTORE CONFIG:
      # If you want to persist the Metastore data, e.g., table and view
      # definitions, you can use an external database by adjusting hive-site.xml:
      #- ./hive-site_example.xml:/opt/bitnami/spark/conf/hive-site.xml
      # Note that to use an external DB, you also need to provide its driver jar:
      #- ./postgresql-42.6.0.jar:/opt/bitnami/spark/jars/postgresql-42.6.0.jar
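      #
      # A minimal sketch of the kind of connection properties such a
      # hive-site.xml typically defines for a PostgreSQL Metastore (shown as
      # key/value pairs for brevity; the actual file is XML, and all values
      # here are placeholders):
      #   javax.jdo.option.ConnectionURL=jdbc:postgresql://my-db-host:5432/metastore
      #   javax.jdo.option.ConnectionDriverName=org.postgresql.Driver
      #   javax.jdo.option.ConnectionUserName=<user>
      #   javax.jdo.option.ConnectionPassword=<password>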