-
-
Save erikdubbelboer/9c0e5ac6465c12f7925f823a2b110c10 to your computer and use it in GitHub Desktop.
Atomx Druid Dataproc scripts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Build the Atomx Druid distribution tarball (custom configs + extra jars),
# copy it to the Druid hosts, and publish a Dataproc initialization action
# that syncs this build's Jackson jars onto the Hadoop workers.
#
# Requires: a completed Druid build in ../druid-src, a local ./config dir,
# ssh access to the druid-* hosts, and gsutil credentials.

set -euo pipefail

readonly DRUID="0.11.0"
readonly HADOOP="2.8.2"
readonly CONNECTOR="1.6.1-hadoop2"

# Our build of druid is at ../druid-src
if [[ ! -f "../druid-src/distribution/target/druid-${DRUID}-bin.tar.gz" ]]; then
  echo "../druid-src/distribution/target/druid-${DRUID}-bin.tar.gz doesn't exist" >&2
  exit 1
fi

# Build a druid tar with all our configs and additional jars.
rm -rf druid-0.*
cp "../druid-src/distribution/target/druid-${DRUID}-bin.tar.gz" .
tar -xzf "druid-${DRUID}-bin.tar.gz"

# Untar the mysql connector to the extensions dir.
rm -f mysql-metadata-storage-*.tar.gz
cp ../druid-src/distribution/target/mysql-metadata-storage-*.tar.gz .
tar -xzf mysql-metadata-storage-*.tar.gz
mv mysql-metadata-storage "druid-${DRUID}/extensions/"

# Add the google-extensions contrib jar built alongside Druid.
mkdir -p "druid-${DRUID}/extensions/druid-google-extensions"
cp "../druid-src/extensions-contrib/google-extensions/target/druid-google-extensions-${DRUID}.jar" \
  "druid-${DRUID}/extensions/druid-google-extensions/"

# Put the gcs connector where it can be found.
if [[ ! -f "gcs-connector-${CONNECTOR}.jar" ]]; then
  wget "https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-${CONNECTOR}.jar"
fi
cp "gcs-connector-${CONNECTOR}.jar" "druid-${DRUID}/lib/"
cp "gcs-connector-${CONNECTOR}.jar" "druid-${DRUID}/extensions/druid-hdfs-storage/"

# Use our own config instead of the Druid default ones.
rm -rf "druid-${DRUID}/conf" "druid-${DRUID}/conf-quickstart/"
cp -r config/ "druid-${DRUID}/config"

# Repack the customized distribution.
rm -f "druid-${DRUID}-bin.tar.gz"
tar -czf "druid-${DRUID}-bin.tar.gz" "druid-${DRUID}"

# Distribute the druid tars to all instances.
#gsutil cp druid-${DRUID}-bin.tar.gz gs://atomx/druid-${DRUID}-bin.tar.gz
cp "druid-${DRUID}-bin.tar.gz" ~/
# Hosts are ephemeral, so skip host-key verification.
ssh_opts=(-o UserKnownHostsFile=/dev/null -o CheckHostIP=no -o StrictHostKeyChecking=no)
scp "${ssh_opts[@]}" "druid-${DRUID}-bin.tar.gz" druid-eu-historical-1:
#...
scp "${ssh_opts[@]}" "druid-${DRUID}-bin.tar.gz" druid-indexer-1:
#...

# Create a Dataproc hadoop initialization action. On each worker it replaces
# Hadoop's bundled jackson-*-2.* jars with the ones from this Druid build —
# presumably to avoid Jackson version conflicts between Hadoop and Druid
# (NOTE(review): confirm the intent; the removal/replace itself is explicit below).
# The heredoc is unquoted on purpose: ${DRUID}/${HADOOP} expand at generation time.
mkdir -p initialization/jars/
cat > initialization/initialization.sh << stop
#!/bin/bash
mkdir /tmp/initialization
gsutil -m rsync -r -d gs://hadoop-eu-atomx/${DRUID}-${HADOOP}/ /tmp/initialization/
rm /usr/lib/hadoop-mapreduce/jackson-*-2.*.jar
cp /tmp/initialization/jars/* /usr/lib/hadoop-mapreduce/
exit 0
stop

rm -f initialization/jars/*.jar
cp "druid-${DRUID}/lib/"jackson-*-2.*.jar initialization/jars/
gsutil -m rsync -d -r initialization/ "gs://hadoop-eu-atomx/${DRUID}-${HADOOP}/"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/hadoop/tmp</value>
    <description>A base for other temporary directories.</description>
  </property>
  <!-- fs.default.name is the deprecated alias of fs.defaultFS; both are set
       so old and new tooling agree on the same HDFS namenode. -->
  <property>
    <name>fs.default.name</name>
    <value>hdfs://hadoop-eu-1-m:8020</value>
    <description>The old FileSystem used by FsShell.</description>
  </property>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hadoop-eu-1-m:8020</value>
    <description>
      The name of the default file system. A URI whose scheme and authority
      determine the FileSystem implementation. The uri's scheme determines
      the config property (fs.SCHEME.impl) naming the FileSystem
      implementation class. The uri's authority is used to determine the
      host, port, etc. for a filesystem.
    </description>
  </property>
  <property>
    <name>fs.gs.working.dir</name>
    <value>/</value>
    <description>
      The directory relative gs: uris resolve in inside of the default bucket.
    </description>
  </property>
  <property>
    <name>fs.gs.system.bucket</name>
    <value>hadoop-eu-atomx</value>
    <description>
      GCS bucket to use as a default bucket if fs.default.name is not a gs: uri.
    </description>
  </property>
  <property>
    <name>fs.gs.metadata.cache.directory</name>
    <value>/tmp/hadoop_gcs_connector_metadata_cache</value>
    <description>
      Only used if fs.gs.metadata.cache.type is FILESYSTEM_BACKED, specifies
      the local path to use as the base path for storing mirrored GCS metadata.
      Must be an absolute path, must be a directory, and must be fully
      readable/writable/executable by any user running processes which use the
      GCS connector.
    </description>
  </property>
  <property>
    <name>fs.gs.impl</name>
    <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem</value>
    <description>The FileSystem for gs: (GCS) uris.</description>
  </property>
  <property>
    <name>fs.gs.project.id</name>
    <value>magnetic-icon-88807</value>
    <description>
      Google Cloud Project ID with access to configured GCS buckets.
    </description>
  </property>
  <property>
    <name>fs.gs.metadata.cache.enable</name>
    <value>true</value>
    <description>
      If true, a DirectoryListCache will be used to supplement
      &quot;list&quot; requests to GCS to fill in any
      missing items caused by eventual list consistency,
      intercepting create/delete/copy calls to create cache entries. The
      concrete type is determined with fs.gs.metadata.cache.type.
    </description>
  </property>
  <property>
    <name>fs.gs.implicit.dir.infer.enable</name>
    <value>true</value>
    <description>
      If set, we create and return in-memory directory objects on the fly when
      no backing object exists, but we know there are files with the same
      prefix.
    </description>
  </property>
  <property>
    <name>fs.AbstractFileSystem.gs.impl</name>
    <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value>
    <description>The AbstractFileSystem for gs: (GCS) uris.</description>
  </property>
  <property>
    <name>fs.gs.metadata.cache.type</name>
    <value>FILESYSTEM_BACKED</value>
    <description>
      Specifies which implementation of DirectoryListCache to use for
      supplementing GCS API &quot;list&quot; requests. Supported
      implementations: IN_MEMORY: Enforces immediate consistency within
      same Java process. FILESYSTEM_BACKED: Enforces consistency across
      all cooperating processes pointed at the same local mirror
      directory, which may be an NFS directory for massively-distributed
      coordination.
    </description>
  </property>
</configuration>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Dry run: print the gcloud command that creates the Dataproc Hadoop cluster
# referenced by the Druid deployment (it uses the initialization action that
# build-druid.sh publishes to gs://hadoop-eu-atomx/0.11.0-2.8.2/).
#
# NOTE(review): the leading 'echo' means this script only PRINTS the command.
# Remove the 'echo' to actually create the cluster.

set -euo pipefail

echo gcloud dataproc clusters create hadoop-eu-1 \
  --bucket=hadoop-eu-atomx \
  --image-version=1.2 \
  --initialization-actions="gs://hadoop-eu-atomx/0.11.0-2.8.2/initialization.sh" \
  --initialization-action-timeout 15m \
  --master-boot-disk-size-gb 10 \
  --master-machine-type n1-standard-16 \
  --num-master-local-ssds 0 \
  --worker-machine-type n1-standard-16 \
  --worker-boot-disk-size-gb 10 \
  --num-worker-local-ssds 1 \
  --num-workers 6 \
  --num-preemptible-workers 0 \
  --zone europe-west1-c
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.