Skip to content

Commit 066e6fa

Browse files
JWileczek authored and rfecher committed
Implement JupyterHub deployment (#1368)
1 parent 5ff476a commit 066e6fa

6 files changed

Lines changed: 270 additions & 87 deletions

File tree

deploy/packaging/emr/generate-emr-scripts.sh

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -48,8 +48,11 @@ mkdir -p $TARGET_ROOT/quickstart
4848
cp $TEMPLATE_ROOT/bootstrap-geowave.sh.template $TEMPLATE_ROOT/bootstrap-geowave.sh
4949
cp $TEMPLATE_ROOT/geowave-install-lib.sh.template $TEMPLATE_ROOT/geowave-install-lib.sh
5050
cp $TEMPLATE_ROOT/quickstart/geowave-env.sh.template $TARGET_ROOT/quickstart/geowave-env.sh
51-
cp $TEMPLATE_ROOT/bootstrap-jupyter.sh.template $TEMPLATE_ROOT/bootstrap-jupyter.sh
52-
cp $TEMPLATE_ROOT/create-configure-kernel.sh.template $TEMPLATE_ROOT/create-configure-kernel.sh
51+
52+
cp $TEMPLATE_ROOT/jupyter/bootstrap-jupyter.sh.template $TEMPLATE_ROOT/bootstrap-jupyter.sh
53+
cp $TEMPLATE_ROOT/jupyter/create-configure-kernel.sh.template $TEMPLATE_ROOT/create-configure-kernel.sh
54+
cp $TEMPLATE_ROOT/jupyter/bootstrap-jupyterhub.sh.template $TEMPLATE_ROOT/bootstrap-jupyterhub.sh
55+
5356
cp $TEMPLATE_ROOT/bootstrap-zeppelin.sh.template $TEMPLATE_ROOT/bootstrap-zeppelin.sh
5457
cp $TEMPLATE_ROOT/configure-zeppelin.sh.template $TEMPLATE_ROOT/configure-zeppelin.sh
5558

@@ -72,6 +75,9 @@ sed -i -e s/'$GEOWAVE_VERSION_TOKEN'/${ARGS[version]}/g $TEMPLATE_ROOT/bootstrap
7275
sed -i -e s/'$GEOWAVE_VERSION_URL_TOKEN'/${GEOWAVE_VERSION_URL_TOKEN}/g $TEMPLATE_ROOT/bootstrap-jupyter.sh
7376
sed -i -e s/'$GEOWAVE_VERSION_TOKEN'/${ARGS[version]}/g $TEMPLATE_ROOT/create-configure-kernel.sh
7477

78+
sed -i -e s/'$GEOWAVE_VERSION_TOKEN'/${ARGS[version]}/g $TEMPLATE_ROOT/bootstrap-jupyterhub.sh
79+
sed -i -e s/'$GEOWAVE_VERSION_URL_TOKEN'/${GEOWAVE_VERSION_URL_TOKEN}/g $TEMPLATE_ROOT/bootstrap-jupyterhub.sh
80+
7581
sed -i -e s/'$GEOWAVE_VERSION_TOKEN'/${ARGS[version]}/g $TEMPLATE_ROOT/bootstrap-zeppelin.sh
7682
sed -i -e s/'$GEOWAVE_VERSION_URL_TOKEN'/${GEOWAVE_VERSION_URL_TOKEN}/g $TEMPLATE_ROOT/bootstrap-zeppelin.sh
7783
sed -i -e s/'$GEOWAVE_VERSION_TOKEN'/${ARGS[version]}/g $TEMPLATE_ROOT/configure-zeppelin.sh
@@ -102,8 +108,14 @@ done
102108
# Copy jupyter additions to separate generated folder
103109
# This will put scripts into separate jupyter folder on s3 when published.
104110
mkdir -p $TARGET_ROOT/jupyter
111+
112+
# copy permanent resources that don't need a template
113+
cp $TEMPLATE_ROOT/jupyter/install-conda.sh $TARGET_ROOT/jupyter/install-conda.sh
114+
cp $TEMPLATE_ROOT/jupyter/jupyterhub_config.py $TARGET_ROOT/jupyter/jupyterhub_config.py
115+
105116
cp $TEMPLATE_ROOT/bootstrap-jupyter.sh $TARGET_ROOT/jupyter/bootstrap-jupyter.sh
106117
cp $TEMPLATE_ROOT/create-configure-kernel.sh $TARGET_ROOT/jupyter/create-configure-kernel.sh
118+
cp $TEMPLATE_ROOT/bootstrap-jupyterhub.sh $TARGET_ROOT/jupyter/bootstrap-jupyterhub.sh
107119

108120
# Copy zeppelin additions to separate generated folder
109121
# This will put scripts into separate zeppelin folder on s3 when published.
@@ -116,6 +128,7 @@ rm $TEMPLATE_ROOT/bootstrap-geowave.sh
116128
rm $TEMPLATE_ROOT/geowave-install-lib.sh
117129
rm $TEMPLATE_ROOT/bootstrap-jupyter.sh
118130
rm $TEMPLATE_ROOT/create-configure-kernel.sh
131+
rm $TEMPLATE_ROOT/bootstrap-jupyterhub.sh
119132
rm $TEMPLATE_ROOT/bootstrap-zeppelin.sh
120133
rm $TEMPLATE_ROOT/configure-zeppelin.sh
121134

deploy/packaging/emr/template/bootstrap-jupyter.sh.template renamed to deploy/packaging/emr/template/jupyter/bootstrap-jupyter.sh.template

Lines changed: 44 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,25 @@
11
#!/bin/bash
22

33
GEOWAVE_VER=${1:-$GEOWAVE_VERSION_TOKEN}
4-
54
JUPYTER_PASSWORD=${2-geowave}
65

6+
is_master() {
7+
if [ $(jq '.isMaster' /mnt/var/lib/info/instance.json) = 'true' ]; then
8+
return 0
9+
else
10+
return 1
11+
fi
12+
}
13+
714
# I've externalized commands into library functions for clarity, download and source
815
if [ ! -f /tmp/create-configure-kernel.sh ]; then
916
aws s3 cp s3://geowave/$GEOWAVE_VERSION_URL_TOKEN/scripts/emr/jupyter/create-configure-kernel.sh /tmp/create-configure-kernel.sh
1017
fi
11-
source /tmp/create-configure-kernel.sh
18+
19+
if [ ! -f /tmp/install-conda.sh ]; then
20+
aws s3 cp s3://geowave/$GEOWAVE_VERSION_URL_TOKEN/scripts/emr/jupyter/install-conda.sh /tmp/install-conda.sh
21+
sudo chmod +x /tmp/install-conda.sh
22+
fi
1223

1324
# The EMR customize hooks run _before_ everything else, so Spark is not yet ready
1425
THIS_SCRIPT="$(realpath "${BASH_SOURCE[0]}")"
@@ -22,48 +33,20 @@ if [ ! -f "$RUN_FLAG" ]; then
2233
exit 0 # Bail and let EMR finish initializing
2334
fi
2435

25-
# Install conda
26-
wget https://repo.continuum.io/miniconda/Miniconda3-4.2.12-Linux-x86_64.sh -O $HOME/miniconda.sh
27-
2836
# Download example notebooks from s3
29-
aws s3 sync s3://geowave-notebooks/$GEOWAVE_VERSION_URL_TOKEN/notebooks/ $HOME/notebooks/
30-
31-
# Modify the file permissions to allow execution within this shell
32-
chmod +x $HOME/miniconda.sh
33-
34-
# Install miniconda and output directory to ~/conda
35-
$HOME/miniconda.sh -b -p $HOME/conda
36-
37-
# Add Conda to the path
38-
printf '\nexport PATH=$HOME/conda/bin:$PATH' >> $HOME/.bashrc
39-
40-
# Source the new PATH
41-
source $HOME/.bashrc
37+
aws s3 sync s3://geowave-notebooks/latest/notebooks/ $HOME/notebooks/
4238

43-
# Setup conda to use the correct channel for pixiedust prerequisites
44-
conda config --set always_yes yes --set changeps1 no
45-
conda config -f --add channels conda-forge
46-
47-
# Install the necessary components for jupyter and pixiedust
48-
conda install jupyter matplotlib numpy pandas pyyaml requests shapely folium owslib
49-
50-
# cleanup
51-
rm ~/miniconda.sh
39+
source /tmp/install-conda.sh
5240

5341
echo bootstrap_conda.sh completed. PATH now: $PATH
5442

5543
echo Performing pixiedust and jupyter kernel setup.
5644

57-
# setup python 3.5 in the master and workers
58-
printf "\nexport PYSPARK_PYTHON=$HOME/conda/bin/python3.5" >> $HOME/.bashrc
59-
printf "\nexport PYSPARK_DRIVER_PYTHON=$HOME/conda/bin/python3.5" >> $HOME/.bashrc
60-
# This was added because Upstart doesn't capture user environment variables before loading jupyter
61-
printf "\nexport HOSTNAME=$HOSTNAME" >> $HOME/.bashrc
62-
source $HOME/.bashrc
45+
source /tmp/create-configure-kernel.sh $GEOWAVE_VER
46+
47+
source /etc/profile.d/conda.sh
6348

64-
pip install --upgrade pip
65-
# Pandas added to install again because conda-forge misses correct pytz dependency version
66-
pip install pixiedust ipywidgets ipyleaflet geomet pandas shapely folium owslib
49+
conda install jupyter
6750

6851
jupyter nbextension enable --py --sys-prefix ipyleaflet
6952
jupyter nbextension enable --py --sys-prefix widgetsnbextension
@@ -81,8 +64,29 @@ printf "\nc.NotebookApp.ip = '*'" >> $HOME/.jupyter/jupyter_notebook_config.py
8164
printf "\nc.NotebookApp.notebook_dir = '$HOME/notebooks/'" >> $HOME/.jupyter/jupyter_notebook_config.py
8265
printf "\nc.NotebookApp.port = 9000" >> $HOME/.jupyter/jupyter_notebook_config.py
8366

84-
# Install Jupyter Kernel components on master node
85-
if is_master ; then
86-
install_kernel
87-
fi
67+
#Adding Jupyter to Upstart so it can be run at bootstrap
68+
cd $HOME
69+
sudo cat << EOF > $HOME/jupyter.conf
70+
description "Jupyter"
71+
72+
start on runlevel [2345]
73+
stop on runlevel [016]
74+
75+
respawn
76+
respawn limit 0 10
77+
78+
env HOME=$HOME
79+
script
80+
. $HOME/.bashrc
81+
exec start-stop-daemon --start -c hadoop --exec $HOME/conda/bin/jupyter-notebook
82+
end script
83+
EOF
84+
sudo mv $HOME/jupyter.conf /etc/init/
85+
sudo chown root:root /etc/init/jupyter.conf
86+
87+
# be sure that jupyter daemon is registered in initctl
88+
sudo initctl reload-configuration
89+
90+
# start jupyter daemon
91+
sudo initctl start jupyter
8892

Lines changed: 97 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,97 @@
1+
#!/bin/bash
2+
3+
# Variables for kernel creation
4+
GEOWAVE_VER=${1:-$GEOWAVE_VERSION_TOKEN}
5+
USER_PASS=${2:-geowave}
6+
7+
is_master() {
8+
if [ $(jq '.isMaster' /mnt/var/lib/info/instance.json) = 'true' ]; then
9+
return 0
10+
else
11+
return 1
12+
fi
13+
}
14+
15+
# I've externalized commands into library functions for clarity, download and source
16+
if [ ! -f /tmp/create-configure-kernel.sh ]; then
17+
aws s3 cp s3://geowave/$GEOWAVE_VERSION_URL_TOKEN/scripts/emr/jupyter/create-configure-kernel.sh /tmp/create-configure-kernel.sh
18+
sudo chmod +x /tmp/create-configure-kernel.sh
19+
fi
20+
21+
if [ ! -f /tmp/install-conda.sh ]; then
22+
aws s3 cp s3://geowave/$GEOWAVE_VERSION_URL_TOKEN/scripts/emr/jupyter/install-conda.sh /tmp/install-conda.sh
23+
sudo chmod +x /tmp/install-conda.sh
24+
fi
25+
26+
27+
# The EMR customize hooks run _before_ everything else, so Spark is not yet ready
28+
THIS_SCRIPT="$(realpath "${BASH_SOURCE[0]}")"
29+
RUN_FLAG="${THIS_SCRIPT}.run"
30+
# On first boot skip past this script to allow EMR to set up the environment. Set a callback
31+
# which will poll for availability of Spark and then create the jupyter kernel
32+
if [ ! -f "$RUN_FLAG" ]; then
33+
touch "$RUN_FLAG"
34+
TIMEOUT= is_master && TIMEOUT=3 || TIMEOUT=4
35+
echo "bash -x $(realpath "${BASH_SOURCE[0]}") > /tmp/bootstrap-jupyterhub.log" | at now + $TIMEOUT min
36+
exit 0 # Bail and let EMR finish initializing
37+
fi
38+
39+
# Download example notebooks from s3
40+
aws s3 sync s3://geowave-notebooks/latest/notebooks/ $HOME/notebooks/
41+
42+
# Download hub configuration file
43+
sudo su root -c "aws s3 cp s3://geowave/$GEOWAVE_VERSION_URL_TOKEN/scripts/emr/jupyter/jupyterhub_config.py /etc/jupyterhub/"
44+
45+
# Download latest conda (Python 3.6.1) to root install location
46+
sudo su root -c "source /tmp/install-conda.sh /opt/miniconda.sh /opt/conda/"
47+
48+
# Install the necessary components for jupyter and pixiedust
49+
sudo su root -c "/opt/conda/bin/conda install jupyterhub jupyter ncurses"
50+
51+
echo bootstrap_conda.sh completed. PATH now: $PATH
52+
echo Performing pixiedust and jupyter kernel setup.
53+
54+
if is_master; then
55+
sudo su root -c "source /tmp/create-configure-kernel.sh $GEOWAVE_VER /usr/local/pixiedust /opt/conda/bin /opt/conda/share/jupyter/kernels"
56+
fi
57+
58+
# Allow pixiedust to be accessed by all users
59+
sudo chmod -R 777 /usr/local/pixiedust/
60+
61+
# Add upstart service to run jupyterhub
62+
sudo cat << EOF > $HOME/jupyterhub.conf
63+
description "JupyterHub"
64+
65+
start on runlevel [2345]
66+
stop on runlevel [016]
67+
68+
respawn
69+
respawn limit 0 10
70+
71+
script
72+
if [ -f /etc/jupyterhub/oauth_env.sh ]; then
73+
. /etc/jupyterhub/oauth_env.sh
74+
fi
75+
. /etc/profile.d/conda.sh
76+
exec start-stop-daemon --start --exec /opt/conda/bin/jupyterhub -- --config /etc/jupyterhub/jupyterhub_config.py > /var/log/jupyterhub.log 2>&1
77+
end script
78+
EOF
79+
sudo mv $HOME/jupyterhub.conf /etc/init/
80+
sudo chown root:root /etc/init/jupyterhub.conf
81+
82+
sudo mkdir -p /srv/jupyterhub
83+
# Write default userlist that adds jupyterhub user as admin
84+
sudo cat << EOF > $HOME/userlist
85+
jupyterhub admin
86+
EOF
87+
sudo mv $HOME/userlist /srv/jupyterhub/
88+
sudo chown root:root /srv/jupyterhub/userlist
89+
90+
# Add jupyterhub user
91+
sudo useradd -m -s /bin/bash -N jupyterhub
92+
sudo printf "jupyterhub:$USER_PASS" | sudo chpasswd
93+
94+
# Start jupyterhub service
95+
# be sure that jupyter daemon is registered in initctl
96+
sudo initctl reload-configuration
97+
sudo initctl start jupyterhub

deploy/packaging/emr/template/create-configure-kernel.sh.template renamed to deploy/packaging/emr/template/jupyter/create-configure-kernel.sh.template

Lines changed: 19 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,16 @@
11
#!/bin/bash
22

3+
34
GEOWAVE_VER=${1:-$GEOWAVE_VERSION_TOKEN}
4-
MASTER_ARG=${2:-yarn}
5-
INTIAL_POLLING_INTERVAL=15 # This gets doubled for each attempt up to max_attempts
5+
PIXIEDUST_HOME=${2:-$HOME/pixiedust/}
6+
CONDA_INSTALL=${3:-$HOME/conda/bin}
7+
KERNEL_OUT=${4:-$HOME/.local/share/jupyter/kernels/}
8+
SPARK_HOME=${5:-/usr/lib/spark}
9+
MASTER_ARG=${6:-yarn}
610

7-
# Parses a configuration file put in place by EMR to determine the role of this node
11+
INTIAL_POLLING_INTERVAL=15 # This gets doubled for each attempt up to max_attempts
812

9-
is_master() {
10-
if [ $(jq '.isMaster' /mnt/var/lib/info/instance.json) = 'true' ]; then
11-
return 0
12-
else
13-
return 1
14-
fi
15-
}
13+
KERNEL_DIR=$HOME/.local/share/jupyter/kernels/
1614

1715
# Avoid race conditions and actually poll for availability of component dependencies
1816
# Credit: http://stackoverflow.com/questions/8350942/how-to-re-run-the-curl-command-automatically-when-the-error-occurs/8351489#8351489
@@ -61,24 +59,26 @@ wait_until_spark_is_available() {
6159
fi
6260
}
6361

64-
install_kernel() {
62+
#Install the Kernel
6563
wait_until_spark_is_available
6664

6765
# Create the jupyter kernel
68-
jupyter pixiedust install <<END
69-
y
66+
mkdir -p $PIXIEDUST_HOME
67+
68+
${CONDA_INSTALL}/jupyter pixiedust install <<END
69+
n
70+
$PIXIEDUST_HOME
7071
n
71-
/usr/lib/spark
72+
$SPARK_HOME
7273
y
7374
y
7475
y
7576
END
7677

7778
#Use jq to remove unnecessary keys
7879
GEOWAVE_INSTALL=/usr/local/geowave/tools/geowave-tools-${GEOWAVE_VER}-apache.jar
79-
#Grab all pixiedust kernels installed (should only be one by default), and use for master kernel changes.
80-
KERNEL_DIR=$HOME/.local/share/jupyter/kernels/
81-
PIXIEDUST_KERNELS="$(find $KERNEL_DIR -type d -name 'pythonwithpixiedustspark*')"
80+
PIXIEDUST_KERNELS=$(find $KERNEL_DIR -type d -name pythonwithpixiedustspark*)
81+
echo $PIXIEDUST_KERNELS
8282
KERNEL_JSON=$PIXIEDUST_KERNELS/kernel.json
8383
jq 'del(.env["SPARK_LOCAL_IP"])' $KERNEL_JSON > tmp.$$.json && mv tmp.$$.json $KERNEL_JSON
8484
jq 'del(.env["SPARK_DRIVER_MEMORY"])' $KERNEL_JSON > tmp.$$.json && mv tmp.$$.json $KERNEL_JSON
@@ -128,31 +128,5 @@ jq --arg submit_args "${submit_string}" '.env["PYSPARK_SUBMIT_ARGS"]= $submit_ar
128128

129129
echo "Modified Kernel to use yarn by default"
130130

131-
#Adding Jupyter to Upstart so it can be run at bootstrap
132-
cd $HOME
133-
sudo cat << EOF > $HOME/jupyter.conf
134-
description "Jupyter"
135-
136-
start on runlevel [2345]
137-
stop on runlevel [016]
138-
139-
respawn
140-
respawn limit 0 10
141-
142-
env HOME=$HOME
143-
script
144-
. $HOME/.bashrc
145-
exec start-stop-daemon --start -c hadoop --exec $HOME/conda/bin/jupyter-notebook
146-
end script
147-
EOF
148-
sudo mv $HOME/jupyter.conf /etc/init/
149-
sudo chown root:root /etc/init/jupyter.conf
150-
151-
# be sure that jupyter daemon is registered in initctl
152-
sudo initctl reload-configuration
153-
154-
# start jupyter daemon
155-
sudo initctl start jupyter
156-
157-
return 0
158-
}
131+
# Copy final modified kernel to output install location
132+
cp -R $PIXIEDUST_KERNELS $KERNEL_OUT
Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,38 @@
1+
#!/usr/bin/env bash
2+
3+
CONDA_DL_LOC=${1-$HOME/miniconda.sh}
4+
CONDA_INSTALL_LOC=${2-$HOME/conda/}
5+
6+
7+
# Download latest conda (Python 3.6.1) to root install location
8+
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O $CONDA_DL_LOC
9+
10+
# Modify the file permissions to allow execution within this shell
11+
chmod +x "$CONDA_DL_LOC"
12+
13+
# Install miniconda and output directory to /opt/conda
14+
$CONDA_DL_LOC -bfp $CONDA_INSTALL_LOC
15+
16+
# Add Conda to the path so all users with shell can see conda
17+
printf "export PATH=${CONDA_INSTALL_LOC}bin:"'$PATH' | sudo tee -a /etc/profile.d/conda.sh
18+
# setup python 3.6 in the master and workers
19+
printf "\nexport PYSPARK_PYTHON=${CONDA_INSTALL_LOC}bin/python3.6" | sudo tee -a /etc/profile.d/conda.sh
20+
printf "\nexport PYSPARK_DRIVER_PYTHON=${CONDA_INSTALL_LOC}bin/python3.6" | sudo tee -a /etc/profile.d/conda.sh
21+
# This was added because Upstart doesn't capture user environment variables before loading jupyter
22+
printf "\nexport HOSTNAME=$HOSTNAME" | sudo tee -a /etc/profile.d/conda.sh
23+
24+
sudo chmod +x /etc/profile.d/conda.sh
25+
26+
source /etc/profile.d/conda.sh
27+
28+
# Set config options to install dependencies properly
29+
${CONDA_INSTALL_LOC}/bin/conda config --system --set always_yes yes --set changeps1 no
30+
${CONDA_INSTALL_LOC}/bin/conda config --system -f --add channels conda-forge
31+
32+
# Install dependencies used for geowave + pixiedust
33+
${CONDA_INSTALL_LOC}/bin/conda install matplotlib numpy pandas pyyaml requests shapely folium owslib nbconvert
34+
35+
# Install pip dependencies
36+
${CONDA_INSTALL_LOC}/bin/pip install pixiedust oauthenticator ipywidgets ipyleaflet geomet pandas shapely folium owslib
37+
38+
rm $CONDA_DL_LOC

0 commit comments

Comments (0)