Skip to content

Commit 2357704

Browse files
committed
TEZ-4631: Include an official script that installs hadoop and tez and runs a simple example DAG (#414) - addendum ASF license
1 parent e847435 commit 2357704

1 file changed

Lines changed: 46 additions & 31 deletions

File tree

dev-support/bin/tez_run_example.sh

Lines changed: 46 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,19 @@
1+
#!/bin/bash
2+
3+
# Licensed to the Apache Software Foundation (ASF) under one or more
4+
# contributor license agreements. See the NOTICE file distributed with
5+
# this work for additional information regarding copyright ownership.
6+
# The ASF licenses this file to You under the Apache License, Version 2.0
7+
# (the "License"); you may not use this file except in compliance with
8+
# the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
117

218
# This script is used to set up a local Hadoop and Tez environment for running a simple word count example.
319
# Prerequisites
@@ -10,19 +26,19 @@
1026
# TEZ_EXAMPLE_WORKING_DIR: defaults to the current working directory
1127

1228
# TEZ_VERSION comes from environment variable or is fetched from the Apache Tez download page
13-
export TEZ_VERSION=${TEZ_VERSION:=$(curl -s "https://downloads.apache.org/tez/" | grep --color=never -o '[0-9]\+\.[0-9]\+\.[0-9]\+' | sed -n '/\/$/!p' | sort -V | tail -1)} # e.g. 0.10.4
14-
export TEZ_EXAMPLE_WORKING_DIR=${TEZ_EXAMPLE_WORKING_DIR:=$PWD}
15-
cd $TEZ_EXAMPLE_WORKING_DIR
29+
export TEZ_VERSION="${TEZ_VERSION:-$(curl -s "https://downloads.apache.org/tez/" | grep --color=never -o '[0-9]\+\.[0-9]\+\.[0-9]\+' | sed -n '/\/$/!p' | sort -V | tail -1)}" # e.g. 0.10.4
30+
export TEZ_EXAMPLE_WORKING_DIR="${TEZ_EXAMPLE_WORKING_DIR:-$PWD}"
31+
cd "$TEZ_EXAMPLE_WORKING_DIR" || exit
1632

1733
echo "TEZ_VERSION: $TEZ_VERSION"
18-
wget -nc https://archive.apache.org/dist/tez/$TEZ_VERSION/apache-tez-$TEZ_VERSION-bin.tar.gz
34+
wget -nc "https://archive.apache.org/dist/tez/$TEZ_VERSION/apache-tez-$TEZ_VERSION-bin.tar.gz"
1935

2036
# Need to extract the Tez tarball early to get hadoop version it depends on
2137
if [ ! -d "apache-tez-$TEZ_VERSION-bin" ]; then
22-
tar -xzf apache-tez-$TEZ_VERSION-bin.tar.gz
38+
tar -xzf "apache-tez-$TEZ_VERSION-bin.tar.gz"
2339
fi
2440

25-
export HADOOP_VERSION=${HADOOP_VERSION:=$(basename apache-tez-$TEZ_VERSION-bin/lib/hadoop-hdfs-client-*.jar | sed -E 's/.*hadoop-hdfs-client-([0-9]+\.[0-9]+\.[0-9]+)\.jar/\1/')} # e.g. 3.4.1
41+
export HADOOP_VERSION="${HADOOP_VERSION:-$(basename apache-tez-$TEZ_VERSION-bin/lib/hadoop-hdfs-client-*.jar | sed -E 's/.*hadoop-hdfs-client-([0-9]+\.[0-9]+\.[0-9]+)\.jar/\1/')}" # e.g. 3.4.1
2642

2743
cat <<EOF
2844
***
@@ -32,20 +48,20 @@ cat <<EOF
3248
***
3349
EOF
3450

35-
wget -nc https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
51+
wget -nc "https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz"
3652

3753
if [ ! -d "hadoop-$HADOOP_VERSION" ]; then
38-
tar -xzf hadoop-$HADOOP_VERSION.tar.gz
54+
tar -xzf "hadoop-$HADOOP_VERSION.tar.gz"
3955
fi
4056

41-
export HADOOP_HOME=$TEZ_EXAMPLE_WORKING_DIR/hadoop-$HADOOP_VERSION
42-
export TEZ_HOME=$TEZ_EXAMPLE_WORKING_DIR/apache-tez-$TEZ_VERSION-bin
43-
export HADOOP_CLASSPATH=$TEZ_HOME/*:$TEZ_HOME/lib/*:$TEZ_HOME/conf
57+
export HADOOP_HOME="$TEZ_EXAMPLE_WORKING_DIR/hadoop-$HADOOP_VERSION"
58+
export TEZ_HOME="$TEZ_EXAMPLE_WORKING_DIR/apache-tez-$TEZ_VERSION-bin"
59+
export HADOOP_CLASSPATH="$TEZ_HOME/*:$TEZ_HOME/lib/*:$TEZ_HOME/conf"
4460

45-
export PATH=$PATH:$HADOOP_HOME/bin
61+
export PATH="$PATH:$HADOOP_HOME/bin"
4662

4763
# https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SingleCluster.html#Pseudo-Distributed_Operation
48-
cat <<EOF > $HADOOP_HOME/etc/hadoop/hdfs-site.xml
64+
cat <<EOF > "$HADOOP_HOME/etc/hadoop/hdfs-site.xml"
4965
<?xml version="1.0" encoding="UTF-8"?>
5066
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
5167
@@ -57,7 +73,7 @@ cat <<EOF > $HADOOP_HOME/etc/hadoop/hdfs-site.xml
5773
</configuration>
5874
EOF
5975

60-
cat <<EOF > $HADOOP_HOME/etc/hadoop/core-site.xml
76+
cat <<EOF > "$HADOOP_HOME/etc/hadoop/core-site.xml"
6177
<?xml version="1.0" encoding="UTF-8"?>
6278
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
6379
@@ -69,7 +85,7 @@ cat <<EOF > $HADOOP_HOME/etc/hadoop/core-site.xml
6985
</configuration>
7086
EOF
7187

72-
cat <<EOF > $HADOOP_HOME/etc/hadoop/yarn-site.xml
88+
cat <<EOF > "$HADOOP_HOME/etc/hadoop/yarn-site.xml"
7389
<?xml version="1.0" encoding="UTF-8"?>
7490
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
7591
@@ -82,20 +98,20 @@ cat <<EOF > $HADOOP_HOME/etc/hadoop/yarn-site.xml
8298
EOF
8399

84100
# optionally stop previous clusters if any
85-
$HADOOP_HOME/sbin/stop-dfs.sh
86-
$HADOOP_HOME/sbin/stop-yarn.sh
101+
"$HADOOP_HOME/sbin/stop-dfs.sh"
102+
"$HADOOP_HOME/sbin/stop-yarn.sh"
87103

88-
rm -rf /tmp/hadoop-$USER/dfs/data
104+
rm -rf "/tmp/hadoop-$USER/dfs/data"
89105
hdfs namenode -format -force
90106

91-
$HADOOP_HOME/sbin/start-dfs.sh
92-
$HADOOP_HOME/sbin/start-yarn.sh
107+
"$HADOOP_HOME/sbin/start-dfs.sh"
108+
"$HADOOP_HOME/sbin/start-yarn.sh"
93109

94-
hadoop fs -mkdir -p /apps/tez-$TEZ_VERSION
95-
hadoop fs -copyFromLocal $TEZ_HOME/share/tez.tar.gz /apps/tez-$TEZ_VERSION
110+
hadoop fs -mkdir -p "/apps/tez-$TEZ_VERSION"
111+
hadoop fs -copyFromLocal "$TEZ_HOME/share/tez.tar.gz" "/apps/tez-$TEZ_VERSION"
96112

97113
# create a simple tez-site.xml
98-
cat <<EOF > $TEZ_HOME/conf/tez-site.xml
114+
cat <<EOF > "$TEZ_HOME/conf/tez-site.xml"
99115
<?xml version="1.0" encoding="UTF-8"?>
100116
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
101117
@@ -121,24 +137,23 @@ Friend
121137
Game
122138
EOF
123139

124-
hadoop fs -copyFromLocal words.txt /words.txt
140+
hadoop fs -copyFromLocal "words.txt" "/words.txt"
125141

126142
export HADOOP_USER_CLASSPATH_FIRST=true
127143
# finally run the example
128-
yarn jar $TEZ_HOME/tez-examples-$TEZ_VERSION.jar orderedwordcount /words.txt /words_out
144+
yarn jar "$TEZ_HOME/tez-examples-$TEZ_VERSION.jar" orderedwordcount "/words.txt" "/words_out"
129145

130146
# check the output
131-
hadoop fs -ls /words_out
132-
hadoop fs -text /words_out/part-v002-o000-r-00000
133-
147+
hadoop fs -ls "/words_out"
148+
hadoop fs -text "/words_out/part-v002-o000-r-00000"
134149

135150
cat <<EOF
136151
*** Since the environment is already set up, you can rerun the DAG using the commands below.
137152
138153
export HADOOP_USER_CLASSPATH_FIRST=true
139-
export TEZ_HOME=$TEZ_EXAMPLE_WORKING_DIR/apache-tez-$TEZ_VERSION-bin
140-
export HADOOP_CLASSPATH=$TEZ_HOME/*:$TEZ_HOME/lib/*:$TEZ_HOME/conf
141-
$HADOOP_HOME/bin/yarn jar $TEZ_HOME/tez-examples-$TEZ_VERSION.jar orderedwordcount /words.txt /words_out
154+
export TEZ_HOME="$TEZ_EXAMPLE_WORKING_DIR/apache-tez-$TEZ_VERSION-bin"
155+
export HADOOP_CLASSPATH="$TEZ_HOME/*:$TEZ_HOME/lib/*:$TEZ_HOME/conf"
156+
"$HADOOP_HOME/bin/yarn" jar "$TEZ_HOME/tez-examples-$TEZ_VERSION.jar" orderedwordcount /words.txt /words_out
142157
143158
*** You can also visit some of the sites that are set up during the script execution.
144159

0 commit comments

Comments
 (0)