1+ #! /bin/bash
2+
3+ # Licensed to the Apache Software Foundation (ASF) under one or more
4+ # contributor license agreements. See the NOTICE file distributed with
5+ # this work for additional information regarding copyright ownership.
6+ # The ASF licenses this file to You under the Apache License, Version 2.0
7+ # (the "License"); you may not use this file except in compliance with
8+ # the License. You may obtain a copy of the License at
9+ #
10+ # http://www.apache.org/licenses/LICENSE-2.0
11+ #
12+ # Unless required by applicable law or agreed to in writing, software
13+ # distributed under the License is distributed on an "AS IS" BASIS,
14+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+ # See the License for the specific language governing permissions and
16+ # limitations under the License.
117
218# This script is used to set up a local Hadoop and Tez environment for running a simple word count example.
319# Prerequisites
1026# TEZ_EXAMPLE_WORKING_DIR: defaults to the current working directory
1127
1228# TEZ_VERSION comes from environment variable or is fetched from the Apache Tez download page
13- export TEZ_VERSION=${TEZ_VERSION:= $(curl -s " https://downloads.apache.org/tez/" | grep --color=never -o ' [0-9]\+\.[0-9]\+\.[0-9]\+' | sed -n ' /\/$/!p' | sort -V | tail -1)} # e.g. 0.10.4
14- export TEZ_EXAMPLE_WORKING_DIR=${TEZ_EXAMPLE_WORKING_DIR:= $PWD }
15- cd $TEZ_EXAMPLE_WORKING_DIR
29+ export TEZ_VERSION=" ${TEZ_VERSION:- $(curl -s " https://downloads.apache.org/tez/" | grep --color=never -o ' [0-9]\+\.[0-9]\+\.[0-9]\+' | sed -n ' /\/$/!p' | sort -V | tail -1)} " # e.g. 0.10.4
30+ export TEZ_EXAMPLE_WORKING_DIR=" ${TEZ_EXAMPLE_WORKING_DIR:- $PWD } "
31+ cd " $TEZ_EXAMPLE_WORKING_DIR " || exit
1632
1733echo " TEZ_VERSION: $TEZ_VERSION "
18- wget -nc https://archive.apache.org/dist/tez/$TEZ_VERSION /apache-tez-$TEZ_VERSION -bin.tar.gz
34+ wget -nc " https://archive.apache.org/dist/tez/$TEZ_VERSION /apache-tez-$TEZ_VERSION -bin.tar.gz"
1935
2036# Need to extract the Tez tarball early to get hadoop version it depends on
2137if [ ! -d " apache-tez-$TEZ_VERSION -bin" ]; then
22- tar -xzf apache-tez-$TEZ_VERSION -bin.tar.gz
38+ tar -xzf " apache-tez-$TEZ_VERSION -bin.tar.gz"
2339fi
2440
25- export HADOOP_VERSION=${HADOOP_VERSION:= $(basename apache-tez-$TEZ_VERSION -bin/ lib/ hadoop-hdfs-client-* .jar | sed -E ' s/.*hadoop-hdfs-client-([0-9]+\.[0-9]+\.[0-9]+)\.jar/\1/' )} # e.g. 3.4.1
41+ export HADOOP_VERSION=" ${HADOOP_VERSION:- $(basename apache-tez-$TEZ_VERSION -bin/ lib/ hadoop-hdfs-client-* .jar | sed -E ' s/.*hadoop-hdfs-client-([0-9]+\.[0-9]+\.[0-9]+)\.jar/\1/' )} " # e.g. 3.4.1
2642
2743cat << EOF
2844***
@@ -32,20 +48,20 @@ cat <<EOF
3248***
3349EOF
3450
35- wget -nc https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION /hadoop-$HADOOP_VERSION .tar.gz
51+ wget -nc " https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION /hadoop-$HADOOP_VERSION .tar.gz"
3652
3753if [ ! -d " hadoop-$HADOOP_VERSION " ]; then
38- tar -xzf hadoop-$HADOOP_VERSION .tar.gz
54+ tar -xzf " hadoop-$HADOOP_VERSION .tar.gz"
3955fi
4056
41- export HADOOP_HOME=$TEZ_EXAMPLE_WORKING_DIR /hadoop-$HADOOP_VERSION
42- export TEZ_HOME=$TEZ_EXAMPLE_WORKING_DIR /apache-tez-$TEZ_VERSION -bin
43- export HADOOP_CLASSPATH=$TEZ_HOME /* :$TEZ_HOME /lib/* :$TEZ_HOME /conf
57+ export HADOOP_HOME=" $TEZ_EXAMPLE_WORKING_DIR /hadoop-$HADOOP_VERSION "
58+ export TEZ_HOME=" $TEZ_EXAMPLE_WORKING_DIR /apache-tez-$TEZ_VERSION -bin"
59+ export HADOOP_CLASSPATH=" $TEZ_HOME /*:$TEZ_HOME /lib/*:$TEZ_HOME /conf"
4460
45- export PATH=$PATH :$HADOOP_HOME /bin
61+ export PATH=" $PATH :$HADOOP_HOME /bin"
4662
4763# https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SingleCluster.html#Pseudo-Distributed_Operation
48- cat << EOF > $HADOOP_HOME /etc/hadoop/hdfs-site.xml
64+ cat << EOF > " $HADOOP_HOME /etc/hadoop/hdfs-site.xml"
4965<?xml version="1.0" encoding="UTF-8"?>
5066<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
5167
@@ -57,7 +73,7 @@ cat <<EOF > $HADOOP_HOME/etc/hadoop/hdfs-site.xml
5773</configuration>
5874EOF
5975
60- cat << EOF > $HADOOP_HOME /etc/hadoop/core-site.xml
76+ cat << EOF > " $HADOOP_HOME /etc/hadoop/core-site.xml"
6177<?xml version="1.0" encoding="UTF-8"?>
6278<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
6379
@@ -69,7 +85,7 @@ cat <<EOF > $HADOOP_HOME/etc/hadoop/core-site.xml
6985</configuration>
7086EOF
7187
72- cat << EOF > $HADOOP_HOME /etc/hadoop/yarn-site.xml
88+ cat << EOF > " $HADOOP_HOME /etc/hadoop/yarn-site.xml"
7389<?xml version="1.0" encoding="UTF-8"?>
7490<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
7591
@@ -82,20 +98,20 @@ cat <<EOF > $HADOOP_HOME/etc/hadoop/yarn-site.xml
8298EOF
8399
84100# optionally stop previous clusters if any
85- $HADOOP_HOME /sbin/stop-dfs.sh
86- $HADOOP_HOME /sbin/stop-yarn.sh
101+ " $HADOOP_HOME /sbin/stop-dfs.sh"
102+ " $HADOOP_HOME /sbin/stop-yarn.sh"
87103
88- rm -rf /tmp/hadoop-$USER /dfs/data
104+ rm -rf " /tmp/hadoop-$USER /dfs/data"
89105hdfs namenode -format -force
90106
91- $HADOOP_HOME /sbin/start-dfs.sh
92- $HADOOP_HOME /sbin/start-yarn.sh
107+ " $HADOOP_HOME /sbin/start-dfs.sh"
108+ " $HADOOP_HOME /sbin/start-yarn.sh"
93109
94- hadoop fs -mkdir -p /apps/tez-$TEZ_VERSION
95- hadoop fs -copyFromLocal $TEZ_HOME /share/tez.tar.gz /apps/tez-$TEZ_VERSION
110+ hadoop fs -mkdir -p " /apps/tez-$TEZ_VERSION "
111+ hadoop fs -copyFromLocal " $TEZ_HOME /share/tez.tar.gz" " /apps/tez-$TEZ_VERSION "
96112
97113# create a simple tez-site.xml
98- cat << EOF > $TEZ_HOME /conf/tez-site.xml
114+ cat << EOF > " $TEZ_HOME /conf/tez-site.xml"
99115<?xml version="1.0" encoding="UTF-8"?>
100116<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
101117
@@ -121,24 +137,23 @@ Friend
121137Game
122138EOF
123139
124- hadoop fs -copyFromLocal words.txt /words.txt
140+ hadoop fs -copyFromLocal " words.txt" " /words.txt"
125141
126142export HADOOP_USER_CLASSPATH_FIRST=true
127143# finally run the example
128- yarn jar $TEZ_HOME /tez-examples-$TEZ_VERSION .jar orderedwordcount /words.txt /words_out
144+ yarn jar " $TEZ_HOME /tez-examples-$TEZ_VERSION .jar" orderedwordcount " /words.txt" " /words_out"
129145
130146# check the output
131- hadoop fs -ls /words_out
132- hadoop fs -text /words_out/part-v002-o000-r-00000
133-
147+ hadoop fs -ls " /words_out"
148+ hadoop fs -text " /words_out/part-v002-o000-r-00000"
134149
135150cat << EOF
136151*** Since the environment is already set up, you can rerun the DAG using the commands below.
137152
138153export HADOOP_USER_CLASSPATH_FIRST=true
139- export TEZ_HOME=$TEZ_EXAMPLE_WORKING_DIR /apache-tez-$TEZ_VERSION -bin
140- export HADOOP_CLASSPATH=$TEZ_HOME /*:$TEZ_HOME /lib/*:$TEZ_HOME /conf
141- $HADOOP_HOME /bin/yarn jar $TEZ_HOME /tez-examples-$TEZ_VERSION .jar orderedwordcount /words.txt /words_out
154+ export TEZ_HOME=" $TEZ_EXAMPLE_WORKING_DIR /apache-tez-$TEZ_VERSION -bin"
155+ export HADOOP_CLASSPATH=" $TEZ_HOME /*:$TEZ_HOME /lib/*:$TEZ_HOME /conf"
156+ " $HADOOP_HOME /bin/yarn" jar " $TEZ_HOME /tez-examples-$TEZ_VERSION .jar" orderedwordcount /words.txt /words_out
142157
143158*** You can also visit some of the sites that are set up during the script execution.
144159
0 commit comments