[Spark] Installation and Getting Started
2020-06-25 · 홀쑥
On dn01, as the root account:
# cd /tmp
# wget http://apache.mirror.cdnetworks.com/spark/spark-2.4.5/spark-2.4.5-bin-hadoop2.7.tgz
# tar xzvf spark-2.4.5-bin-hadoop2.7.tgz
# mkdir -p /opt/spark/2.4.5
# mv spark-2.4.5-bin-hadoop2.7/* /opt/spark/2.4.5/
# ln -s /opt/spark/2.4.5 /opt/spark/current
# chown -R hadoop:hadoop /opt/spark/
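A quick sanity check that the link and ownership came out right; current should appear as a symlink to /opt/spark/2.4.5, owned by hadoop:
# ls -l /opt/spark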
# su - hadoop
# vi ~/.bash_profile
###### spark ######################
export SPARK_HOME=/opt/spark/current
export PATH=$PATH:$SPARK_HOME/bin
export PATH=$PATH:$SPARK_HOME/sbin
#### spark ######################
Append the block above to the end of the file and save.
# source ~/.bash_profile
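To confirm the profile took effect, the Spark binaries should now resolve without a full path, and spark-submit should report version 2.4.5:
# which spark-shell
# spark-submit --version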
# cd $SPARK_HOME/conf
# cp slaves.template slaves
# vi slaves
Delete the localhost line, then enter the worker hostnames below and save:
nn01
dn02
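The finished file should list only the worker hostnames:
# cat $SPARK_HOME/conf/slaves
nn01
dn02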
# cp spark-defaults.conf.template spark-defaults.conf
# vi spark-defaults.conf
spark.yarn.jars /opt/spark/current/jars/*
Save the file; this points Spark-on-YARN jobs at the local jar directory instead of uploading the Spark jars for every job.
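Once YARN is running and spark-env.sh (below) is configured, a quick way to exercise this setting is the bundled SparkPi example submitted to YARN. The jar name here assumes the stock Spark 2.4.5 / Scala 2.11 distribution; adjust it if your build differs:
# spark-submit --master yarn --deploy-mode cluster \
    --class org.apache.spark.examples.SparkPi \
    $SPARK_HOME/examples/jars/spark-examples_2.11-2.4.5.jar 10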
# cp log4j.properties.template log4j.properties
# vi log4j.properties
Change INFO to ERROR so the Spark shell output is not flooded with INFO messages:
log4j.rootCategory=ERROR, console
# cp spark-env.sh.template spark-env.sh
# vi spark-env.sh
Append the following at the end:
SPARK_MASTER_HOST=dn01
export JAVA_HOME=/opt/jdk/current
export HADOOP_HOME=/opt/hadoop/current
export SPARK_HOME=/opt/spark/current
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_DRIVER_MEMORY=2g
export SPARK_EXECUTOR_INSTANCES=2
export SPARK_EXECUTOR_CORES=1
export SPARK_EXECUTOR_MEMORY=2g
export SPARK_MASTER_IP=192.168.56.102
#export SPARK_WORKER_DIR=/spark_data/spwork
#export SPARK_PID_DIR=/spark_data/sptmp
export SPARK_DIST_CLASSPATH=$(/opt/hadoop/current/bin/hadoop classpath):/opt/spark/current/jars/*
#export PYTHONPATH=/opt/python/current/python3
#export PYSPARK_PYTHON=/opt/python/current/python3
Save the file, then launch the Spark shell to test:
# spark-shell
scala> sc.setLogLevel("WARN")
scala> val f = sc.textFile("file:///etc/hosts")
scala> f.count
scala> f.first
scala> f.collect
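As a slightly bigger smoke test in the same session, the plain RDD API can count word frequencies in the file:
scala> f.flatMap(_.split("\\s+")).map(w => (w, 1)).reduceByKey(_ + _).collect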
scala> :quit
From dn01, copy the installation to dn02 and nn01:
# sudo scp -r /opt/spark dn02:/opt/spark
# sudo scp -r /opt/spark nn01:/opt/spark
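If the copy succeeds, you can peek at what arrived on a worker (scp -r dereferences symlinks, which is why the link fix below is needed):
# ssh dn02 ls -l /opt/spark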
--- When the sudo command does not work
As root, run visudo -f /etc/sudoers.
Press Esc, type :100 to jump to the root ALL=(ALL) ALL line, yank it with yy, paste it right below with p, and change root to hadoop:
root ALL=(ALL) ALL
hadoop ALL=(ALL) ALL
To also skip the password prompt, use NOPASSWD:
hadoop ALL=(ALL) NOPASSWD: ALL (registers the account)
%hadoop ALL=(ALL) NOPASSWD: ALL (registers the group)
---
The symlink does not survive the copy: scp -r follows symbolic links, so current arrives on the destination nodes as a full directory copy rather than a link.
Log in as root on dn02 and nn01 and recreate it:
# rm -rf /opt/spark/current
# ln -s /opt/spark/2.4.5 /opt/spark/current
# ll /opt/spark/
# chown -R hadoop:hadoop /opt/spark/
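With the links restored on all three nodes, the standalone cluster can be started from dn01 (the master, per SPARK_MASTER_HOST above). This is a minimal sketch assuming passwordless SSH between the nodes:
# su - hadoop
# $SPARK_HOME/sbin/start-all.sh
# jps
jps should show a Master process on dn01 and Worker processes on nn01 and dn02, and the master web UI should be reachable at http://dn01:8080.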