Versions
apache-hive-3.1.3-bin.tar.gz
spark-3.0.0-bin-hadoop3.2.tgz
hadoop-3.1.3.tar.gz
Steps
Create two directories on HDFS:
spark-history (set permissions to 777) and spark-jars
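A minimal way to create them, assuming HDFS is already up and you are operating as a superuser:
hdfs dfs -mkdir /spark-history /spark-jars
hdfs dfs -chmod 777 /spark-history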
Upload the Spark jars to HDFS:
hdfs dfs -D dfs.replication=1 -put ./* /spark-jars
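You can verify the upload with hdfs dfs -ls /spark-jars. The -D dfs.replication=1 flag overrides the cluster's default replication factor (3, per hdfs-site.xml below) for this upload only.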
hadoop
core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!-- Default filesystem URI; hacluster is the HA nameservice ID registered in ZooKeeper -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://hacluster</value>
</property>
<!-- Directory for temporary files Hadoop creates at runtime -->
<property>
<name>hadoop.tmp.dir</name>
<value>file:///opt/hadoop-3.1.3/tmp</value>
</property>
<!-- I/O buffer size; defaults to 4 KB -->
<property>
<name>io.file.buffer.size</name>
<value>4096</value>
</property>
<!-- ZooKeeper quorum addresses -->
<property>
<name>ha.zookeeper.quorum</name>
<value>node15:2181,node16:2181,node17:2181,node18:2181</value>
</property>
<!-- Hosts from which the root user may proxy other users -->
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<!-- Groups whose members root may impersonate -->
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
<!-- Static user for the HDFS web UI (root here) -->
<property>
<name>hadoop.http.staticuser.user</name>
<value>root</value>
</property>
</configuration>
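With HA in place, unqualified HDFS paths resolve against this nameservice. For example (assuming the cluster from this guide is running), hdfs dfs -ls hdfs://hacluster/ lists the root directory via whichever NameNode is active.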
hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<!-- Block size; defaults to 128 MB (dfs.block.size is the deprecated spelling) -->
<name>dfs.blocksize</name>
<value>134217728</value>
</property>
<property>
<!-- Replication factor; defaults to 3 if unset -->
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<!-- Where the NameNode stores its metadata -->
<name>dfs.namenode.name.dir</name>
<value>file:///opt/hadoop-3.1.3/dfs/namenode_data</value>
</property>
<property>
<!-- Where the DataNode stores its block data -->
<name>dfs.datanode.data.dir</name>
<value>file:///opt/hadoop-3.1.3/dfs/datanode_data</value>
</property>
<property>
<!-- Enable the WebHDFS REST API -->
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<!-- Maximum number of transfer threads on a DataNode -->
<name>dfs.datanode.max.transfer.threads</name>
<value>4096</value>
</property>
<property>
<!-- Logical nameservice ID for the HA cluster -->
<name>dfs.nameservices</name>
<value>hacluster</value>
</property>
<property>
<!-- The hacluster nameservice has two NameNodes: nn1 and nn2 -->
<name>dfs.ha.namenodes.hacluster</name>
<value>nn1,nn2</value>
</property>
<!-- RPC, service RPC, and HTTP addresses for nn1 -->
<property>
<name>dfs.namenode.rpc-address.hacluster.nn1</name>
<value>node15:9000</value>
</property>
<property>
<name>dfs.namenode.servicerpc-address.hacluster.nn1</name>
<value>node15:53310</value>
</property>
<property>
<name>dfs.namenode.http-address.hacluster.nn1</name>
<value>node15:50070</value>
</property>
<!-- RPC, service RPC, and HTTP addresses for nn2 -->
<property>
<name>dfs.namenode.rpc-address.hacluster.nn2</name>
<value>node16:9000</value>
</property>
<property>
<name>dfs.namenode.servicerpc-address.hacluster.nn2</name>
<value>node16:53310</value>
</property>
<property>
<name>dfs.namenode.http-address.hacluster.nn2</name>
<value>node16:50070</value>
</property>
<property>
<!-- Where the NameNodes' shared edit log lives on the JournalNodes -->
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://node15:8485;node16:8485;node17:8485;node18:8485/hacluster</value>
</property>
<property>
<!-- Local disk directory for JournalNode data -->
<name>dfs.journalnode.edits.dir</name>
<value>/opt/hadoop-3.1.3/dfs/journalnode_data</value>
</property>
<property>
<!-- Local directory for the NameNode's edit log -->
<name>dfs.namenode.edits.dir</name>
<value>/opt/hadoop-3.1.3/dfs/edits</value>
</property>
<property>
<!-- Enable automatic NameNode failover -->
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<!-- Proxy provider clients use to locate the active NameNode -->
<name>dfs.client.failover.proxy.provider.hacluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<!-- Fencing method -->
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<property>
<!-- sshfence requires passwordless SSH; location of the private key -->
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<property>
<!-- HDFS permission checking; false disables enforcement -->
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
</configuration>
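These HA settings only take effect once the coordinating services are initialized. A typical first-time bootstrap, sketched under the assumption that ZooKeeper is already running on the quorum nodes above:
# on every JournalNode host
hdfs --daemon start journalnode
# on nn1 (node15): format HDFS and start the first NameNode
hdfs namenode -format
hdfs --daemon start namenode
# on nn2 (node16): copy the freshly formatted metadata from nn1
hdfs namenode -bootstrapStandby
# on nn1: create the failover znode in ZooKeeper, then bring up the whole cluster
hdfs zkfc -formatZK
start-dfs.sh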
mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!-- Legacy MRv1 JobTracker address; unused under YARN but kept from the original setup -->
<property>
<name>mapred.job.tracker</name>
<value>node15:9001</value>
</property>
<!-- Run MapReduce on the YARN resource manager -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- JobHistory server address -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>node15:10020</value>
</property>
<!-- JobHistory server web UI address -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>node15:19888</value>
</property>
<property>
<name>yarn.application.classpath</name>
<value>/opt/hadoop-3.1.3/etc/hadoop:/opt/hadoop-3.1.3/share/hadoop/common/lib/*:/opt/hadoop-3.1.3/share/hadoop/common/*:/opt/hadoop-3.1.3/share/hadoop/hdfs:/opt/hadoop-3.1.3/share/hadoop/hdfs/lib/*:/opt/hadoop-3.1.3/share/hadoop/hdfs/*:/opt/hadoop-3.1.3/share/hadoop/mapreduce/lib/*:/opt/hadoop-3.1.3/share/hadoop/mapreduce/*:/opt/hadoop-3.1.3/share/hadoop/yarn:/opt/hadoop-3.1.3/share/hadoop/yarn/lib/*:/opt/hadoop-3.1.3/share/hadoop/yarn/*</value>
</property>
<property>
<name>mapreduce.map.memory.mb</name>
<value>1500</value>
<description>Physical memory limit for each Map task</description>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>3000</value>
<description>Physical memory limit for each Reduce task</description>
</property>
<property>
<name>mapreduce.map.java.opts</name>
<value>-Xmx1200m</value>
</property>
<property>
<name>mapreduce.reduce.java.opts</name>
<value>-Xmx2600m</value>
</property>
</configuration>
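Note how the JVM heaps are sized below their container limits: -Xmx1200m is 80% of the 1500 MB map container, and -Xmx2600m is roughly 87% of the 3000 MB reduce container. That headroom covers off-heap memory so YARN's physical-memory check does not kill the container.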
slaves
node15
node16
node17
node18
workers
node15
node16
node17
node18
yarn-site.xml
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<property>
<!-- Whether to enforce virtual memory limits for containers -->
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
<description>Whether virtual memory limits will be enforced for containers</description>
</property>
<property>
<!-- Ratio of virtual to physical memory when setting container memory limits -->
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>4</value>
<description>Ratio between virtual memory to physical memory when setting memory limits for containers</description>
</property>
<property>
<!-- Enable ResourceManager HA -->
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<!-- Cluster ID under which this YARN cluster registers in ZooKeeper -->
<name>yarn.resourcemanager.cluster-id</name>
<value>hayarn</value>
</property>
<property>
<!-- IDs of the two ResourceManagers -->
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<!-- Host for rm1 -->
<name>yarn.resourcemanager.hostname.rm1</name>
<value>node15</value>
</property>
<property>
<!-- Host for rm2 -->
<name>yarn.resourcemanager.hostname.rm2</name>
<value>node16</value>
</property>
<property>
<!-- RM web UI address; defaults to ${yarn.resourcemanager.hostname}:8088 -->
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>node15:8088</value>
</property>
<property>
<!-- RM web UI address; defaults to ${yarn.resourcemanager.hostname}:8088 -->
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>node16:8088</value>
</property>
<property>
<!-- ZooKeeper addresses for the RM state store -->
<name>yarn.resourcemanager.zk-address</name>
<value>node15:2181,node16:2181,node17:2181</value>
</property>
<property>
<!-- Enable RM state recovery -->
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<!-- State-store implementation backing RM recovery -->
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
<!-- Fallback ResourceManager hostname (with HA enabled, rm1/rm2 above take precedence) -->
<name>yarn.resourcemanager.hostname</name>
<value>node18</value>
</property>
<property>
<!-- Auxiliary service NodeManagers run for the MapReduce shuffle -->
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<!-- Enable log aggregation -->
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<!-- Retain aggregated logs for 7 days -->
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://node15:19888/jobhistory/logs</value>
</property>
</configuration>
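After starting YARN, you can check which ResourceManager is active with the standard admin commands (RM IDs as configured above):
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2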
hive
hive-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- JDBC URL of the metastore database -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://node15:3306/metastore?useSSL=false</value>
</property>
<!-- JDBC driver class -->
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<!-- JDBC username -->
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<!-- JDBC password -->
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>hadoop</value>
</property>
<!-- Hive's default warehouse directory on HDFS -->
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
</property>
<!-- Disable metastore schema version verification -->
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>
<!-- Metastore event/notification API authorization -->
<property>
<name>hive.metastore.event.db.notification.api.auth</name>
<value>false</value>
</property>
<!-- Host HiveServer2 binds to -->
<property>
<name>hive.server2.thrift.bind.host</name>
<value>node15</value>
</property>
<!-- HiveServer2 Thrift port -->
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
</property>
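<!-- Spark jars on HDFS, uploaded in the steps above -->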
<property>
<name>spark.yarn.jars</name>
<value>hdfs://node15:9000/spark-jars/*</value>
</property>
<!-- Hive execution engine -->
<property>
<name>hive.execution.engine</name>
<value>spark</value>
</property>
<property>
<name>spark.home</name>
<value>/opt/spark-3.0.0-bin-hadoop3.2/</value>
</property>
</configuration>
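The metastore database must exist before Hive starts. A typical initialization, assuming MySQL runs on node15 with the credentials above and the MySQL JDBC driver jar is already in Hive's lib directory:
mysql -uroot -phadoop -e "CREATE DATABASE IF NOT EXISTS metastore"
schematool -initSchema -dbType mysql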
spark-defaults.conf
spark.master yarn
spark.eventLog.enabled true
spark.eventLog.dir hdfs://node15:9000/spark-history
spark.executor.memory 600m
spark.driver.memory 600m
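Once the Spark section below is also in place, a quick smoke test is a query that forces a Spark job; the first statement is slow because it spins up the Spark session on YARN (smoke_test is just an illustrative table name):
hive -e "create table smoke_test(id int); insert into smoke_test values(1);"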
spark
hdfs-site.xml
Symlink the file from the Hadoop configuration directory:
ln -s <source-file> <link-name>
hive-site.xml
Symlink the file from the Hive configuration directory:
ln -s <source-file> <link-name>
slaves
node15
node16
node17
node18
yarn-site.xml
Symlink the file from the Hadoop configuration directory (see the concrete example below):
ln -s <source-file> <link-name>
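Concretely, with the install paths used throughout this guide (and assuming Hive is unpacked at /opt/apache-hive-3.1.3-bin, a path this article never states explicitly), the links might look like:
ln -s /opt/hadoop-3.1.3/etc/hadoop/hdfs-site.xml /opt/spark-3.0.0-bin-hadoop3.2/conf/hdfs-site.xml
ln -s /opt/apache-hive-3.1.3-bin/conf/hive-site.xml /opt/spark-3.0.0-bin-hadoop3.2/conf/hive-site.xml
ln -s /opt/hadoop-3.1.3/etc/hadoop/yarn-site.xml /opt/spark-3.0.0-bin-hadoop3.2/conf/yarn-site.xml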
spark-env.sh
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
export SCALA_HOME=/usr/share/scala
export JAVA_HOME=/usr/java/jdk1.8.0_241-amd64
export SPARK_HOME=/opt/spark-3.0.0-bin-hadoop3.2
export SPARK_MASTER_HOST=192.168.206.215 # SPARK_MASTER_IP is deprecated since Spark 2.0
export SPARK_MASTER_PORT=7077
export SPARK_MASTER_WEBUI_PORT=7080 # Spark's master web UI defaults to 8080; changed to avoid a possible port conflict
export SPARK_WORKER_CORES=1
export SPARK_WORKER_INSTANCES=1
export SPARK_EXECUTOR_MEMORY=512M
export SPARK_WORKER_MEMORY=1G
export SPARK_DIST_CLASSPATH=$(/opt/hadoop-3.1.3/bin/hadoop classpath)
export HADOOP_CONF_DIR=/opt/hadoop-3.1.3/etc/hadoop
# This file is sourced when running various Spark programs.
# Copy it as spark-env.sh and edit that to configure Spark for your site.
# Options read when launching programs locally with
# ./bin/run-example or ./bin/spark-submit
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program
# Options read by executors and drivers running inside the cluster
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos
# Options read in YARN client/cluster mode
# - SPARK_CONF_DIR, Alternate conf dir. (Default: ${SPARK_HOME}/conf)
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
# - YARN_CONF_DIR, to point Spark towards YARN configuration files when you use YARN
# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1).
# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G)
# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G)
# Options for the daemons used in the standalone deploy mode
# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname
# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master
# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")
# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
# - SPARK_WORKER_DIR, to set the working directory of worker processes
# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g).
# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y")
# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons
# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers
# Options for launcher
# - SPARK_LAUNCHER_OPTS, to set config properties and Java options for the launcher (e.g. "-Dx=y")
# Generic options for the daemons used in the standalone deploy mode
# - SPARK_CONF_DIR Alternate conf dir. (Default: ${SPARK_HOME}/conf)
# - SPARK_LOG_DIR Where log files are stored. (Default: ${SPARK_HOME}/logs)
# - SPARK_PID_DIR Where the pid file is stored. (Default: /tmp)
# - SPARK_IDENT_STRING A string representing this instance of spark. (Default: $USER)
# - SPARK_NICENESS The scheduling priority for daemons. (Default: 0)
# - SPARK_NO_DAEMONIZE Run the proposed command in the foreground. It will not output a PID file.
# Options for native BLAS, like Intel MKL, OpenBLAS, and so on.
# You might get better performance to enable these options if using native BLAS (see SPARK-21305).
# - MKL_NUM_THREADS=1 Disable multi-threading of Intel MKL
# - OPENBLAS_NUM_THREADS=1 Disable multi-threading of OpenBLAS