[TOC]

Machine Nodes

Node        Spec          IP           Role
hadoop01    8C/32G/500G   10.10.9.15
hadoop02    8C/32G/500G   10.10.9.16
hadoop03    8C/32G/500G   10.10.9.17
hive01      8C/24G/200G   10.10.9.21
hive02      8C/24G/200G   10.10.9.22
hive03      8C/24G/200G   10.10.9.23
ds01        8C/32G/200G   10.10.9.18
ds02        8C/32G/200G   10.10.9.19
ds03        8C/32G/200G   10.10.9.20
common-db   4C/8G/200G    10.10.9.24

General Installation

java

yum install java-1.8.0-openjdk-devel

export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.412.b08-1.el7_9.x86_64
export CLASSPATH=$CLASSPATH:$JAVA_HOME/lib/
export PATH=$JAVA_HOME/bin:$PATH
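
A quick sanity check that the JDK is installed and the variables take effect (append the exports above to /etc/profile, or run them in the current shell):

source /etc/profile
java -version     # expect openjdk version "1.8.0_412" or similar
javac -version
echo $JAVA_HOME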

python38

cd /home/Python-3.8.19

./configure --prefix=/soft/python38

make

# Use make altinstall instead of make install to avoid overwriting the system python command.
sudo make altinstall

cp /soft/python38/bin/python3.8 /soft/python38/bin/python3

export PYTHON_LAUNCHER=/soft/python38
export PATH=$PYTHON_LAUNCHER/bin:$PATH
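
A quick check that the altinstall build under /soft/python38 is the one picked up on PATH:

python3 --version   # expect Python 3.8.19
which python3       # expect /soft/python38/bin/python3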

Configure Passwordless Login

Configure /etc/hosts on hadoop01, hadoop02, hadoop03, hive01, hive02, hive03, ds01, ds02, ds03:

10.10.9.15      hadoop01
10.10.9.16      hadoop02
10.10.9.17      hadoop03
10.10.9.21      hive01
10.10.9.22      hive02
10.10.9.23      hive03
10.10.9.18      ds01
10.10.9.19      ds02
10.10.9.20      ds03

Run the following on hadoop01, hadoop02, hadoop03, hive01, hive02, hive03, ds01, ds02, ds03:

$ ssh-keygen -t dsa
$ cat /root/.ssh/id_dsa.pub
$ touch /root/.ssh/authorized_keys
# Append the contents of /root/.ssh/id_dsa.pub from hadoop01, hadoop02, hadoop03, hive01, hive02, hive03, ds01, ds02, ds03 into authorized_keys
# Then copy hadoop01's authorized_keys to the other nodes
$ scp /root/.ssh/authorized_keys root@hadoop02:/root/.ssh/
$ scp /root/.ssh/authorized_keys root@hadoop03:/root/.ssh/
$ scp /root/.ssh/authorized_keys root@hive01:/root/.ssh/
$ scp /root/.ssh/authorized_keys root@hive02:/root/.ssh/
$ scp /root/.ssh/authorized_keys root@hive03:/root/.ssh/
$ scp /root/.ssh/authorized_keys root@ds01:/root/.ssh/
$ scp /root/.ssh/authorized_keys root@ds02:/root/.ssh/
$ scp /root/.ssh/authorized_keys root@ds03:/root/.ssh/
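
Once authorized_keys has been distributed, a minimal loop verifies passwordless login from the current node to every host listed in /etc/hosts above:

for h in hadoop01 hadoop02 hadoop03 hive01 hive02 hive03 ds01 ds02 ds03; do
    ssh -o BatchMode=yes root@$h hostname || echo "ssh to $h failed"
done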

MYSQL

MySQL accounts (user/password) on common-db (10.10.9.24):

hive/Tlwl@2024
dolphinscheduler/Tlwl@2024
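
A sketch of creating these accounts and the databases used later in this document (hivedb for the Hive metastore, dolphinscheduler for DolphinScheduler), assuming MySQL root access on common-db; adjust the host restriction ('%') to your own policy:

# run on common-db (10.10.9.24)
mysql -uroot -p -e "
CREATE DATABASE IF NOT EXISTS hivedb DEFAULT CHARACTER SET utf8;
CREATE DATABASE IF NOT EXISTS dolphinscheduler DEFAULT CHARACTER SET utf8;
CREATE USER 'hive'@'%' IDENTIFIED BY 'Tlwl@2024';
CREATE USER 'dolphinscheduler'@'%' IDENTIFIED BY 'Tlwl@2024';
GRANT ALL PRIVILEGES ON hivedb.* TO 'hive'@'%';
GRANT ALL PRIVILEGES ON dolphinscheduler.* TO 'dolphinscheduler'@'%';
FLUSH PRIVILEGES;"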

Hadoop Cluster

Configure the Cluster

Create Directories

Create the following directories on hadoop01, hadoop02, hadoop03:

mkdir  /root/hadoop
mkdir  /root/hadoop/tmp
mkdir  /root/hadoop/var
mkdir  /root/hadoop/dfs
mkdir  /root/hadoop/dfs/name
mkdir  /root/hadoop/dfs/data

Configuration file: core-site.xml

Add the following properties to core-site.xml on hadoop01, hadoop02, hadoop03:

<property>
    <name>hadoop.tmp.dir</name>
    <value>/root/hadoop/tmp</value>
    <description>A base for other temporary directories.</description>
</property>
<property>
    <name>fs.default.name</name>
    <value>hdfs://hadoop01:9000</value>
</property>

<property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
</property>
<property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
</property>

<property>
    <name>hadoop.proxyuser.hive.groups</name>
    <value>*</value>
</property>
<property>
    <name>hadoop.proxyuser.hive.hosts</name>
    <value>*</value>
</property>

Configuration file: hadoop-env.sh

Set the following in hadoop-env.sh on hadoop01, hadoop02, hadoop03:

export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.412.b08-1.el7_9.x86_64

Configuration file: hdfs-site.xml

Enable the following properties in hdfs-site.xml on hadoop01, hadoop02, hadoop03:

<property>
    <name>dfs.name.dir</name>
    <value>/root/hadoop/dfs/name</value>
    <description>Path on the local filesystem where the NameNode stores the namespace and transaction logs persistently.</description>
</property>
<property>
    <name>dfs.data.dir</name>
    <value>/root/hadoop/dfs/data</value>
    <description>Comma separated list of paths on the local filesystem of a DataNode where it should store its blocks.</description>
</property>
<property>
    <name>dfs.replication</name>
    <value>2</value>
</property>
<property>
    <name>dfs.permissions</name>
    <value>false</value>
    <description>disable permission checking</description>
</property>
<property>
    <name>dfs.client.read.shortcircuit</name>
    <value>false</value>
</property>

Configuration file: mapred-site.xml

mapred-site.xml is the same on hadoop01, hadoop02, hadoop03, except that mapred.job.tracker points at the local host (hadoop01:49001, hadoop02:49001, hadoop03:49001 respectively). The file on hadoop01:

<property>
    <name>mapred.job.tracker</name>
    <value>hadoop01:49001</value>
</property>
<property>
    <name>mapred.local.dir</name>
    <value>/root/hadoop/var</value>
</property>
<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>
<property>
    <name>yarn.app.mapreduce.am.env</name>
    <value>HADOOP_MAPRED_HOME=/soft/hadoop-3.2.4</value>
</property>
<property>
    <name>mapreduce.map.env</name>
    <value>HADOOP_MAPRED_HOME=/soft/hadoop-3.2.4</value>
</property>
<property>
    <name>mapreduce.reduce.env</name>
    <value>HADOOP_MAPRED_HOME=/soft/hadoop-3.2.4</value>
</property>

Configuration file: workers

The workers file is identical on hadoop01, hadoop02, hadoop03:

hadoop01
hadoop02
hadoop03

Configuration file: yarn-site.xml

Add the following properties to yarn-site.xml on hadoop01, hadoop02, hadoop03:

<property>
    <name>yarn.resourcemanager.hostname</name>
    <value>hadoop01</value>
</property>
<property>
    <description>The address of the applications manager interface in the RM.</description>
    <name>yarn.resourcemanager.address</name>
    <value>${yarn.resourcemanager.hostname}:8032</value>
</property>
<property>
    <description>The address of the scheduler interface.</description>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>${yarn.resourcemanager.hostname}:8030</value>
</property>
<property>
    <description>The http address of the RM web application.</description>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>${yarn.resourcemanager.hostname}:8088</value>
</property>
<property>
    <description>The https address of the RM web application.</description>
    <name>yarn.resourcemanager.webapp.https.address</name>
    <value>${yarn.resourcemanager.hostname}:8090</value>
</property>
<property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>${yarn.resourcemanager.hostname}:8031</value>
</property>
<property>
    <description>The address of the RM admin interface.</description>
    <name>yarn.resourcemanager.admin.address</name>
    <value>${yarn.resourcemanager.hostname}:8033</value>
</property>
<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>
<property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>4096</value>
</property>
<property>
    <name>yarn.nodemanager.vmem-pmem-ratio</name>
    <value>2.1</value>
</property>
<property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>4096</value>
</property>
<property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
</property>

Startup scripts: start-dfs.sh and stop-dfs.sh

Add the following at the top of both start-dfs.sh and stop-dfs.sh on hadoop01, hadoop02, hadoop03:

HDFS_DATANODE_USER=root
HADOOP_SECURE_DN_USER=root
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root

Startup scripts: start-yarn.sh and stop-yarn.sh

Add the following at the top of both start-yarn.sh and stop-yarn.sh on hadoop01, hadoop02, hadoop03:

YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=root
YARN_NODEMANAGER_USER=root

Initialize HDFS

On hadoop01, initialize HDFS:

(Because hadoop01 is the NameNode and hadoop02/hadoop03 are DataNodes, only hadoop01 needs to be initialized, i.e., HDFS is formatted there.)

cd /soft/hadoop-3.2.4/bin
./hadoop  namenode  -format

On hadoop01, start DFS to initialize the HDFS cluster (make sure all three nodes run a DataNode):

cd /soft/hadoop-3.2.4/sbin
start-dfs.sh
stop-dfs.sh
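
Before running stop-dfs.sh, it can help to confirm that all three DataNodes have registered with the NameNode; a quick check on hadoop01 (exact output varies by version):

jps                     # expect NameNode, SecondaryNameNode, DataNode on hadoop01
hdfs dfsadmin -report   # expect "Live datanodes (3)"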

Start the Cluster

On hadoop01:

cd /soft/hadoop-3.2.4/ && bash ./sbin/start-all.sh
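
After start-all.sh, a quick way to confirm the daemons are up is jps on every node; the expected processes below follow from the roles configured above and are indicative only:

jps
# hadoop01: NameNode, SecondaryNameNode, DataNode, ResourceManager, NodeManager
# hadoop02 / hadoop03: DataNode, NodeManager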

Stop the Cluster

On hadoop01:

cd /soft/hadoop-3.2.4/ && bash ./sbin/stop-all.sh

Add Environment Variables

In /etc/profile on hadoop01, hadoop02, hadoop03:

export HADOOP_HOME=/soft/hadoop-3.2.4
export PATH=$HADOOP_HOME/sbin:$HADOOP_HOME/bin:$PATH
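
Reload the profile and confirm the Hadoop binaries resolve from /soft/hadoop-3.2.4:

source /etc/profile
which hadoop     # expect /soft/hadoop-3.2.4/bin/hadoop
hadoop version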

Adjust HDFS Path Permissions (as needed)

# Check current permissions
hdfs dfs -ls /
# Change permissions on the /tmp directory
hdfs dfs -chmod -R 777 /tmp

Tune Key Hadoop Parameters (as needed)

Reference: https://blog.csdn.net/u013412066/article/details/129095011

Hive Cluster

Hive version: 3.1.3; ZooKeeper version: 3.9.0

ZooKeeper Configuration

/soft/zookeeper-3.9.0-bin/conf/zoo.cfg is identical on hive01, hive02, hive03:

tickTime=2000
initLimit=10
syncLimit=5
dataDir=/opt/data/zookeeper
clientPort=2181
server.1=hive01:2888:3888
server.2=hive02:2888:3888
server.3=hive03:2888:3888

Write each server's myid on hive01, hive02, hive03:

echo "1" > /opt/data/zookeeper/myid   # on hive01
echo "2" > /opt/data/zookeeper/myid   # on hive02
echo "3" > /opt/data/zookeeper/myid   # on hive03

Start

On hive01, hive02, hive03:

/soft/zookeeper-3.9.0-bin/bin/zkServer.sh start
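
With the three-server zoo.cfg above, one node should report itself as leader and the other two as followers; check on each node:

/soft/zookeeper-3.9.0-bin/bin/zkServer.sh status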

Hive Configuration

hive-site.xml on hive01

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <!-- metastore server -->
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://10.10.9.24:3306/hivedb?useUnicode=true&amp;characterEncoding=UTF-8&amp;useSSL=false&amp;createDatabaseIfNotExist=true</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.cj.jdbc.Driver</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>hive</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>Tlwl@2024</value>
    </property>
    <property>
        <name>hive.metastore.schema.verification</name>
        <value>false</value>
    </property>
    <property>
        <name>hive.cli.print.current.db</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.cli.print.header</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.metastore.uris</name>
        <value>thrift://hive01:9083,thrift://hive02:9083,thrift://hive03:9083</value>
    </property>

    <!-- hiveserver2 -->
    <property>
        <name>hive.server2.thrift.port</name>
        <value>10000</value>
    </property>
    <property>
        <name>hive.server2.thrift.bind.host</name>
        <value>hive01</value>
        <description>hostname that hiveserver2 binds to</description>
    </property>

    <property>
        <name>hive.server2.support.dynamic.service.discovery</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.zookeeper.quorum</name>
        <value>hive01:2181,hive02:2181,hive03:2181</value>
    </property>
    <property>
        <name>hive.zookeeper.client.port</name>
        <value>2181</value>
    </property>
    <property>
        <name>hive.server2.zookeeper.namespace</name>
        <value>hiveserver2</value>
    </property>

    <property>
        <name>hive.server2.thrift.client.user</name>
        <value>root</value>
    </property>
    <property>
        <name>hive.server2.thrift.client.password</name>
        <value>Tlwl@7777</value>
    </property>

    <property>
        <name>hive.execution.engine</name>
        <value>mr</value>
        <description>mr|tez|spark, default mr</description>
    </property>

    <!-- for hive user config -->
    <property>
        <name>hive.server2.enable.doAs</name>
        <value>false</value>
        <description>false: yarn job as hive; true: yarn job as real username</description>
    </property>
    <!--
    <property>
        <name>hive.server2.authentication</name>
        <value>NONE</value>
    </property>
    <property>
        <name>hive.security.authorization.enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>hive.security.authorization.manager</name>
        <value>org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdConfOnlyAuthorizerFactory</value>
    </property>
    -->
    <property>
        <name>hive.users.in.admin.role</name>
        <value>root</value>
        <description>Users in admin role</description>
    </property>
    <property>
        <name>hive.exec.scratchdir</name>
        <value>/user/hive/tmp</value>
        <description>Hive query execution temporary scratch directory.</description>
    </property>
    <!--
    <property>
        <name>hadoop.proxyuser.hive.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.hive.hosts</name>
        <value>*</value>
    </property>
    -->
    <!-- for support merge into -->
    <property>
        <name>hive.support.concurrency</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.txn.manager</name>
        <value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value>
    </property>
    <property>
        <name>hive.compactor.initiator.on</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.compactor.worker.threads</name>
        <value>1</value>
    </property>
    <property>
        <name>hive.enforce.bucketing</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.enforce.sorting</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.exec.dynamic.partition.mode</name>
        <value>nonstrict</value>
    </property>
    <property>
        <name>hive.exec.max.dynamic.partitions</name>
        <value>1000</value>
    </property>
    <property>
        <name>hive.exec.max.dynamic.partitions.pernode</name>
        <value>100</value>
    </property>
    <!-- HDFS NameNode address -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hadoop01:9000</value>
        <description>The HDFS namenode URI.</description>
    </property>

    <!-- YARN ResourceManager address -->
    <property>
        <name>yarn.resourcemanager.address</name>
        <value>hadoop01:8032</value>
        <description>The YARN ResourceManager address.</description>
    </property>
</configuration>

hive-site.xml on hive02

Identical to hive01's, except that hive.server2.thrift.bind.host is set to the local hostname:

<property>
    <name>hive.server2.thrift.bind.host</name>
    <value>hive02</value>
    <description>hostname that hiveserver2 binds to</description>
</property>

hive-site.xml on hive03

Identical to hive01's, except that hive.server2.thrift.bind.host is set to the local hostname:

<property>
    <name>hive.server2.thrift.bind.host</name>
    <value>hive03</value>
    <description>hostname that hiveserver2 binds to</description>
</property>

Initialize the Hive Metastore Database (MySQL)

On hive01, initialize the metastore database according to hive-site.xml with:

$ schematool -initSchema -dbType mysql

Because the SQL initialization scripts under /soft/hive/scripts/metastore/upgrade/mysql default to latin1, change the column encodings so that COMMENT fields are not garbled:

-- hive 3.1.3 / 4.0.0
alter table COLUMNS_V2 modify column COMMENT varchar(256) character set utf8;
alter table TABLE_PARAMS modify column PARAM_KEY varchar(256) character set utf8;
alter table TABLE_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8;
alter table PARTITION_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8;
alter table PARTITION_KEYS modify column PKEY_COMMENT varchar(4000) character set utf8;
alter table database_params modify column PARAM_KEY varchar(180) character set utf8;
alter table database_params modify column PARAM_VALUE varchar(4000) character set utf8;
-- hive 3.1.3 only
-- alter table INDEX_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8;
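
Optionally, verify the result of the initialization from hive01; schematool should report the Hive distribution version and the metastore schema version stored in MySQL:

schematool -dbType mysql -info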

Start

On hive01, hive02, hive03:

# Start the metastore in the background
cd /soft/hive && HADOOP_OPTS="-Xmx2g" nohup ./bin/hive --service metastore &> /soft/hive/logs/hive-meta.log &
# Start hiveserver2 in the background
cd /soft/hive && HADOOP_OPTS="-Xmx4g" nohup ./bin/hive --service hiveserver2 &> /soft/hive/logs/hiveserver2.log &
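
A simple sanity check that both services came up: the metastore listens on 9083 and HiveServer2 on 10000 (as configured in hive-site.xml), and the logs above should show no errors:

ss -tlnp | grep -E ':9083|:10000'
tail -n 50 /soft/hive/logs/hiveserver2.log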

Client Connection

$ beeline
$ !connect jdbc:hive2://hive01,hive02,hive03/;password=Tlwl7777;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2  root

##### Test: create a table, insert, and query #####

CREATE TABLE base(
   id int COMMENT "ID",
   name string COMMENT '姓名',
   address string COMMENT '地址',
   lastmodificationtime timestamp)
    COMMENT '基础表'
  CLUSTERED BY (id) into 5 buckets
  STORED AS ORC TBLPROPERTIES ('transactional'='true');

INSERT INTO TABLE base VALUES
    (1, 'John Doe1', '北京', '2024-07-08 10:10:10'),
    (2, 'John Doe2', '北京', '2024-07-08 10:10:10'),
    (3, 'John Doe3', '北京', '2024-07-08 10:10:10');

DBeaver is recommended as a client.

Inspect registered HiveServer2 instances with the ZooKeeper client

$ cd /soft/zookeeper-3.9.0-bin && ./bin/zkCli.sh -server 127.0.0.1:2181
$ ls /
$ ls /hiveserver2
[serverUri=hive01:10000;version=3.1.3;sequence=0000000022, serverUri=hive02:10000;version=3.1.3;sequence=0000000023, serverUri=hive03:10000;version=3.1.3;sequence=0000000024]

Tune Key Hive Parameters (as needed)

Reference:

DolphinScheduler Cluster

Reference: https://dolphinscheduler.apache.org/zh-cn/docs/3.2.0/guide/installation/pseudo-cluster

Passwordless Setup

All hadoop, hive, and ds nodes must be able to log in to each other without a password; this was already completed in the passwordless-login step above, so nothing more is needed here.

Download

Download apache-dolphinscheduler-3.2.0-bin.tar.gz and extract it to /home/apache-dolphinscheduler-3.2.0-bin.

Copy mysql-connector-java-8.0.16.jar into /home/apache-dolphinscheduler-3.2.0-bin/{xxx}-server/libs (the libs directory of each server module).

Initialize the Database

Create a dolphinscheduler database in MySQL 5.7 and initialize it with the /home/apache-dolphinscheduler-3.2.0-bin/tools/sql/sql/dolphinscheduler_mysql.sql script.
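
A sketch of this step from the command line, assuming root access to MySQL on common-db and the dolphinscheduler account/password listed in the MySQL section above:

mysql -h 10.10.9.24 -uroot -p -e "CREATE DATABASE IF NOT EXISTS dolphinscheduler DEFAULT CHARACTER SET utf8;"
mysql -h 10.10.9.24 -udolphinscheduler -p'Tlwl@2024' dolphinscheduler \
    < /home/apache-dolphinscheduler-3.2.0-bin/tools/sql/sql/dolphinscheduler_mysql.sql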

Install

Create a Deployment User

Create the deployment user, and make sure to configure passwordless sudo for it. Taking a dolphinscheduler user as an example:

# Log in as root to create the user
useradd dolphinscheduler

# Set a password
echo "dolphinscheduler" | passwd --stdin dolphinscheduler

# Configure passwordless sudo
sed -i '$adolphinscheduler  ALL=(ALL)  NOPASSWD: ALL' /etc/sudoers
sed -i 's/Defaults    requiretty/#Defaults    requiretty/g' /etc/sudoers

# Change ownership so the deployment user can operate on the extracted apache-dolphinscheduler-*-bin directory
chown -R dolphinscheduler:dolphinscheduler apache-dolphinscheduler-*-bin
chmod -R 755 apache-dolphinscheduler-*-bin

Modify the Configuration

cd /home/apache-dolphinscheduler-3.2.0-bin/bin/env

Modify dolphinscheduler_env.sh

# applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log
#export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/*
#export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH
#export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS
#export SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$SPARK_SUBMIT_OPTS
#export FLINK_ENV_JAVA_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$FLINK_ENV_JAVA_OPTS


# JAVA_HOME, will use it to start DolphinScheduler server
export JAVA_HOME=${JAVA_HOME:-/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.412.b08-1.el7_9.x86_64}

# Database related configuration, set database type, username and password
export DATABASE=${DATABASE:-mysql}
export SPRING_PROFILES_ACTIVE=${DATABASE}
export SPRING_DATASOURCE_URL="jdbc:mysql://10.10.9.24:3306/dolphinscheduler?useUnicode=true&characterEncoding=UTF-8&useSSL=false"
export SPRING_DATASOURCE_USERNAME="dolphinscheduler"
export SPRING_DATASOURCE_PASSWORD="Tlwl@2024"

# DolphinScheduler server related configuration
export SPRING_CACHE_TYPE=${SPRING_CACHE_TYPE:-none}
export SPRING_JACKSON_TIME_ZONE=${SPRING_JACKSON_TIME_ZONE:-UTC}

# Registry center configuration, determines the type and link of the registry center
export REGISTRY_TYPE=${REGISTRY_TYPE:-zookeeper}
export REGISTRY_ZOOKEEPER_CONNECT_STRING=${REGISTRY_ZOOKEEPER_CONNECT_STRING:-hive01:2181,hive02:2181,hive03:2181}

# Tasks related configurations, need to change the configuration if you use the related tasks.
export HADOOP_HOME=${HADOOP_HOME:-/soft/hadoop-3.2.4}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/soft/hadoop-3.2.4/etc/hadoop}
export SPARK_HOME=${SPARK_HOME:-/opt/soft/spark}
export PYTHON_LAUNCHER=${PYTHON_LAUNCHER:-/soft/python38/bin/python3}
export HIVE_HOME=${HIVE_HOME:-/soft/hive}
export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink}
export DATAX_LAUNCHER=${DATAX_LAUNCHER:-/soft/datax/bin/datax.py}

export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH

Modify install_env.sh

# ---------------------------------------------------------
# INSTALL MACHINE
# ---------------------------------------------------------
# A comma separated list of machine hostname or IP would be installed DolphinScheduler,
# including master, worker, api, alert. If you want to deploy in pseudo-distributed
# mode, just write a pseudo-distributed hostname
# Example for hostnames: ips="ds1,ds2,ds3,ds4,ds5", Example for IPs: ips="192.168.8.1,192.168.8.2,192.168.8.3,192.168.8.4,192.168.8.5"
ips=${ips:-"ds01,ds02,ds03"}

# Port of SSH protocol, default value is 22. For now we only support same port in all `ips` machine
# modify it if you use different ssh port
sshPort=${sshPort:-"22"}

# A comma separated list of machine hostname or IP would be installed Master server, it
# must be a subset of configuration `ips`.
# Example for hostnames: masters="ds1,ds2", Example for IPs: masters="192.168.8.1,192.168.8.2"
masters=${masters:-"ds01"}

# A comma separated list of machine <hostname>:<workerGroup> or <IP>:<workerGroup>.All hostname or IP must be a
# subset of configuration `ips`, And workerGroup have default value as `default`, but we recommend you declare behind the hosts
# Example for hostnames: workers="ds1:default,ds2:default,ds3:default", Example for IPs: workers="192.168.8.1:default,192.168.8.2:default,192.168.8.3:default"
workers=${workers:-"ds03:default,ds02:default,ds01:default"}

# A comma separated list of machine hostname or IP would be installed Alert server, it
# must be a subset of configuration `ips`.
# Example for hostname: alertServer="ds3", Example for IP: alertServer="192.168.8.3"
alertServer=${alertServer:-"ds02"}

# A comma separated list of machine hostname or IP would be installed API server, it
# must be a subset of configuration `ips`.
# Example for hostname: apiServers="ds1", Example for IP: apiServers="192.168.8.1"
apiServers=${apiServers:-"ds03"}

# The directory to install DolphinScheduler for all machine we config above. It will automatically be created by `install.sh` script if not exists.
# Do not set this configuration same as the current path (pwd). Do not add quotes to it if you using related path.
installPath=${installPath:-"/soft/dolphinscheduler"}

# The user to deploy DolphinScheduler for all machine we config above. For now user must create by yourself before running `install.sh`
# script. The user needs to have sudo privileges and permissions to operate hdfs. If hdfs is enabled than the root directory needs
# to be created by this user
deployUser=${deployUser:-"dolphinscheduler"}

# The root of zookeeper, for now DolphinScheduler default registry server is zookeeper.
# It will delete ${zkRoot} in the zookeeper when you run install.sh, so please keep it same as registry.zookeeper.namespace in yml files.
# Similarly, if you want to modify the value, please modify registry.zookeeper.namespace in yml files as well.
zkRoot=${zkRoot:-"/dolphinscheduler"}

Run the install script; it copies the program to /soft/dolphinscheduler on ds01, ds02, ds03 and starts the corresponding processes on each node according to install_env.sh.

cd /home/apache-dolphinscheduler-3.2.0-bin/bin/
./install.sh
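
After install.sh completes, each ds node should be running the processes assigned to it in install_env.sh; a quick check with jps (the names below are the DolphinScheduler server main classes):

jps
# ds01: MasterServer, WorkerServer
# ds02: WorkerServer, AlertServer
# ds03: WorkerServer, ApiApplicationServer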

Access URL

The ApiApplicationServer is deployed on ds03, so the web UI is at: http://10.10.9.20:12345/dolphinscheduler/ui/

User/password: admin/dolphinscheduler123

Other

Copy datax to /soft/datax on ds01, ds02, ds03.

Copy /soft/hive from hive01 to /soft/hive on ds01, ds02, ds03.

`Note`: the hive.server2.thrift.bind.host value in the copied hive-site.xml does not affect scheduling on the ds nodes; since DS and Hive are deployed on separate machines and no hiveserver2 runs on the ds nodes, this parameter causes no issues there.

Copy /soft/hadoop-3.2.4 from hadoop01 to /soft/hadoop-3.2.4 on ds01, ds02, ds03.