Featured image of post Hadoop Hive Docker

Hadoop Hive Docker

Dockerfile

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
FROM yzy613/hadoop:3.3.6
LABEL maintainer="yijiong" github="github.com/yzy613"

# Install Hive 3.1.3: unpack the distribution, replace its bundled guava-19
# with Hadoop's guava-27 (the two clash at runtime under Hadoop 3.3.x),
# and seed hive-env.sh with the Hadoop/Hive locations.
# Absolute paths + `tar -C` replace the original `cd /tmp … cd -` chain
# (hadolint DL3003); tar runs without -v to keep build logs quiet.
COPY apache-hive-3.1.3-bin.tar.gz /tmp/apache-hive-3.1.3-bin.tar.gz
RUN tar -xzf /tmp/apache-hive-3.1.3-bin.tar.gz -C /tmp && \
	mv /tmp/apache-hive-3.1.3-bin /usr/local/hive && \
	rm -f /tmp/apache-hive-3.1.3-bin.tar.gz && \
	rm -f /usr/local/hive/lib/guava-19.0.jar && \
	cp /usr/local/hadoop/share/hadoop/common/lib/guava-27.0-jre.jar /usr/local/hive/lib/ && \
	mv /usr/local/hive/conf/hive-env.sh.template /usr/local/hive/conf/hive-env.sh && \
	{ \
		echo "HADOOP_HOME=/usr/local/hadoop"; \
		echo "export HIVE_CONF_DIR=/usr/local/hive/conf"; \
		echo "export HIVE_AUX_JARS_PATH=/usr/local/hive/lib"; \
	} >> /usr/local/hive/conf/hive-env.sh

# Hive environment for interactive shells and downstream tooling
ENV HIVE_HOME=/usr/local/hive
ENV PATH=$PATH:$HIVE_HOME/bin
ENV HIVE_CONF_DIR=/usr/local/hive/conf
ENV HIVE_AUX_JARS_PATH=/usr/local/hive/lib

# Site configuration (hive-site.xml with metastore/HiveServer2 settings)
COPY hive-conf/* /usr/local/hive/conf/

# Extra jars (e.g. the MySQL JDBC driver needed by the metastore)
COPY hive-lib/* /usr/local/hive/lib/

# init hive remote metastore — run once, manually, after the RDBMS is reachable
#CMD schematool -initSchema -dbType mysql -verbose

# start up (services are launched by hand inside the container; see article)
#CMD hive --service metastore &
#CMD hive --service hiveserver2 &

需要准备的目录和文件

1
2
$ ls
apache-hive-3.1.3-bin.tar.gz  Dockerfile  hive-conf/  hive-lib/

hive-conf/

1
2
$ ls hive-conf/
hive-site.xml

hive-site.xml

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
<configuration>
    <!-- Connection settings for the MySQL database that backs the metastore.
         createDatabaseIfNotExist lets MySQL create the hive3 schema on first use. -->
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://example.com:3306/hive3?createDatabaseIfNotExist=true&amp;useSSL=false&amp;useUnicode=true&amp;characterEncoding=UTF-8</value>
    </property>

    <!-- JDBC driver class shipped in hive-lib/ (MySQL Connector/J 5.1.x) -->
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
    </property>

    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>root</value>
    </property>

    <!-- Replace with the real RDBMS password before building the image -->
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>your_RDBMS_password</value>
    </property>

    <!-- Host that HiveServer2's Thrift service binds to (the container's hostname) -->
    <property>
        <name>hive.server2.thrift.bind.host</name>
        <value>h03</value>
    </property>

    <!-- Remote-mode metastore: URI clients use to reach the metastore service -->
    <property>
        <name>hive.metastore.uris</name>
        <value>thrift://h03:9083</value>
    </property>

    <!-- Disable authentication on the metastore event DB notification API -->
    <property>
        <name>hive.metastore.event.db.notification.api.auth</name>
        <value>false</value>
    </property>
</configuration>

hive-lib/

1
2
$ ls hive-lib/
mysql-connector-java-5.1.32.jar

mysql-connector-java-5.1.32.jar

官网Archives 下载

版本号可能不一样,可以选择最新的版本,也可以选择这里示例的版本

apache-hive-3.1.3-bin.tar.gz

官网Download CDN 下载

版本号可能不一样,可以选择最新的版本,也可以选择这里示例的版本

构建镜像

1
docker build -t yzy613/hadoop:3.3.6-hive3.1.3 .

启动容器

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
# Start the container (name/hostname h03 must match hive-site.xml) on the
# hadoop network, then attach and launch the Hive services by hand.
docker run -itd \
    --name h03 \
    --hostname h03 \
    --network hadoop \
    -v /opt/hadoop/fs/h03:/home/hadoop3/hadoop \
    yzy613/hadoop:3.3.6-hive3.1.3 \
    bash
docker attach h03
hive --service metastore &
hive --service hiveserver2 &
# Ctrl+p then Ctrl+q detaches from the container without stopping it
[^pq]