Bootstrap

hue 4.11容器化部署,已结合Hive与Hadoop

配合《Hue 部署过程中的报错处理》食用更佳

官方配置说明页面:
https://docs.gethue.com/administrator/configuration/connectors/
官方配置hue.ini页面
https://github.com/cloudera/hue/blob/master/desktop/conf.dist/hue.ini

docker部署

注意

  • 初次部署需要先注释hue.ini配置文件的映射,待到部署成功后,将配置文件拷贝到指定目录后,再取消注释进行部署即可。
    或者到该地址下去复制并新建hue.ini,放到/data/hue/目录下(推荐)。https://github.com/cloudera/hue/blob/master/desktop/conf.dist/hue.ini
  • 如果已部署hive与hadoop,最好部署hue容器之前将文件映射配置修改完整
  • 如果未部署hive与Hadoop,忽略即可,后续有需要时再回来修改配置并重新部署hue容器即可
version: '3.3'  # 指定Docker Compose的版本
services:  # 定义服务列表
  hue:  # 服务的名称
    image: gethue/hue:4.11.0  # 指定使用的镜像及其版本
    container_name: hue  # 设置容器的名称
    restart: always  # 配置容器的重启策略为总是重启
    privileged: true  # 给予容器特权模式,拥有更多权限
    hostname: hue  # 设置容器的主机名
    ports:  # 端口映射
      - "9898:8888"  # 将容器的8888端口映射到宿主机的9898端口
    environment:  # 环境变量设置
      - TZ=Asia/Shanghai  # 设置时区为中国上海
    volumes:  # 数据卷配置
      - /data/hue/hue.ini:/usr/share/hue/desktop/conf/hue.ini  # 将宿主机的hue.ini配置文件映射到容器内的指定位置
      - /etc/localtime:/etc/localtime  # 将宿主机的本地时间设置映射到容器内,确保容器与宿主机时间一致
      - /opt/hive-3.1.3:/opt/hive-3.1.3 # 将宿主机的hive目录映射到容器内
      - /opt/hadoop-3.3.0:/opt/hadoop-3.3.0 # 将宿主机的hadoop目录映射到容器内
    networks:
      hue_default:
        ipv4_address: 172.15.0.2  # 指定静态IP地址

networks:  # 定义网络
  hue_default:
    driver: bridge  # 使用bridge驱动
    ipam:
      config:
        - subnet: 172.15.0.0/16  # 指定子网范围
# 1. 创建hue部署文件
vi docker-compose-hue.yml

# 2. 将上方的部署内容复制粘贴到docker-compose-hue.yml中
# 3. 使用docker compose部署hue
docker compose -f docker-compose-hue.yml  up -d

# 4. 检验是否部署成功,同一局域网下的浏览器访问hue
# 由于这是您第一次登录,请选择任意用户名和密码。请务必记住这些,因为它们将成为您的 Hue 超级用户凭据。
主机ip:9898

hue初始化

  1. hue.ini 配置文件修改
vi /data/hue/hue.ini
# 修改Webserver监听地址
 http_host=127.0.0.1

# 修改时区
 time_zone=Asia/Shanghai

# 配置hue元数据数据库存储
[[database]]
 engine=mysql
 host=192.168.10.75
 port=3306
 user=root
 password=HoMf@123
 name=hue
 
# 配置hue数据库连接,用于查询Mysql数据库
[[interpreters]]
# Define the name and how to connect and execute the language.
# https://docs.gethue.com/administrator/configuration/editor/

 [[[mysql]]]
   name = MySQL
   interface=sqlalchemy
#   ## https://docs.sqlalchemy.org/en/latest/dialects/mysql.html
   options='{"url": "mysql://root:HoMf@[email protected]:3306/hue_meta"}'
#   ## options='{"url": "mysql://${USER}:${PASSWORD}@localhost:3306/hue"}'
  1. 创建数据库
# 进入数据库
mysql -uroot -pHoMf@123

# 创建hue数据库
mysql> create database `hue` default character set utf8mb4 default collate utf8mb4_general_ci;
Query OK, 1 row affected (0.00 sec)

# 创建hue_meta数据库
mysql> create database `hue_meta` default character set utf8mb4 default collate utf8mb4_general_ci;
Query OK, 1 row affected (0.00 sec)
  1. 应用配置文件,重启容器,进入容器,数据库初始化
# 重启hue容器
docker restart hue

# 进入hue容器
docker exec -it hue bash

# 执行数据库初始化
/usr/share/hue/build/env/bin/hue syncdb
/usr/share/hue/build/env/bin/hue migrate

# 退出容器,ctrl + D,或
exit

  • 数据库初始化详细操作及返回结果
hue@hue:/usr/share/hue$ /usr/share/hue/build/env/bin/hue syncdb
[22/Nov/2024 15:38:07 +0800] settings     INFO     Welcome to Hue 4.11.0
[22/Nov/2024 15:38:08 +0800] conf         WARNING  enable_extract_uploaded_archive is of type bool. Resetting it as type 'coerce_bool'. Please fix it permanently
[22/Nov/2024 15:38:08 +0800] conf         WARNING  enable_new_create_table is of type bool. Resetting it as type 'coerce_bool'. Please fix it permanently
[22/Nov/2024 15:38:08 +0800] conf         WARNING  force_hs2_metadata is of type bool. Resetting it as type 'coerce_bool'. Please fix it permanently
[22/Nov/2024 15:38:08 +0800] conf         WARNING  show_table_erd is of type bool. Resetting it as type 'coerce_bool'. Please fix it permanently
[21/Nov/2024 23:38:08 -0800] backend      WARNING  mozilla_django_oidc module not found
[21/Nov/2024 23:38:09 -0800] apps         INFO     AXES: BEGIN LOG
[21/Nov/2024 23:38:09 -0800] apps         INFO     AXES: Using django-axes version 5.13.0
[21/Nov/2024 23:38:09 -0800] apps         INFO     AXES: blocking by IP only.
[21/Nov/2024 23:38:09 -0800] api3         WARNING  simple_salesforce module not found
[21/Nov/2024 23:38:09 -0800] jdbc         WARNING  Failed to import py4j
[21/Nov/2024 23:38:10 -0800] schemas      INFO     Resource 'XMLSchema.xsd' is already loaded
No changes detected
hue@hue:/usr/share/hue$ /usr/share/hue/build/env/bin/hue migrate
[22/Nov/2024 15:38:33 +0800] settings     INFO     Welcome to Hue 4.11.0
[22/Nov/2024 15:38:33 +0800] conf         WARNING  enable_extract_uploaded_archive is of type bool. Resetting it as type 'coerce_bool'. Please fix it permanently
[22/Nov/2024 15:38:33 +0800] conf         WARNING  enable_new_create_table is of type bool. Resetting it as type 'coerce_bool'. Please fix it permanently
[22/Nov/2024 15:38:33 +0800] conf         WARNING  force_hs2_metadata is of type bool. Resetting it as type 'coerce_bool'. Please fix it permanently
[22/Nov/2024 15:38:33 +0800] conf         WARNING  show_table_erd is of type bool. Resetting it as type 'coerce_bool'. Please fix it permanently
[21/Nov/2024 23:38:33 -0800] backend      WARNING  mozilla_django_oidc module not found
[21/Nov/2024 23:38:34 -0800] apps         INFO     AXES: BEGIN LOG
[21/Nov/2024 23:38:34 -0800] apps         INFO     AXES: Using django-axes version 5.13.0
[21/Nov/2024 23:38:34 -0800] apps         INFO     AXES: blocking by IP only.
[21/Nov/2024 23:38:34 -0800] api3         WARNING  simple_salesforce module not found
[21/Nov/2024 23:38:34 -0800] jdbc         WARNING  Failed to import py4j
[21/Nov/2024 23:38:35 -0800] schemas      INFO     Resource 'XMLSchema.xsd' is already loaded
Operations to perform:
  Apply all migrations: auth, authtoken, axes, beeswax, contenttypes, desktop, jobsub, oozie, pig, sessions, sites, useradmin
Running migrations:
  Applying contenttypes.0001_initial... OK
  Applying contenttypes.0002_remove_content_type_name... OK
  Applying auth.0001_initial... OK
  Applying auth.0002_alter_permission_name_max_length... OK
  Applying auth.0003_alter_user_email_max_length... OK
  Applying auth.0004_alter_user_username_opts... OK
  Applying auth.0005_alter_user_last_login_null... OK
  Applying auth.0006_require_contenttypes_0002... OK
  Applying auth.0007_alter_validators_add_error_messages... OK
  Applying auth.0008_alter_user_username_max_length... OK
  Applying auth.0009_alter_user_last_name_max_length... OK
  Applying auth.0010_alter_group_name_max_length... OK
  Applying auth.0011_update_proxy_permissions... OK
  Applying auth.0012_alter_user_first_name_max_length... OK
  Applying authtoken.0001_initial... OK
  Applying authtoken.0002_auto_20160226_1747... OK
  Applying authtoken.0003_tokenproxy... OK
  Applying axes.0001_initial... OK
  Applying axes.0002_auto_20151217_2044... OK
  Applying axes.0003_auto_20160322_0929... OK
  Applying axes.0004_auto_20181024_1538... OK
  Applying axes.0005_remove_accessattempt_trusted... OK
  Applying axes.0006_remove_accesslog_trusted... OK
  Applying axes.0007_add_accesslog_trusted... OK
  Applying axes.0008_remove_accesslog_trusted... OK
  Applying beeswax.0001_initial... OK
  Applying beeswax.0002_auto_20200320_0746... OK
  Applying beeswax.0003_compute_namespace... OK
  Applying desktop.0001_initial... OK
  Applying desktop.0002_initial... OK
  Applying desktop.0003_initial... OK
  Applying desktop.0004_initial... OK
  Applying desktop.0005_initial... OK
  Applying desktop.0006_initial... OK
  Applying desktop.0007_initial... OK
  Applying desktop.0008_auto_20191031_0704... OK
  Applying desktop.0009_auto_20191202_1056... OK
  Applying desktop.0010_auto_20200115_0908... OK
  Applying desktop.0011_document2_connector... OK
  Applying desktop.0012_connector_interface... OK
  Applying desktop.0013_alter_document2_is_trashed... OK
  Applying jobsub.0001_initial... OK
  Applying oozie.0001_initial... OK
  Applying oozie.0002_initial... OK
  Applying oozie.0003_initial... OK
  Applying oozie.0004_initial... OK
  Applying oozie.0005_initial... OK
  Applying oozie.0006_auto_20200714_1204... OK
  Applying oozie.0007_auto_20210126_2113... OK
  Applying oozie.0008_auto_20210216_0216... OK
  Applying pig.0001_initial... OK
  Applying pig.0002_auto_20200714_1204... OK
  Applying sessions.0001_initial... OK
  Applying sites.0001_initial... OK
  Applying sites.0002_alter_domain_unique... OK
  Applying useradmin.0001_initial... OK
  Applying useradmin.0002_userprofile_json_data... OK
  Applying useradmin.0003_auto_20200203_0802... OK
  Applying useradmin.0004_userprofile_hostname... OK
[21/Nov/2024 23:38:42 -0800] models       INFO     HuePermissions: 34 added, 0 updated, 0 up to date, 0 stale, 0 deleted
  1. 查看数据库表是否创建成功,判断数据库是否初始化完成
# 进入数据库
mysql -uroot -pHoMf@123

# 进入hue数据库
mysql> use hue;
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A
Database changed

# 查看hue数据库内的数据库表
mysql> show tables;
+--------------------------------+
| Tables_in_hue                  |
+--------------------------------+
| auth_group                     |
| auth_group_permissions         |
| auth_permission                |
| auth_user                      |
| auth_user_groups               |
| auth_user_user_permissions     |
| authtoken_token                |
| axes_accessattempt             |
| axes_accesslog                 |
| beeswax_compute                |
| beeswax_metainstall            |
| beeswax_namespace              |
| beeswax_queryhistory           |
| beeswax_savedquery             |
| beeswax_session                |
| defaultconfiguration_groups    |
| desktop_connector              |
| desktop_defaultconfiguration   |
| desktop_document               |
| desktop_document2              |
| desktop_document2_dependencies |
| desktop_document2permission    |
| desktop_document_tags          |
| desktop_documentpermission     |
| desktop_documenttag            |
| desktop_settings               |
| desktop_userpreferences        |
| django_content_type            |
| django_migrations              |
| django_session                 |
| django_site                    |
| documentpermission2_groups     |
| documentpermission2_users      |
| documentpermission_groups      |
| documentpermission_users       |
| jobsub_checkforsetup           |
| jobsub_jobdesign               |
| jobsub_jobhistory              |
| jobsub_oozieaction             |
| jobsub_ooziedesign             |
| jobsub_ooziejavaaction         |
| jobsub_ooziemapreduceaction    |
| jobsub_ooziestreamingaction    |
| oozie_bundle                   |
| oozie_bundledcoordinator       |
| oozie_coordinator              |
| oozie_datainput                |
| oozie_dataoutput               |
| oozie_dataset                  |
| oozie_decision                 |
| oozie_decisionend              |
| oozie_distcp                   |
| oozie_email                    |
| oozie_end                      |
| oozie_fork                     |
| oozie_fs                       |
| oozie_generic                  |
| oozie_history                  |
| oozie_hive                     |
| oozie_java                     |
| oozie_job                      |
| oozie_join                     |
| oozie_kill                     |
| oozie_link                     |
| oozie_mapreduce                |
| oozie_node                     |
| oozie_pig                      |
| oozie_shell                    |
| oozie_sqoop                    |
| oozie_ssh                      |
| oozie_start                    |
| oozie_streaming                |
| oozie_subworkflow              |
| oozie_workflow                 |
| pig_document                   |
| pig_pigscript                  |
| useradmin_grouppermission      |
| useradmin_huepermission        |
| useradmin_ldapgroup            |
| useradmin_userprofile          |
+--------------------------------+
80 rows in set (0.01 sec)

部署检验

此时应可以正常进入hue界面,并且Sources列表内能够看到MySql
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

配置hive

请先添加好文件映射,确保在容器内也可以访问到hive的配置文件目录
修改hue.ini配置文件,修改后重启容器即可
配置成功后即可在hue界面内的Sources列表内能够看到Hive
在这里插入图片描述

# 修改hue配置文件
vi /data/hue/hue.ini
# 取消注释即可
 [[[hive]]]
   name=Hive
   interface=hiveserver2

[beeswax]

# Host where HiveServer2 is running.
# If Kerberos security is enabled, use fully-qualified domain name (FQDN).
# hive主机地址
 hive_server_host=192.168.10.75

# Binary thrift port for HiveServer2.
# hive-site.xml中的hive.server2.thrift.port配置
 hive_server_port=10000
 
 # Hive configuration directory, where hive-site.xml is located
 # hive配置文件目录,若为容器部署,则需要配置文件映射
 hive_conf_dir=/opt/hive-3.1.3/conf

# Timeout in seconds for thrift calls to Hive service
 server_conn_timeout=120
 
 # Override the default desktop username and password of the hue user used for authentications with other services.
# e.g. Used for LDAP/PAM pass-through authentication.
 auth_username=hue
 auth_password=root
 
 [metastore]
# Flag to turn on the new version of the create table wizard.
 enable_new_create_table=true
  • 附上我的hive-site.xml配置
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?><!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
-->
<configuration>
        <property>
                <name>hive.metastore.warehouse.dir</name>
                <value>/user/hive-3.1.3/warehouse</value>
                <description/>
        </property>

        <property>
                <name>javax.jdo.option.ConnectionURL</name>
                <value>jdbc:mysql://Linux-Master:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false&amp;allowPublicKeyRetrieval=true</value>
                <description>数据库连接</description>
        </property>

        <property>
                <name>javax.jdo.option.ConnectionDriverName</name>
                <value>com.mysql.jdbc.Driver</value>
                <description/>
        </property>

        <property>
                <name>javax.jdo.option.ConnectionUserName</name>
                <value>root</value>
                <description/>
        </property>

        <property>
                <name>javax.jdo.option.ConnectionPassword</name>
                <value>HoMf@123</value>
                <description/>
        </property>

        <property>
                <name>hive.querylog.location</name>
                <value>/home/hadoop/logs/hive-3.1.3/job-logs/${user.name}</value>
                <description>Location of Hive run time structured log file</description>
        </property>

        <property>
                <name>hive.exec.scratchdir</name>
                <value>/user/hive-3.1.3/tmp</value>
        </property>
       
        <property>
                <name>hive.server2.thrift.port</name>
                <value>11000</value>
        </property>
</configuration>

配置 Hadoop

配置 HDFS

修改hue.ini

# 修改hue配置文件
vi /data/hue/hue.ini
# hdfs集群配置
[[hdfs_clusters]]
  # HA support by using HttpFs

[[[default]]]
 fs_defaultfs=hdfs://192.168.10.75:9000
 webhdfs_url=http://192.168.10.75:9870/webhdfs/v1
 hadoop_conf_dir=/opt/hadoop-3.3.0/etc/hadoop
 
 
# yarn集群配置
[[yarn_clusters]]

[[[default]]]
# Enter the host on which you are running the ResourceManager
 resourcemanager_host=192.168.10.75

# The port where the ResourceManager IPC listens on
 resourcemanager_port=8032

# Whether to submit jobs to this cluster
submit_to=True

# Resource Manager logical name (required for HA)
## logical_name=

# Change this if your YARN cluster is Kerberos-secured
## security_enabled=false

# URL of the ResourceManager API
 resourcemanager_api_url=http://192.168.10.75:8088

# URL of the ProxyServer API
 proxy_api_url=http://192.168.10.75:8088

# URL of the HistoryServer API
 history_server_api_url=http://localhost:19888

修改hadoop的core-site.xml

<!—允许通过 httpfs 方式访问 hdfs 的主机名 -->
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<!—允许通过 httpfs 方式访问 hdfs 的用户组 -->
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
  • 这里附上我的core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
        <property>
                <name>hadoop.tmp.dir</name>
                        <value>/opt/hadoop-3.3.0/tmp</value>
                        <description>Abase for other temporary directories.</description>
        </property>

        <property>
                <name>fs.defaultFS</name>
                        <!-- IP地址为主节点服务器地址 -->
                        <value>hdfs://192.168.10.75:9000</value>
        </property>

        <property>
                <name>hadoop.proxyuser.root.hosts</name>
                <value>*</value>
        </property>

        <property>
                <name>hadoop.proxyuser.root.groups</name>
                <value>*</value>
        </property>
        <property>
                <name>hadoop.proxyuser.hue.hosts</name>
                <value>*</value>
        </property>

        <property>
                <name>hadoop.proxyuser.hue.groups</name>
                <value>*</value>
        </property>
</configuration>

修改hadoop的hdfs-site.xml

<property>
 <name>dfs.webhdfs.enabled</name>
 <value>true</value>
</property>
  • 这里附上我的hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
        <!-- nn web 端访问地址-->
        <property>
                <name>dfs.namenode.http-address</name>
                <!-- master-ubuntu需要改为主节点机器名-->
                <value>linux-master:9870</value>
        </property>

        <!-- 2nn web 端访问地址-->
        <property>
                <name>dfs.namenode.secondary.http-address</name>
                <!-- slave1-ubuntu需要改为第一从节点台机器名-->
                <value>linux-slave01:9868</value>
        </property>

        <property>
                <name>dfs.permissions.enabled</name>
                <value>false</value>
        </property>
        <property>
                <name>dfs.webhdfs.enable</name>
                <value>true</value>
        </property>
</configuration>

配置yarn集群

  • 配置hadoop的yarn-site.xml
        <!-- 开启日志聚集功能 -->
        <property>
                <name>yarn.log-aggregation-enable</name>
                <value>true</value>
        </property>

        <!-- 设置日志聚集服务器地址 -->
        <property>
                <name>yarn.log.server.url</name>
                <!-- IP地址为主节点机器地址 -->
                <value>http://192.168.10.75:19888/jobhistory/logs</value>
        </property>

        <!-- 设置日志保留时间为 7 天 -->
        <property>
                <name>yarn.log-aggregation.retain-seconds</name>
                <value>604800</value>
        </property>
  • 这里附上我的yarn-site.xml
<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>

<!-- Site specific YARN configuration properties -->

        <!-- 指定 MR 走 shuffle -->
        <property>
                <name>yarn.nodemanager.aux-services</name>
                <value>mapreduce_shuffle</value>
        </property>

        <!-- 指定 ResourceManager 的地址-->
        <property>
                <name>yarn.resourcemanager.hostname</name>
                <!-- master-ubuntu需要改为为主节点机器名-->
                <value>linux-master</value>
        </property>

        <!-- 环境变量的继承 -->
        <property>
                <name>yarn.nodemanager.env-whitelist</name>
                <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CO NF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
        </property>

        <!-- 开启日志聚集功能 -->
        <property>
                <name>yarn.log-aggregation-enable</name>
                <value>true</value>
        </property>

        <!-- 设置日志聚集服务器地址 -->
        <property>
                <name>yarn.log.server.url</name>
                <!-- IP地址为主节点机器地址 -->
                <value>http://192.168.10.75:19888/jobhistory/logs</value>
        </property>

        <!-- 设置日志保留时间为 7 天 -->
        <property>
                <name>yarn.log-aggregation.retain-seconds</name>
                <value>604800</value>
        </property>
</configuration>
;