Greenplum 一主一备的配置,暂且定义主机为test01,备机为test02:
一.首先保证网络之间能够ping通,具体需要修改相对应的文件,具体操作如下:
test01与test02主备机分别进行下面的操作:配置网卡,并在hosts文件中配置主机名解析(以下以test01为例,test02的IPADDR应改为192.168.2.20)
[root@test01 Desktop]# vi /etc/sysconfig/network-scripts/ifcfg-eth0
IPADDR=192.168.2.10
NETMASK=255.255.255.0
GATEWAY=192.168.2.1
ONBOOT=yes
BOOTPROTO=static
[root@test01 Desktop]# vi /etc/sysconfig/network
GATEWAY=192.168.2.1
[root@test01 Desktop]# vi /etc/hosts
192.168.2.10 test01
192.168.2.20 test02
关闭linux的防火墙以及selinux
[root@test01 Desktop]# service iptables stop 临时关闭,重启失效
[root@test01 Desktop]# chkconfig iptables off 永久关闭,重启生效
[root@test01 Desktop]# setenforce 0 临时关闭Selinux ,重启失效
[root@test01 Desktop]# vi /etc/selinux/config 修改以下内容,重启后生效
SELINUX=disabled
[root@test01 Desktop]# service network restart 重启网络
[root@test01 Desktop]# ping test02 测试是否能够ping通相对应的主机。
二.系统参数配置
修改linux系统内核参数
[root@test02 Desktop]# vi /etc/sysctl.conf
# Kernel sysctl configuration file for Red Hat Linux
#
# For binary values, 0 is disabled, 1 is enabled. See sysctl(8) and
# sysctl.conf(5) for more details.
#
# Use '/sbin/sysctl -a' to list all possible parameters.
# Controls IP packet forwarding
net.ipv4.ip_forward = 0
# Controls source route verification
net.ipv4.conf.default.rp_filter = 1
# Do not accept source routing
net.ipv4.conf.default.accept_source_route = 0
# Controls the System Request debugging functionality of the kernel
kernel.sysrq = 1
# Controls whether core dumps will append the PID to the core filename.
# Useful for debugging multi-threaded applications.
kernel.core_uses_pid = 1
# Controls the use of TCP syncookies
net.ipv4.tcp_syncookies = 1
# Disable netfilter on bridges
#net.bridge.bridge-nf-call-ip6tables = 0
#net.bridge.bridge-nf-call-iptables = 0
#net.bridge.bridge-nf-call-arptables = 0
# Controls the default maximum size of a message queue
kernel.msgmnb = 65536
# Controls the maximum size of a message, in bytes
kernel.msgmax = 65536
kernel.msgmni = 2048
kernel.sem = 250 512000 100 2048
# Controls the maximum shared segment size, in bytes
#kernel.shmmax = 68719476736
kernel.shmmax = 500000000
kernel.shmmni = 4096
# Controls the total amount of shared memory that can be used system-wide, in pages
#kernel.shmall = 4294967296
kernel.shmall = 4000000000
net.ipv4.tcp_tw_recycle=1
net.ipv4.tcp_max_syn_backlog=4096
net.ipv4.ip_local_port_range = 1025 65535
net.core.netdev_max_backlog=10000
vm.overcommit_memory=2
net.ipv4.conf.all.arp_filter = 1
net.core.rmem_max = 2097152
net.core.wmem_max = 2097152
修改Linux最大限制
[root@test01 Desktop]# vi /etc/security/limits.conf
#greenplum configs
* soft nofile 65536
* hard nofile 65536
* soft nproc 131072
* hard nproc 131072
设置预读块的值为16384
[root@test01 Desktop]# /sbin/blockdev --getra /dev/sda 查看预读块,默认大小为256
[root@test01 Desktop]# /sbin/blockdev --setra 16384 /dev/sda 设置预读块
调整IO策略
[root@test01 Desktop]# echo deadline > /sys/block/sda/queue/scheduler
(如果后面校验系统设置时报I/O调度策略不匹配的错误,请参考上面的命令修改相应磁盘的IO调度策略)
三.GP安装 GP安装都在主节点下进行
[root@test01 Desktop]# useradd gpadmin
[root@test01 Desktop]# passwd gpadmin
修改环境变量
[root@test01 Desktop]# cd /home/gpadmin
[root@test01 gpadmin]# vi .bashrc
[root@test01 gpadmin]# vi .bash_profile
source /usr/local/greenplum-db/greenplum_path.sh
export MASTER_DATA_DIRECTORY=/data1/gpdata/master/gpseg-1
上传解压安装包
将greenplum-db-4.3.11.0-build-1-rhel5-x86_64.zip上传到/opt目录下并进行解压
执行相对应的脚本,按提示输入回车或yes。这一步会将安装包解压到/usr/local/下,并建立软链接greenplum-db
[root@test01 opt]# ./greenplum-db-4.3.11.0-build-1-rhel5-x86_64.bin
准备节点服务器信息
[root@test01 opt]# mkdir -p /opt/gpinit/
[root@test01 gpinit]# vi all_host
[root@test01 gpinit]# vi all_segment
test01
test02
建立服务器之间的信任,按照提示输入相对应的root密码
[root@test01 ~]# source /usr/local/greenplum-db/greenplum_path.sh
[root@test01 ~]# gpssh-exkeys -f /opt/gpinit/all_host
批量安装,并批量校验安装情况,返回各个节点目录情况一致就正确了
[root@test01 ~]# gpseginstall -f /opt/gpinit/all_host -u gpadmin -p gpadmin
[root@test01 ~]# gpssh -f /opt/gpinit/all_host -e ls -l $GPHOME
创建存储节点
Master
[root@test01 ~]# mkdir -p /data1/gpdata/master
[root@test01 ~]# chown gpadmin:gpadmin /data1/gpdata/master
Segment
[root@test01 ~]# gpssh -f /opt/gpinit/all_host -e 'mkdir -p /data1/gpdata/primary'
[root@test01 ~]# gpssh -f /opt/gpinit/all_host -e 'chown gpadmin:gpadmin /data1/gpdata/primary'
Mirror
[root@test01 ~]# gpssh -f /opt/gpinit/all_segment -e 'mkdir -p /data1/gpdata/mirror'
[root@test01 ~]# gpssh -f /opt/gpinit/all_segment -e 'chown gpadmin:gpadmin /data1/gpdata/mirror'
设置时钟同步
[root@test01 ~]# vi /etc/ntp.conf
server 192.168.2.10
server 192.168.2.20
重启ntpd服务 /etc/init.d/ntpd restart
查看ntp同步情况 ntpq -p
使ntpd服务重启服务器后也启动 chkconfig --level 0123456 ntpd on
校验系统设置
[root@test01 ~]# gpcheck -f /opt/gpinit/all_host -m test01(如有错误需要一一排查)
例如以下错误
20170215:00:22:13:004992 gpcheck:test01:root-[ERROR]:-GPCHECK_ERROR host(test02): on device (sr0) IO scheduler 'cfq' does not match expected value 'deadline'
20170215:00:22:13:004992 gpcheck:test01:root-[ERROR]:-GPCHECK_ERROR host(test01): on device (sr0) IO scheduler 'cfq' does not match expected value 'deadline'
执行 echo deadline > /sys/block/sr0/queue/scheduler 即可解决这个错误
创建GP初始化文件并进行修改
[gpadmin@test01 ~]$ mkdir /home/gpadmin/gpconfigs
[gpadmin@test01 ~]$ cp /usr/local/greenplum-db/docs/cli_help/gpconfigs/gpinitsystem_config /home/gpadmin/gpconfigs/gpinitsystem_config
[gpadmin@test01 ~]$ cd gpconfigs/
[gpadmin@test01 gpconfigs]$ chmod 775 gpinitsystem_config
[gpadmin@test01 gpconfigs]$ vi gpinitsystem_config
# FILE NAME: gpinitsystem_config
# Configuration file needed by the gpinitsystem
################################################
#### REQUIRED PARAMETERS
################################################
#### Name of this Greenplum system enclosed in quotes.
ARRAY_NAME="BJ Greenplum DW"
#### Naming convention for utility-generated data directories.
SEG_PREFIX=gpseg
#### Base number by which primary segment port numbers
#### are calculated.
PORT_BASE=40000
#### File system location(s) where primary segment data directories
#### will be created. The number of locations in the list dictate
#### the number of primary segments that will get created per
#### physical host (if multiple addresses for a host are listed in
#### the hostfile, the number of segments will be spread evenly across
#### the specified interface addresses).
declare -a DATA_DIRECTORY=(/data1/gpdata/primary /data1/gpdata/primary)
#### OS-configured hostname or IP address of the master host.
MASTER_HOSTNAME=test01
#### File system location where the master data directory
#### will be created.
MASTER_DIRECTORY=/data1/gpdata/master
#### Port number for the master instance.
MASTER_PORT=5432
#### Shell utility used to connect to remote hosts.
TRUSTED_SHELL=ssh
#### Maximum log file segments between automatic WAL checkpoints.
CHECK_POINT_SEGMENTS=8
#### Default server-side character set encoding.
ENCODING=UNICODE
################################################
#### OPTIONAL MIRROR PARAMETERS
################################################
#### Base number by which mirror segment port numbers
#### are calculated.
MIRROR_PORT_BASE=50000
#### Base number by which primary file replication port
#### numbers are calculated.
REPLICATION_PORT_BASE=41000
#### Base number by which mirror file replication port
#### numbers are calculated.
MIRROR_REPLICATION_PORT_BASE=51000
#### File system location(s) where mirror segment data directories
#### will be created. The number of mirror locations must equal the
#### number of primary locations as specified in the
#### DATA_DIRECTORY parameter.
declare -a MIRROR_DATA_DIRECTORY=(/data1/gpdata/mirror /data1/gpdata/mirror)
################################################
#### OTHER OPTIONAL PARAMETERS
################################################
#### Create a database of this name after initialization.
DATABASE_NAME=bj_gp
#### Specify the location of the host address file here instead of
#### with the -h option of gpinitsystem.
#MACHINE_LIST_FILE=/home/gpadmin/gpconfigs/hostfile_gpinitsystem
初始化GP
[gpadmin@test01 ~]$ gpinitsystem -c /home/gpadmin/gpconfigs/gpinitsystem_config -h /opt/gpinit/all_host
中间需要输入一次:Y
四.增加附加项
增加standby
在test02备机上
[root@test02 ~]# mkdir /data1/gpdata/master
[root@test02 ~]# chown gpadmin:gpadmin /data1/gpdata/master
在test01主机上执行该命令,中间需要输入一次Y
[gpadmin@test01 gpdata]$ gpinitstandby -s test02
增加mirror(镜像):只有前面的初始化文件中遗漏了mirror配置时才需要这一步,中间需要输入两次/data1/gpdata/mirror:
[gpadmin@test01 gpdata]$ gpaddmirrors -p 1000
设置访问权限:
打开/data1/gpdata/master/gpseg-1/pg_hba.conf 按照最下面的格式添加客户端ip或网段
#user define
host all all 192.168.1.0/24 trust
host all all 127.0.0.1/28 trust
访问方式:
[gpadmin@test01 gpdata]$ psql -d test_db -h test01 -p 5432 -U gpadmin
实例恢复:gprecoverseg
通过gpstate 或gp_configuration 发现有实例down 掉以后,使用该命令进行恢复。
GP常用命令
su - gpadmin
gpstart #正常启动
gpstop #正常关闭
gpstop -M fast #快速关闭
gpstop -r #重启
gpstop -u #重新加载配置文件
登录与退出Greenplum
#正常登录
psql gpdb
psql -d gpdb -h gphostm -p 5432 -U gpadmin
#使用utility方式
PGOPTIONS="-c gp_session_role=utility" psql -h hostname -d dbname -p port
#退出
在psql命令行执行\q
# 写权限
GRANT INSERT ON writable_ext_table TO role_name;
# 写数据
INSERT INTO writable_ext_table SELECT * FROM regular_table;
copy
COPY (SELECT * FROM country WHERE country_name LIKE 'A%') TO '/home/gpadmin/a_list_countries.out';
参数查询
psql -c 'SHOW ALL;' -d gpdb
gpconfig --show max_connections
创建数据库
createdb -h localhost -p 5432 dhdw
创建GP文件系统
# 文件系统名
gpfsdw
# 子节点,视segment数创建目录
mkdir -p /gpfsdw/seg1
mkdir -p /gpfsdw/seg2
chown -R gpadmin:gpadmin /gpfsdw
# 主节点
mkdir -p /gpfsdw/master
chown -R gpadmin:gpadmin /gpfsdw
gpfilespace -o gpfilespace_config
gpfilespace -c gpfilespace_config
创建GP表空间
psql gpdb
create tablespace TBS_DW_DATA filespace gpfsdw;
SET default_tablespace = TBS_DW_DATA;
删除GP数据库
gpdeletesystem -d /gpmaster/gpseg-1 -f
查看segment配置
select * from gp_segment_configuration;
文件系统
select * from pg_filespace_entry;
磁盘、数据库空间
SELECT * FROM gp_toolkit.gp_disk_free ORDER BY dfsegment;
SELECT * FROM gp_toolkit.gp_size_of_database ORDER BY sodddatname;
日志
SELECT * FROM gp_toolkit.__gp_log_master_ext;
SELECT * FROM gp_toolkit.__gp_log_segment_ext;
表描述
\d+
表分析
VACUUM ANALYZE tablename;
表数据分布
SELECT gp_segment_id, count(*) FROM tablename GROUP BY gp_segment_id;
表占用空间
SELECT relname as name, sotdsize/1024/1024 as size_MB, sotdtoastsize as toast, sotdadditionalsize as other
FROM gp_toolkit.gp_size_of_table_disk as sotd, pg_class
WHERE sotd.sotdoid = pg_class.oid ORDER BY relname;
索引占用空间
SELECT soisize/1024/1024 as size_MB, relname as indexname
FROM pg_class, gp_toolkit.gp_size_of_index
WHERE pg_class.oid = gp_size_of_index.soioid
AND pg_class.relkind='i';
OBJECT的操作统计
SELECT schemaname as schema, objname as table, usename as role, actionname as action, subtype as type, statime as time
FROM pg_stat_operations
WHERE objname = 'tablename';
锁
SELECT locktype, database, c.relname, l.relation, l.transactionid, l.transaction, l.pid, l.mode, l.granted, a.current_query
FROM pg_locks l, pg_class c, pg_stat_activity a
WHERE l.relation=c.oid
AND l.pid=a.procpid
ORDER BY c.relname;
队列
SELECT * FROM pg_resqueue_status;
加载(LOAD)数据到Greenplum数据库
gpfdist外部表
# 启动服务
gpfdist -d /share/txt -p 8081 -l /share/txt/gpfdist.log &
# 创建外部表,分隔符为'\t'(制表符)
drop EXTERNAL TABLE TD_APP_LOG_BUYER;
CREATE EXTERNAL TABLE TD_APP_LOG_BUYER (
IP text,
ACCESSTIME text,
REQMETHOD text,
URL text,
STATUSCODE int,
REF text,
name text,
VID text)
LOCATION ('gpfdist://gphostm:8081/xxx.txt')
FORMAT 'TEXT' (DELIMITER E'\t'
FILL MISSING FIELDS) SEGMENT REJECT LIMIT 1 percent;
# 创建普通表
create table test as select * from TD_APP_LOG_BUYER;
# 索引
# CREATE INDEX idx_test ON test USING bitmap (ip);
# 查询数据
select ip , count(*) from test group by ip order by count(*);
gpload
# 创建控制文件
# 加载数据
gpload -f my_load.yml
copy
COPY country FROM '/data/gpdb/country_data'
WITH DELIMITER '|' LOG ERRORS INTO err_country
SEGMENT REJECT LIMIT 10 ROWS;
从Greenplum数据库卸载(UNLOAD)数据
gpfdist外部表
# 创建可写外部表
CREATE WRITABLE EXTERNAL TABLE unload_expenses
( LIKE expenses )
LOCATION ('gpfdist://etlhost-1:8081/expenses1.out',
'gpfdist://etlhost-2:8081/expenses2.out')
FORMAT 'TEXT' (DELIMITER ',')
DISTRIBUTED BY (exp_id);
执行sql文件
psql gpdbname -f yoursqlfile.sql
或者psql登录后执行
\i yoursqlfile.sql
来自 “ ITPUB博客 ” ,链接:http://blog.itpub.net/30604784/viewspace-2130601/,如需转载,请注明出处,否则将追究法律责任。
转载于:http://blog.itpub.net/30604784/viewspace-2130601/