Bootstrap

Greenplum安装以及常用命令


Greenplum 一主一备的配置,暂且定义主机为test01,备机为test02

一.首先保证网络之间能够ping通,具体需要修改相对应的文件,具体操作如下:

 

test01、test02主备机分别进行下面操作,进行网卡的配置,以及通过hosts文件进行域名解析

[root@test01 Desktop]# vi /etc/sysconfig/network-scripts/ifcfg-eth0  

IPADDR=192.168.2.10

NETMASK=255.255.255.0

GATEWAY=192.168.2.1

ONBOOT=yes

BOOTPROTO=static


[root@test01 Desktop]# vi /etc/sysconfig/network

GATEWAY=192.168.2.1


[root@test01 Desktop]# vi /etc/hosts

192.168.2.10 test01

192.168.2.20 test02


关闭linux的防火墙以及selinux

[root@test01 Desktop]# service iptables stop  临时关闭,重启失效

[root@test01 Desktop]# chkconfig iptables off 永久关闭,重启生效

[root@test01 Desktop]# setenforce 0 临时关闭Selinux ,重启失效

[root@test01 Desktop]# vi /etc/selinux/config  修改一下内容,重启生效

SELINUX=disabled


[root@test01 Desktop]# service network restart 重启网络

[root@test01 Desktop]# ping test02 测试是否能够ping通相对应的主机。

二.系统参数配置

 

修改linux系统内核参数

[root@test02 Desktop]# vi /etc/sysctl.conf

# Kernel sysctl configuration file for Red Hat Linux

#

# For binary values, 0 is disabled, 1 is enabled.  See sysctl(8) and

# sysctl.conf(5) for more details.

#

# Use '/sbin/sysctl -a' to list all possible parameters.


# Controls IP packet forwarding

net.ipv4.ip_forward = 0


# Controls source route verification

net.ipv4.conf.default.rp_filter = 1


# Do not accept source routing

net.ipv4.conf.default.accept_source_route = 0


# Controls the System Request debugging functionality of the kernel

kernel.sysrq = 1


# Controls whether core dumps will append the PID to the core filename.

# Useful for debugging multi-threaded applications.

kernel.core_uses_pid = 1


# Controls the use of TCP syncookies

net.ipv4.tcp_syncookies = 1


# Disable netfilter on bridges

#net.bridge.bridge-nf-call-ip6tables = 0

#net.bridge.bridge-nf-call-iptables = 0

#net.bridge.bridge-nf-call-arptables = 0


# Controls the default maximum size of a message queue

kernel.msgmnb = 65536


# Controls the maximum size of a message, in bytes

kernel.msgmax = 65536

kernel.msgmni = 2048


kernel.sem = 250 512000 100 2048


# Controls the maximum shared segment size, in bytes

#kernel.shmmax = 68719476736

kernel.shmmax = 500000000

kernel.shmmni = 4096


# Controls the maximum number of shared memory segments, in pages

#kernel.shmall = 4294967296

kernel.shmall = 4000000000


net.ipv4.tcp_tw_recycle=1

net.ipv4.tcp_max_syn_backlog=4096

net.ipv4.ip_local_port_range = 1025 65535

net.core.netdev_max_backlog=10000

vm.overcommit_memory=2

net.ipv4.conf.all.arp_filter = 1

net.core.rmem_max = 2097152

net.core.wmem_max = 2097152


修改Linux最大限制

[root@test01 Desktop]# vi /etc/security/limits.conf

 #greenplum configs

* soft nofile 65536

* hard nofile 65536

* soft nproc 131072

* hard nproc 131072


设置预读块的值为16384

[root@test01 Desktop]# /sbin/blockdev --getra /dev/sda  查看预读块,默认大小为256

[root@test01 Desktop]# /sbin/blockdev --setra 16384 /dev/sda 设置预读块


调整IO策略

[root@test01 Desktop]# echo deadline > /sys/block/sda/queue/scheduler

(如果下面验证操作系统设置时出现I/O异常,参考上一段修改相对应的磁盘IO策略)


三.GP安装 GP安装都在主节点下进行

[root@test01 Desktop]# useradd gpadmin

[root@test01 Desktop]# passwd gpadmin


修改环境变量

[root@test01 Desktop]# cd /home/gpadmin

[root@test01 gpadmin]# vi .bashrc

[root@test01 gpadmin]# vi .bash_profile

source /usr/local/greenplum-db/greenplum_path.sh
export MASTER_DATA_DIRECTORY=/data1/gpdata/master/gpseg-1


上传解压安装包

greenplum-db-4.3.11.0-build-1-rhel5-x86_64.zip上传到/opt目录下并进行解压

执行相对应的脚本,按提示输入回车或yes 这一步会将安装包解压到/usr/local/下,并建立软连接greenplum-db

[root@test01 opt]# ./greenplum-db-4.3.11.0-build-1-rhel5-x86_64.bin


准备节点服务器信息

[root@test01 opt]# mkdir -p /opt/gpinit/

[root@test01 gpinit]# vi all_host

[root@test01 gpinit]# vi all_segment

 test01

 test02


建立服务器之间的信任,按照提示输入相对应的root密码

[root@test01 ~]# source /usr/local/greenplum-db/greenplum_path.sh

[root@test01 ~]# gpssh-exkeys -f /opt/gpinit/all_host

 

批量安装,并批量校验安装情况,返回各个节点目录情况一致就正确了

[root@test01 ~]# gpseginstall -f /opt/gpinit/all_host -u gpadmin -p gpadmin

[root@test01 ~]#  gpssh -f /opt/gpinit/all_host -e ls -l $GPHOME

 

创建存储节点

Master

[root@test01 ~]# mkdir -p /data1/gpdata/master

[root@test01 ~]# chown gpadmin:gpadmin /data1/gpdata/master

Segment

[root@test01 ~]# gpssh -f /opt/gpinit/all_host -e 'mkdir -p /data1/gpdata/primary'

[root@test01 ~]# gpssh -f /opt/gpinit/all_host -e 'chown gpadmin:gpadmin /data1/gpdata/primary'

Mirror

[root@test01 ~]# gpssh -f /opt/gpinit/all_segment -e 'mkdir -p /data1/gpdata/mirror'

[root@test01 ~]# gpssh -f /opt/gpinit/all_segment -e 'chown gpadmin:gpadmin /data1/gpdata/mirror'

 

设置时钟同步

[root@test01 ~]# vi /etc/ntp.conf

server 192.168.2.10

server 192.168.2.20

重启ntpd服务 /etc/init.d/ntpd restart

查看ntp同步情况 ntpq -p

使ntpd服务重启服务器后也启动 chkconfig --level 2345 ntpd on


校验系统设置

[root@test01 ~]# gpcheck -f /opt/gpinit/all_host -m test01(如有错误需要一一排查)

例如以下错误

20170215:00:22:13:004992 gpcheck:test01:root-[ERROR]:-GPCHECK_ERROR host(test02): on device (sr0) IO scheduler 'cfq' does not match expected value 'deadline'

20170215:00:22:13:004992 gpcheck:test01:root-[ERROR]:-GPCHECK_ERROR host(test01): on device (sr0) IO scheduler 'cfq' does not match expected value 'deadline'


在报错的主机上执行 echo deadline > /sys/block/sr0/queue/scheduler 后,这个错误就可解决


创建GP初始化文件并进行修改

[gpadmin@test01 ~]$ mkdir /home/gpadmin/gpconfigs

[gpadmin@test01 ~]$ cp /usr/local/greenplum-db/docs/cli_help/gpconfigs/gpinitsystem_config /home/gpadmin/gpconfigs/gpinitsystem_config

[gpadmin@test01 ~]$ cd gpconfigs/

[gpadmin@test01 gpconfigs]$ chmod 775 gpinitsystem_config


[gpadmin@test01 gpconfigs]$ vi gpinitsystem_config

# FILE NAME: gpinitsystem_config

# Configuration file needed by the gpinitsystem

################################################
#### REQUIRED PARAMETERS
################################################

#### Name of this Greenplum system enclosed in quotes.
ARRAY_NAME="BJ Greenplum DW"

#### Naming convention for utility-generated data directories.
SEG_PREFIX=gpseg

#### Base number by which primary segment port numbers 
#### are calculated.
PORT_BASE=40000

#### File system location(s) where primary segment data directories 
#### will be created. The number of locations in the list dictate
#### the number of primary segments that will get created per
#### physical host (if multiple addresses for a host are listed in 
#### the hostfile, the number of segments will be spread evenly across
#### the specified interface addresses).
declare -a DATA_DIRECTORY=(/data1/gpdata/primary /data1/gpdata/primary)

#### OS-configured hostname or IP address of the master host.
MASTER_HOSTNAME=test01

#### File system location where the master data directory 
#### will be created.
MASTER_DIRECTORY=/data1/gpdata/master

#### Port number for the master instance. 
MASTER_PORT=5432

#### Shell utility used to connect to remote hosts.
TRUSTED_SHELL=ssh

#### Maximum log file segments between automatic WAL checkpoints.
CHECK_POINT_SEGMENTS=8

#### Default server-side character set encoding.
ENCODING=UNICODE

################################################
#### OPTIONAL MIRROR PARAMETERS
################################################

#### Base number by which mirror segment port numbers 
#### are calculated.
MIRROR_PORT_BASE=50000

#### Base number by which primary file replication port 
#### numbers are calculated.
REPLICATION_PORT_BASE=41000

#### Base number by which mirror file replication port 
#### numbers are calculated. 
MIRROR_REPLICATION_PORT_BASE=51000

#### File system location(s) where mirror segment data directories 
#### will be created. The number of mirror locations must equal the
#### number of primary locations as specified in the 
#### DATA_DIRECTORY parameter.
declare -a MIRROR_DATA_DIRECTORY=(/data1/gpdata/mirror /data1/gpdata/mirror)


################################################
#### OTHER OPTIONAL PARAMETERS
################################################

#### Create a database of this name after initialization.
DATABASE_NAME=bj_gp

#### Specify the location of the host address file here instead of
#### with the -h option of gpinitsystem.
#MACHINE_LIST_FILE=/home/gpadmin/gpconfigs/hostfile_gpinitsystem


初始化GP

[gpadmin@test01 ~]$  gpinitsystem -c /home/gpadmin/gpconfigs/gpinitsystem_config -h /opt/gpinit/all_host

中间需要输入一次:Y


四.增加附加项

增加standby

test02备机上

[root@test02 ~]# mkdir /data1/gpdata/master

[root@test02 ~]# chown gpadmin:gpadmin /data1/gpdata/master

test01主机上执行该命令,中间输入一次Y

[gpadmin@test01 gpdata]$  gpinitstandby -s test02


增加mirror(镜像)前面初始化文件遗漏了才需要这一步,中间需要输入两次/data1/gpdata/mirror

[gpadmin@test01 gpdata]$ gpaddmirrors -p 1000


设置访问权限:

打开/data1/gpdata/master/gpseg-1/pg_hba.conf 按照最下面的格式添加客户端ip或网段

#user define

host    all     all     192.168.1.0/24   trust

host    all     all     127.0.0.1/28    trust


访问方式:

[gpadmin@test01 gpdata]$ psql -d test_db -h test01 -p 5432 -U gpadmin


实例恢复:gprecoverseg


通过 gpstate 或查询 gp_segment_configuration 发现有实例 down 掉以后,使用该命令进行恢复。



GP常用命令

 

su - gpadmin

gpstart #正常启动

gpstop #正常关闭

gpstop -M fast #快速关闭

gpstop -r #重启

gpstop -u #重新加载配置文件

登陆与退出Greenplum

#正常登陆

psql gpdb

psql -d gpdb -h gphostm -p 5432 -U gpadmin

#使用utility方式

PGOPTIONS="-c gp_session_role=utility" psql -h hostname -d dbname -p port

#退出

psql命令行执行\q


# 写权限

GRANT INSERT ON writable_ext_table TO <role_name>;

# 写数据

INSERT INTO writable_ext_table SELECT * FROM regular_table;

copy

COPY (SELECT * FROM country WHERE country_name LIKE 'A%') TO '/home/gpadmin/a_list_countries.out';


参数查询

psql -c 'SHOW ALL;' -d gpdb

gpconfig --show max_connections

创建数据库

createdb -h localhost -p 5432 dhdw

创建GP文件系统

# 文件系统名

gpfsdw

# 子节点,视segment数创建目录

mkdir -p /gpfsdw/seg1

mkdir -p /gpfsdw/seg2

chown -R gpadmin:gpadmin /gpfsdw

# 主节点

mkdir -p /gpfsdw/master

chown -R gpadmin:gpadmin /gpfsdw

gpfilespace -o gpfilespace_config

gpfilespace -c gpfilespace_config

创建GP表空间

psql gpdb

create tablespace TBS_DW_DATA filespace gpfsdw;

SET default_tablespace = TBS_DW_DATA;

删除GP数据库

gpdeletesystem -d /gpmaster/gpseg-1 -f

查看segment配置

select * from gp_segment_configuration;

文件系统

select * from pg_filespace_entry;

磁盘、数据库空间

SELECT * FROM gp_toolkit.gp_disk_free ORDER BY dfsegment;

SELECT * FROM gp_toolkit.gp_size_of_database ORDER BY sodddatname;

日志

SELECT * FROM gp_toolkit.__gp_log_master_ext;

SELECT * FROM gp_toolkit.__gp_log_segment_ext;

表描述

\d+ tablename

表分析

VACUUM ANALYZE tablename;

表数据分布

SELECT gp_segment_id, count(*) FROM tablename GROUP BY gp_segment_id;

表占用空间

SELECT relname as name, sotdsize/1024/1024 as size_MB, sotdtoastsize as toast, sotdadditionalsize as other

FROM gp_toolkit.gp_size_of_table_disk as sotd, pg_class

WHERE sotd.sotdoid = pg_class.oid ORDER BY relname;

索引占用空间

SELECT soisize/1024/1024 as size_MB, relname as indexname

FROM pg_class, gp_toolkit.gp_size_of_index

WHERE pg_class.oid = gp_size_of_index.soioid

AND pg_class.relkind='i';

OBJECT的操作统计

SELECT schemaname as schema, objname as table, usename as role, actionname as action, subtype as type, statime as time

FROM pg_stat_operations

WHERE objname = 'tablename';

SELECT locktype, database, c.relname, l.relation, l.transactionid, l.transaction, l.pid, l.mode, l.granted, a.current_query

FROM pg_locks l, pg_class c, pg_stat_activity a

WHERE l.relation=c.oid

AND l.pid=a.procpid

ORDER BY c.relname;

队列

SELECT * FROM pg_resqueue_status;


加载(LOAD)数据到Greenplum数据库

gpfdist外部表

# 启动服务

gpfdist -d /share/txt -p 8081 -l /share/txt/gpfdist.log &

# 创建外部表,分隔符为'\t'

drop EXTERNAL TABLE TD_APP_LOG_BUYER;

CREATE EXTERNAL TABLE TD_APP_LOG_BUYER (

IP text,

ACCESSTIME text,

REQMETHOD text,

URL text,

STATUSCODE int,

REF text,

name text,

VID text)

LOCATION ('gpfdist://gphostm:8081/xxx.txt')

FORMAT 'TEXT' (DELIMITER E'\t'

FILL MISSING FIELDS) SEGMENT REJECT LIMIT 1 percent;

# 创建普通表

create table test as select * from TD_APP_LOG_BUYER;

# 索引

# CREATE INDEX idx_test ON test USING bitmap (ip);

# 查询数据

select ip , count(*) from test group by ip order by count(*);

gpload

# 创建控制文件

# 加载数据

gpload -f my_load.yml

copy

COPY country FROM '/data/gpdb/country_data'

WITH DELIMITER '|' LOG ERRORS INTO err_country

SEGMENT REJECT LIMIT 10 ROWS;

Greenplum数据库卸载(UNLOAD)数据

gpfdist外部表

# 创建可写外部表

CREATE WRITABLE EXTERNAL TABLE unload_expenses

( LIKE expenses )

LOCATION ('gpfdist://etlhost-1:8081/expenses1.out',

'gpfdist://etlhost-2:8081/expenses2.out')

FORMAT 'TEXT' (DELIMITER ',')

DISTRIBUTED BY (exp_id);

执行sql文件


psql gpdbname -f yoursqlfile.sql

或者psql登陆后执行

\i yoursqlfile.sql



来自 “ ITPUB博客 ” ,链接:http://blog.itpub.net/30604784/viewspace-2130601/,如需转载,请注明出处,否则将追究法律责任。

转载于:http://blog.itpub.net/30604784/viewspace-2130601/

;