Bootstrap

ClickHouse实现表自增ID的代码及相关逻辑

一、介绍

ClickHouse表自增ID主要有两种方式:

  • insert数据时,手动维护一个全局ID
  • 给表设置uuid字段,使用 generateUUIDv4()函数赋予默认值。

这里的话推荐手动维护一个全局的自增ID,不推荐使用UUID的方式,主要原因有以下几个:

  • uuid字段占用的存储空间比手动维护自增ID的空间大很多,可参见性能对比章节。
  • 使用uuid字段,如果该表涉及到按批次分别读取,UUID无法实现,因为随机生成的UUID不具备递增顺序,无法通过比较大小来划分批次。
  • 如果需要做分页,使用UUID时如何设置偏移量是一个问题。

二、手动维护表的全局自增ID(推荐)

代码思路

  • 在插入数据之前,可以为每条记录生成一个唯一的自增ID,并将其作为排序的依据。
  • 多线程入库的话,使用Java的原子类AtomicLong来确保ID的唯一性
  • 程序启动时或者插入前查询该表最大的ID:在插入新数据之前,从表中查询当前的最大ID。这个最大ID将作为新数据插入时的起始ID。
  • 然后每一次insert给DATAID赋予 AtomicLong类的值+1 即可

表结构

-- SQL-log table for the "manually maintained ID" approach: DATAID is a plain
-- UInt64 with no DEFAULT expression, so the inserting client must supply it.
CREATE TABLE default.SQLLOG_FULL20240609_1809
(
    `DATAID` UInt64,
    `LOGTIME` DateTime64(3,'Asia/Shanghai'),
    -- CURRTIME is filled with now() when the insert does not provide a value.
    `CURRTIME` DateTime('Asia/Shanghai') DEFAULT now(),
    `SESS` String,
    `THRD` Int64,
    `USERNAME` String,
    `TRXID` Int64,
    `STMT` String,
    `APPNAME` String,
    `IP` String,
    `INFOSTR` String
)
ENGINE = MergeTree
-- NOTE(review): toHour presumably yields the hour of day (0-23), which would
-- cap this table at 24 partitions -- confirm this is intended.
PARTITION BY toHour(LOGTIME)
-- DATAID leads the sorting key, followed by LOGTIME.
ORDER BY (DATAID,LOGTIME)
SETTINGS index_granularity = 8192;

批量插入代码

/**
 * Holds the fields of one parsed SQL-log line as raw strings.
 *
 * <p>Accessors are generated by Lombok's {@code @Data}. The batch-insert code
 * reads {@code time}, {@code sess}, {@code thrd}, {@code user}, {@code trxid},
 * {@code stmt}, {@code appname}, {@code ip} and {@code infoStr} through those
 * getters; {@code thrd} and {@code trxid} are parsed with {@code Long.parseLong}
 * there, so they are expected to hold decimal integers.
 */
@Data
public class LineBean
{
	// Constants are now final so they cannot be reassigned at runtime.
	// NOTE(review): presumably these are the allowed values of lineType -- confirm.
	public static final int SQL_TYPE_PARAMS = 1;
	public static final int SQL_TYPE_SQLSTR = 2;
	private String time;
	private String sess;
	private String thrd;
	private String user;
	private String trxid;
	private String stmt;
	private String appname;
	private String ip;
	private String infoStr;
	private Integer lineType; // distinguishes a parameter line from a SQL line
	private String lineExecSqlType;
}

/**
 * Batch-inserts {@link LineBean} rows into the ClickHouse SQL-log table while
 * assigning each row a process-wide, monotonically increasing {@code DATAID}.
 *
 * <p>Usage: seed {@link #idGenerator} once with the table's current maximum
 * {@code DATAID} (see {@link #main(String[])}), then call
 * {@link #batchDataListCommit(List)} for every batch. IDs are taken from an
 * {@link AtomicLong}, so concurrent callers never receive the same value.
 *
 * <p>NOTE(review): the JDBC URL and credentials are hard-coded here; consider
 * moving them to configuration.
 */
public class ClickHouseDbConnection {

    /** JDBC endpoint shared by every connection this class opens. */
    private static final String JDBC_URL = "jdbc:clickhouse://192.168.112.146:8123/default";

    /** Source of DATAID values; holds the last ID that was handed out. */
    public static AtomicLong idGenerator = new AtomicLong(0);

    /**
     * Example entry point: seeds the ID generator from the table, after which
     * the caller is expected to build batches and pass them to
     * {@link #batchDataListCommit(List)}.
     *
     * @param args unused
     * @throws IllegalStateException if the current maximum DATAID cannot be read
     */
    public static void main(String[] args) {
        // 1. Read the table's current maximum DATAID and seed the generator with it.
        final long tableMaxId;
        try {
            tableMaxId = querySqlLogTableMaxId();
        } catch (SQLException e) {
            // Starting with an unseeded generator would reuse existing IDs, so fail loudly.
            throw new IllegalStateException("failed to query the current max DATAID", e);
        }
        // incrementAndGet() is applied before each insert, so seeding with the
        // maximum itself makes the first new row receive max + 1.
        idGenerator.set(tableMaxId);
        // 2. Build the List<LineBean> to insert.
        // 3. Call batchDataListCommit(...) with that list.
    }

    /** Builds the login properties used for every connection. */
    private static Properties connectionProperties() {
        Properties properties = new Properties();
        properties.setProperty("user", "default");
        properties.setProperty("password", "root");
        return properties;
    }

    /**
     * Returns the largest DATAID currently stored in the configured SQL-log table.
     *
     * @return the value of {@code MAX(DATAID)}, or 0 if the query yields no row
     * @throws SQLException if connecting or querying fails
     */
    private static Long querySqlLogTableMaxId() throws SQLException {
        // Same table-name lookup that batchDataListCommit uses, so both methods
        // always target the same table.
        String sqlLogTableName = PropertiesUtils.getCustomPropertyValue(CommonConstant.DBMS_SYNC_TABLE_NAME);
        // A table identifier cannot be bound as a '?' parameter; the name comes
        // from application configuration, not from user input.
        String querySql = "SELECT MAX(DATAID) AS DATAID FROM " + sqlLogTableName;
        try (Connection connection = DriverManager.getConnection(JDBC_URL, connectionProperties());
             PreparedStatement statement = connection.prepareStatement(querySql);
             ResultSet resultSet = statement.executeQuery()) {
            return resultSet.next() ? resultSet.getLong(1) : 0L;
        }
    }

    /**
     * Inserts all rows of {@code batchData} in a single JDBC batch, assigning
     * each row the next value of {@link #idGenerator} as its DATAID.
     *
     * <p>SQL failures (including failures while closing resources) are logged
     * and not rethrown. IDs drawn for a failed batch are not returned to the
     * generator, so gaps in DATAID are possible.
     *
     * @param batchData rows to insert; {@code null} or empty is a no-op
     */
    public static void batchDataListCommit(List<LineBean>  batchData) {
        if (batchData == null || batchData.isEmpty()) {
            return; // nothing to send; avoid opening a connection
        }

        String sqlLogTableName = PropertiesUtils.getCustomPropertyValue(CommonConstant.DBMS_SYNC_TABLE_NAME);
        // DATAID must be listed (and given its own placeholder) because the
        // loop below binds it as the first of ten parameters.
        String insertSql = "insert into " + sqlLogTableName
                + "(DATAID, LOGTIME, SESS, THRD, USERNAME, TRXID, STMT, APPNAME, IP, INFOSTR)"
                + " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";

        try (Connection connection = DriverManager.getConnection(JDBC_URL, connectionProperties())) {
            // Auto-commit is left enabled; the rows are sent together by executeBatch().
            connection.setAutoCommit(true);
            try (PreparedStatement preparedStatement = connection.prepareStatement(insertSql)) {
                for (LineBean lineBean : batchData) {
                    int idx = 1;
                    preparedStatement.setLong(idx++, idGenerator.incrementAndGet());
                    preparedStatement.setTimestamp(idx++, new Timestamp(DateUtil.parse(lineBean.getTime(), DateTimeUtils.FORMATTER_STR).getTime()));
                    preparedStatement.setString(idx++, lineBean.getSess());
                    preparedStatement.setLong(idx++, Long.parseLong(lineBean.getThrd()));
                    preparedStatement.setString(idx++, lineBean.getUser());
                    preparedStatement.setLong(idx++, Long.parseLong(lineBean.getTrxid()));
                    preparedStatement.setString(idx++, lineBean.getStmt());
                    preparedStatement.setString(idx++, lineBean.getAppname());
                    preparedStatement.setString(idx++, lineBean.getIp());
                    preparedStatement.setString(idx++, lineBean.getInfoStr());
                    preparedStatement.addBatch();
                }
                preparedStatement.executeBatch();
            }
        } catch (SQLException e) {
            StaticLog.error(e,"batch dataList error info {}",e.getMessage());
        }
    }
}

三、给表设置自增UUID字段

create table时新增字段 `DATAID UUID DEFAULT generateUUIDv4()`,插入数据时会默认生成唯一键
image.png

表结构

-- SQL-log table for the "UUID" approach: DATAID is filled by generateUUIDv4()
-- whenever the insert does not provide a value for it.
CREATE TABLE default.SQLLOG_FULL20240609_1220
(
  `DATAID` UUID DEFAULT generateUUIDv4(),
  `LOGTIME` DateTime64(3,'Asia/Shanghai'),
  -- CURRTIME is filled with now() when the insert does not provide a value.
  `CURRTIME` DateTime('Asia/Shanghai') DEFAULT now(),
  `SESS` String,
  `THRD` Int64,
  `USERNAME` String,
  `TRXID` Int64,
  `STMT` String,
  `APPNAME` String,
  `IP` String,
  `INFOSTR` String
)
ENGINE = MergeTree
-- NOTE(review): toHour presumably yields the hour of day (0-23), which would
-- cap this table at 24 partitions -- confirm this is intended.
PARTITION BY toHour(LOGTIME)
-- DATAID is not part of the sorting key here; rows are ordered by time columns only.
ORDER BY (LOGTIME, CURRTIME)
SETTINGS index_granularity = 8192;

批量插入代码

/**
 * Holds the fields of one parsed SQL-log line as raw strings.
 *
 * <p>Accessors are generated by Lombok's {@code @Data}. The batch-insert code
 * reads {@code time}, {@code sess}, {@code thrd}, {@code user}, {@code trxid},
 * {@code stmt}, {@code appname}, {@code ip} and {@code infoStr} through those
 * getters; {@code thrd} and {@code trxid} are parsed with {@code Long.parseLong}
 * there, so they are expected to hold decimal integers.
 */
@Data
public class LineBean
{
	// Constants are now final so they cannot be reassigned at runtime.
	// NOTE(review): presumably these are the allowed values of lineType -- confirm.
	public static final int SQL_TYPE_PARAMS = 1;
	public static final int SQL_TYPE_SQLSTR = 2;
	private String time;
	private String sess;
	private String thrd;
	private String user;
	private String trxid;
	private String stmt;
	private String appname;
	private String ip;
	private String infoStr;
	private Integer lineType; // distinguishes a parameter line from a SQL line
	private String lineExecSqlType;
}



  /**
   * Inserts all rows of {@code batchData} in a single JDBC batch. DATAID is
   * not bound here; it is left out of the column list so the table's DEFAULT
   * expression supplies it.
   *
   * <p>SQL failures (including failures while closing resources) are logged
   * and not rethrown.
   *
   * <p>NOTE(review): the JDBC URL and credentials are hard-coded; consider
   * moving them to configuration.
   *
   * @param batchData rows to insert; {@code null} or empty is a no-op
   */
  public static void batchDataListCommit(List<LineBean>  batchData) {
    if (batchData == null || batchData.isEmpty()) {
        return; // nothing to send; avoid opening a connection
    }

    String url = "jdbc:clickhouse://192.168.112.146:8123/default";
    Properties properties = new Properties();
    properties.setProperty("user", "default");
    properties.setProperty("password", "root");

    String sqlLogTableName = PropertiesUtils.getCustomPropertyValue(CommonConstant.DBMS_SYNC_TABLE_NAME);
    // A table identifier cannot be bound as a '?' parameter; the name comes
    // from application configuration, not from user input.
    String insertSql = "insert into "+sqlLogTableName+"(LOGTIME, SESS, THRD, USERNAME, TRXID, STMT, APPNAME, IP, INFOSTR) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)";

    // try-with-resources closes the statement first, then the connection,
    // on both the success and the failure path.
    try (Connection connection = DriverManager.getConnection(url, properties)) {
        // Auto-commit is left enabled; the rows are sent together by executeBatch().
        connection.setAutoCommit(true);
        try (PreparedStatement preparedStatement = connection.prepareStatement(insertSql)) {
            for (LineBean lineBean : batchData) {
                int idx = 1;
                preparedStatement.setTimestamp(idx++, new Timestamp(DateUtil.parse(lineBean.getTime(), DateTimeUtils.FORMATTER_STR).getTime()));
                preparedStatement.setString(idx++, lineBean.getSess());
                preparedStatement.setLong(idx++, Long.parseLong(lineBean.getThrd()));
                preparedStatement.setString(idx++, lineBean.getUser());
                preparedStatement.setLong(idx++, Long.parseLong(lineBean.getTrxid()));
                preparedStatement.setString(idx++, lineBean.getStmt());
                preparedStatement.setString(idx++, lineBean.getAppname());
                preparedStatement.setString(idx++, lineBean.getIp());
                preparedStatement.setString(idx++, lineBean.getInfoStr());
                preparedStatement.addBatch();
            }
            preparedStatement.executeBatch();
        }
    } catch (SQLException e) {
        StaticLog.error(e,"batch dataList error info {}",e.getMessage());
    }
  }

四、关于表自增方式的性能对比

插入性能上差距不大,使用相同的1G日志进行批量入库
1)不添加自增ID,入库效率稳定在17S左右,表大小118M
2)使用UUID方式的自增ID方式,入库效率在17-21S左右,表大小为153M,较不添加自增ID时增长35M
3)使用外部维护表自增ID方式,入库效率在18S左右,表大小127M,较无主键时增长9M
image.png
image.png
image.png

;