I. Enable archive logging
1. Log in to the database as SYSDBA
sqlplus / AS SYSDBA
2. Enable archive log mode
Set the fast recovery area (the destination for archived redo logs) and its size:
alter system set db_recovery_file_dest = '/oradata/dg01/recovery_area' scope=spfile;
alter system set db_recovery_file_dest_size = 10G scope=spfile;
# Restart the instance and switch the database to archive log mode
shutdown immediate;
startup mount;
alter database archivelog;
alter database open;
# Verify the archive log status
archive log list;
3. Enable supplemental logging
# Enable ALL-column supplemental logging for a single table
ALTER TABLE schema.table ADD SUPPLEMENTAL LOG DATA (ALL) COLUMNS;
# Enable minimal supplemental logging for the entire database
ALTER DATABASE ADD SUPPLEMENTAL LOG DATA;
# ALL-column supplemental logging for the entire database
## Enable ALL-column supplemental logging (recommended)
alter database add supplemental log data (all) columns;
## Check whether it is enabled (should return YES)
select supplemental_log_data_all from v$database;
## Drop ALL-column supplemental logging
alter database drop supplemental log data (all) columns;
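Before creating the connector user, it can be worth confirming from a client that archive logging and ALL-column supplemental logging are really active. The following is only a minimal JDBC sketch of that check; the URL, service name (DG01) and the system credentials are placeholders, and it assumes an Oracle JDBC driver (e.g. ojdbc8) is on the classpath:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class CheckLogging {
    public static void main(String[] args) throws Exception {
        // Placeholder connection details -- adjust host, port, service name and credentials.
        String url = "jdbc:oracle:thin:@//ip:1521/DG01";
        try (Connection conn = DriverManager.getConnection(url, "system", "password");
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(
                     "SELECT log_mode, supplemental_log_data_all FROM v$database")) {
            if (rs.next()) {
                // Expect ARCHIVELOG and YES after the steps above.
                System.out.println("log_mode = " + rs.getString(1));
                System.out.println("supplemental_log_data_all = " + rs.getString(2));
            }
        }
    }
}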
II. Create the Oracle user and grant privileges
1. Create a tablespace
CREATE TABLESPACE logminer_tbs DATAFILE '/oradata/dg01/logminer_tbs.dbf' SIZE 25M REUSE AUTOEXTEND ON MAXSIZE UNLIMITED;
2. Create the user and grant privileges
CREATE USER flink IDENTIFIED BY flink DEFAULT TABLESPACE LOGMINER_TBS QUOTA UNLIMITED ON LOGMINER_TBS;
GRANT CREATE SESSION TO flink;
GRANT SET CONTAINER TO flink;
GRANT SELECT ON V_$DATABASE to flink;
GRANT FLASHBACK ANY TABLE TO flink;
GRANT SELECT ANY TABLE TO flink;
GRANT SELECT_CATALOG_ROLE TO flink;
GRANT EXECUTE_CATALOG_ROLE TO flink;
GRANT SELECT ANY TRANSACTION TO flink;
GRANT LOGMINING TO flink;
GRANT CREATE TABLE TO flink;
GRANT LOCK ANY TABLE TO flink;
GRANT ALTER ANY TABLE TO flink;
GRANT CREATE SEQUENCE TO flink;
GRANT EXECUTE ON DBMS_LOGMNR TO flink;
GRANT EXECUTE ON DBMS_LOGMNR_D TO flink;
GRANT SELECT ON V_$LOG TO flink;
GRANT SELECT ON V_$LOG_HISTORY TO flink;
GRANT SELECT ON V_$LOGMNR_LOGS TO flink;
GRANT SELECT ON V_$LOGMNR_CONTENTS TO flink;
GRANT SELECT ON V_$LOGMNR_PARAMETERS TO flink;
GRANT SELECT ON V_$LOGFILE TO flink;
GRANT SELECT ON V_$ARCHIVED_LOG TO flink;
GRANT SELECT ON V_$ARCHIVE_DEST_STATUS TO flink;
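A quick sanity check that the grants took effect is to connect as the new flink user and query one of the V$ views listed above. A minimal sketch with placeholder connection details (the service name DG01 is assumed):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class CheckFlinkGrants {
    public static void main(String[] args) throws Exception {
        // Placeholder URL -- adjust host, port and service name to your environment.
        String url = "jdbc:oracle:thin:@//ip:1521/DG01";
        try (Connection conn = DriverManager.getConnection(url, "flink", "flink");
             Statement stmt = conn.createStatement();
             // SELECT ON V_$ARCHIVED_LOG was granted above, so this should succeed
             // rather than fail with ORA-00942 / ORA-01031.
             ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM v$archived_log")) {
            if (rs.next()) {
                System.out.println("visible archived log entries: " + rs.getLong(1));
            }
        }
    }
}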
III. Code
1. pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>flink-cdc</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<scala.binary.version>2.12</scala.binary.version>
<flink.version>1.12.1</flink.version>
<target.java.version>1.8</target.java.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.ververica</groupId>
<artifactId>flink-connector-oracle-cdc</artifactId>
<version>2.1.1</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.75</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.16.22</version>
<!-- <scope>provided</scope>-->
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>druid</artifactId>
<version>1.1.20</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.47</version>
<!-- <scope>provided</scope>-->
</dependency>
<!-- Hadoop-->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.8.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.9.2</version>
<!-- <scope>provided</scope>-->
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.9.2</version>
<!-- <scope>provided</scope>-->
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.9.2</version>
<!-- <scope>provided</scope>-->
</dependency>
</dependencies>
<build>
<plugins>
<!-- Java Compiler -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<source>${target.java.version}</source>
<target>${target.java.version}</target>
</configuration>
</plugin>
<!-- We use the maven-shade plugin to create a fat jar that contains all necessary dependencies. -->
<!-- Change the value of <mainClass>...</mainClass> if your program entry point changes. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.0.0</version>
<executions>
<!-- Run shade goal on package phase -->
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<artifactSet>
<excludes>
<exclude>org.apache.flink:force-shading</exclude>
<exclude>com.google.code.findbugs:jsr305</exclude>
<exclude>org.slf4j:*</exclude>
<exclude>org.apache.logging.log4j:*</exclude>
</excludes>
</artifactSet>
<filters>
<filter>
<!-- Do not copy the signatures in the META-INF folder.
Otherwise, this might cause SecurityExceptions when using the JAR. -->
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<transformers>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>spendreport.FraudDetectionJob</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
<pluginManagement>
<plugins>
<!-- This improves the out-of-the-box experience in Eclipse by resolving some warnings. -->
<plugin>
<groupId>org.eclipse.m2e</groupId>
<artifactId>lifecycle-mapping</artifactId>
<version>1.0.0</version>
<configuration>
<lifecycleMappingMetadata>
<pluginExecutions>
<pluginExecution>
<pluginExecutionFilter>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<versionRange>[3.0.0,)</versionRange>
<goals>
<goal>shade</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore/>
</action>
</pluginExecution>
<pluginExecution>
<pluginExecutionFilter>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<versionRange>[3.1,)</versionRange>
<goals>
<goal>testCompile</goal>
<goal>compile</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore/>
</action>
</pluginExecution>
</pluginExecutions>
</lifecycleMappingMetadata>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>
2. Demo.java
import com.alibaba.druid.pool.DruidDataSource;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.ververica.cdc.connectors.oracle.OracleSource;
import com.ververica.cdc.debezium.DebeziumSourceFunction;
import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.Properties;

/**
 * @author Endless
 * Note: the parallelism must be set to 1, otherwise events are not applied in the original order.
 */
public class Demo {
public static void main(String[] args) throws Exception {
Properties props = new Properties();
// LogMiner options handed straight to Debezium: read the dictionary from the online catalog
// and mine the redo logs continuously.
props.setProperty("log.mining.strategy", "online_catalog");
props.setProperty("log.mining.continuous.mine", "true");
DebeziumSourceFunction<String> sourceFunction = OracleSource.<String>builder()
.hostname("ip")
.port(1521)
.database("database") // database name / SID, e.g. DG01
.schemaList("schema")
.tableList("schema.table1,schema.table2")
.username("flink")
.password("flink")
.debeziumProperties(props)
.deserializer(new JsonDebeziumDeserializationSchema())
.build();
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
// User for accessing HDFS (checkpoint storage)
System.setProperty("HADOOP_USER_NAME", "hadoop");
// Enable checkpointing (every 1000 ms, exactly-once)
env.enableCheckpointing(1000);
env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
// Checkpoint storage location
env.setStateBackend(new FsStateBackend("hdfs://ip:9000/user/bd/flink/checkpoint/", true));
// Keep externalized checkpoints when the job is cancelled
env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
// Source stream
DataStreamSource<String> source = env.addSource(sourceFunction);
// Parse each raw event into ArchiveLog and re-serialize it as JSON
SingleOutputStreamOperator<String> archiveLog = source.map((MapFunction<String, String>) json -> {
ArchiveLog archiveLog1 = JSON.parseObject(json, ArchiveLog.class);
return JSON.toJSONString(archiveLog1);
});
// Write the events to TiDB
archiveLog.addSink(new SinkToTiDB());
env.execute("flink cdc");
}
private static class SinkToTiDB extends RichSinkFunction<String> {
private transient DruidDataSource dataSource = null;
@Override
public void open(Configuration parameters) throws Exception {
// Initialize the Druid connection pool (TiDB speaks the MySQL protocol)
dataSource = new DruidDataSource();
dataSource.setDriverClassName("com.mysql.jdbc.Driver");
dataSource.setUsername("username");
dataSource.setPassword("password");
dataSource.setUrl("jdbc:mysql://ip:port/database?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC&useSSL=false");
dataSource.setMaxActive(5);
}
@Override
public void close() throws Exception {
// Release the connection pool when the sink is disposed.
if (dataSource != null) {
dataSource.close();
}
}
@Override
public void invoke(String json, Context context) throws Exception {
ArchiveLog archiveLog = JSON.parseObject(json, ArchiveLog.class);
String op = archiveLog.getOp();
ArchiveLogSource source = archiveLog.getSource();
String after = archiveLog.getAfter();
JSONObject jsonObject = JSON.parseObject(after);
String sql = "";
switch (op) {
// insert
case "c":
System.out.println("handling insert");
StringBuilder keyBuilder = new StringBuilder();
StringBuilder valueBuilder = new StringBuilder();
for (String item : jsonObject.keySet()) {
keyBuilder.append(item).append(",");
valueBuilder.append("'").append(jsonObject.get(item)).append("'").append(",");
}
String key = keyBuilder.substring(0, keyBuilder.length() - 1);
String value = valueBuilder.substring(0, valueBuilder.length() - 1);
sql = "insert into " + source.getSchema() + "." + source.getTable() + "(" + key + ") values(" + value + ")";
break;
// update
case "u":
System.out.println("handling update");
StringBuilder updateBuilder = new StringBuilder();
StringBuilder idBuilder = new StringBuilder();
for (String item : jsonObject.keySet()) {
if (item.equalsIgnoreCase("id")) {
idBuilder.append("'").append(jsonObject.get(item)).append("'");
} else {
updateBuilder.append(item).append("=").append("'").append(jsonObject.get(item)).append("'").append(",");
}
}
String keyValue = updateBuilder.substring(0, updateBuilder.length() - 1);
String id = idBuilder.toString();
System.out.println(keyValue);
sql = "update " + source.getSchema() + "." + source.getTable() + " set " + keyValue + " where id =" + id;
break;
// delete
case "d":
String before = archiveLog.getBefore();
JSONObject deleteObj = JSON.parseObject(before);
id = deleteObj.get("ID").toString();
System.out.println("删除逻辑");
sql = "delete from " + source.getSchema() + "." + source.getTable() + " where id = '" + id + "'";
break;
case "r":
System.out.println("读取逻辑");
break;
}
// Snapshot reads ("r") leave sql empty -- nothing to execute.
if (sql.isEmpty()) {
return;
}
Connection conn = null;
PreparedStatement ps = null;
try {
conn = dataSource.getConnection();
ps = conn.prepareStatement(sql);
ps.execute();
} catch (Exception e) {
e.printStackTrace();
} finally {
if (ps != null) {
try {
ps.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
if (conn != null) {
conn.close();
}
}
}
}
}
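The sink above builds SQL by concatenating values, which keeps the demo short but breaks as soon as a column value contains a quote. If that becomes an issue, the insert branch could bind values through PreparedStatement placeholders instead. The following is only a sketch of that idea (the class and method names are made up for illustration, and the table/column names still come from the change event, since identifiers cannot be bound):

import com.alibaba.fastjson.JSONObject;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public final class InsertWithBinds {
    /** Build "insert into schema.table (c1,c2,...) values (?,?,...)" and bind the "after" image. */
    static void insert(Connection conn, String schema, String table, JSONObject after) throws SQLException {
        List<String> columns = new ArrayList<>(after.keySet());
        String placeholders = String.join(",", Collections.nCopies(columns.size(), "?"));
        String sql = "insert into " + schema + "." + table
                + " (" + String.join(",", columns) + ") values (" + placeholders + ")";
        try (PreparedStatement ps = conn.prepareStatement(sql)) {
            for (int i = 0; i < columns.size(); i++) {
                ps.setObject(i + 1, after.get(columns.get(i))); // bind values positionally
            }
            ps.execute();
        }
    }
}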
3. ArchiveLog.java
import lombok.Data;
import lombok.ToString;

/**
 * @author Endless
 */
@Data
@ToString
public class ArchiveLog {
private String before;
private String after;
private ArchiveLogSource source;
private String op;
private String ts_ms;
private String transaction;
}
4. ArchiveLogSource.java
import lombok.Data;
import lombok.ToString;

/**
 * @author Endless
 */
@Data
@ToString
public class ArchiveLogSource {
private String version;
private String connector;
private String name;
private String ts_ms;
private String snapshot;
private String db;
private String sequence;
private String schema;
private String table;
private String txId;
private String scn;
private String commit_scn;
private String lcr_position;
}
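To see how these two POJOs line up with the connector output, the sketch below runs one of the sample change events from section IV through fastjson and reads a few fields; it needs no database, and the sample string is trimmed to just the fields used here:

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;

public class ParseEventDemo {
    public static void main(String[] args) {
        // A trimmed insert event in the same shape as the samples in section IV.
        String json = "{\"before\":null,"
                + "\"after\":{\"ID\":\"1\",\"NAME\":\"1\"},"
                + "\"source\":{\"connector\":\"oracle\",\"db\":\"DG01\",\"schema\":\"test\",\"table\":\"CDCTEST\"},"
                + "\"op\":\"c\",\"ts_ms\":\"1646652622456\"}";
        ArchiveLog event = JSON.parseObject(json, ArchiveLog.class);
        System.out.println(event.getOp());                 // c
        System.out.println(event.getSource().getTable());  // CDCTEST
        // "after" is held as a JSON string, exactly as Demo re-parses it.
        JSONObject after = JSON.parseObject(event.getAfter());
        System.out.println(after.getString("NAME"));       // 1
    }
}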
IV. Appendix
1. Sample change events read from the archive logs
# Insert
{
"before": null,
"after": {
"ID": "1",
"NAME": "1"
},
"source": {
"version": "1.5.4.Final",
"connector": "oracle",
"name": "oracle_logminer",
"ts_ms": 1646652622448,
"snapshot": "last",
"db": "DG01",
"sequence": null,
"schema": "test",
"table": "CDCTEST",
"txId": null,
"scn": "46495548600",
"commit_scn": null,
"lcr_position": null
},
"op": "c",
"ts_ms": 1646652622456,
"transaction": null
}
# Update
{
"before": {
"ID": "1",
"NAME": "1"
},
"after": {
"ID": "1",
"NAME": "2"
},
"source": {
"version": "1.5.4.Final",
"connector": "oracle",
"name": "oracle_logminer",
"ts_ms": 1646680890000,
"snapshot": "false",
"db": "DG01",
"sequence": null,
"schema": "test",
"table": "CDCTEST",
"txId": "0a0009007f231200",
"scn": "46495572789",
"commit_scn": "46495590649",
"lcr_position": null
},
"op": "u",
"ts_ms": 1646652829683,
"transaction": null
}
# Delete
{
"before": {
"ID": "1",
"NAME": "2"
},
"after": null,
"source": {
"version": "1.5.4.Final",
"connector": "oracle",
"name": "oracle_logminer",
"ts_ms": 1646819782000,
"snapshot": "false",
"db": "DG01",
"sequence": null,
"schema": "FLINK",
"table": "CDC2",
"txId": "0a00140054270000",
"scn": "2491112",
"commit_scn": "2491120",
"lcr_position": null
},
"op": "d",
"ts_ms": 1646791645954,
"transaction": null
}
# Read (snapshot)
{
"before": null,
"after": {
"ID": "1",
"NAME": "1"
},
"source": {
"version": "1.5.4.Final",
"connector": "oracle",
"name": "oracle_logminer",
"ts_ms": 1646652622448,
"snapshot": "last",
"db": "DG01",
"sequence": null,
"schema": "test",
"table": "CDCTEST",
"txId": null,
"scn": "46495548600",
"commit_scn": null,
"lcr_position": null
},
"op": "r",
"ts_ms": 1646652622456,
"transaction": null
}