python库对库数据同步脚本--适用于定时任务

- 使用场景
数据采集平台采集到设备端数据后，由脚本定时将数据从数据中台同步到业务系统（库对库同步）。

脚本：

1、基础通用连接脚本 synchronize_base.py（tasks.py 中通过 `from synchronize_base import *` 引用，文件名需与之保持一致）

import logging
import datetime
import pymssql
import psycopg2
import pymysql

# Filesystem locations and interpreter path. Only LOG_DIR is used in the
# visible code; the other two are exported for importers of this module.
LOG_DIR = "/storage/workspace/logs"
TASK_SCRIPT_DIR = "/storage/workspace/tasks"
PYTHON_EXE = "/usr/bin/python3"

# Captured once at import time; the date part selects the per-day log file.
NOW = datetime.datetime.now()
LOG_FILE_NAME = NOW.strftime("runlog_%Y%m%d.log")

# Root logger writes everything from DEBUG upwards to LOG_DIR/LOG_FILE_NAME.
# NOTE(review): the format string has closing brackets without opening ones;
# kept byte-for-byte because log consumers may depend on it.
logging.basicConfig(
    level=logging.DEBUG,
    filename="/".join((LOG_DIR, LOG_FILE_NAME)),
    format="%(asctime)s] %(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
def connect_db(source):
    """Open a new database connection for the backend named by ``source.db_type``.

    Args:
        source: Object exposing ``db_type``, ``host``, ``port``, ``username``,
            ``password`` and ``database`` attributes.

    Returns:
        A new connection object created by ``pymssql`` (``"mssql"``),
        ``psycopg2`` (``"pgsql"``) or ``pymysql`` (``"mysql"``).

    Raises:
        ValueError: If ``source.db_type`` is not one of the supported values.
    """
    db_type = source.db_type

    if db_type == "mssql":
        # pymssql is given the port as part of the host string ("host:port").
        return pymssql.connect(
            host=source.host + ":" + str(source.port),
            user=source.username,
            password=source.password,
            database=source.database,
        )

    if db_type == "pgsql":
        return psycopg2.connect(
            host=source.host,
            port=source.port,
            user=source.username,
            password=source.password,
            database=source.database,
        )

    if db_type == "mysql":
        return pymysql.connect(
            host=source.host,
            port=source.port,
            user=source.username,
            password=source.password,
            database=source.database,
        )

    # Name the offending value so a misconfigured source is easy to spot.
    raise ValueError(
        "unsupported db_type {!r}; expected one of 'mssql', 'pgsql', 'mysql'".format(
            db_type
        )
    )


class HydroDataSource(object):
    """Connection settings plus a station registry for one database.

    The connection is created lazily by ``make_conn`` and cached on the
    instance, so repeated calls reuse it while it is still open.
    """

    def __init__(self, db_type, host, port, username, password, database) -> None:
        """Store the connection settings; no connection is opened here.

        Args:
            db_type: Backend selector understood by ``connect_db``
                (``"mssql"``, ``"pgsql"`` or ``"mysql"``).
            host: Database host.
            port: Database port.
            username: Login user.
            password: Login password.
            database: Database name.
        """
        super().__init__()

        self.db_type = db_type
        self.host = host
        self.port = port
        self.username = username
        self.password = password
        self.database = database

        # Maps table name -> {station code: True}.
        self.stations = {}

        # Cached connection; populated on the first make_conn() call.
        self.current_conn = None

    def addStation(
        self, table_name, station_code, has_power=True, power_table_name="ST_DEFAULT_TABLE"
    ) -> None:
        """Register ``station_code`` under ``table_name``.

        Args:
            table_name: Table the station belongs to.
            station_code: Station identifier to register.
            has_power: When true, the station is also registered under
                ``power_table_name``.
            power_table_name: Table used for the additional registration.
        """
        self.stations.setdefault(table_name, {})[station_code] = True
        # Only touch the power table when the station actually has one;
        # previously this ran unconditionally and could raise KeyError.
        if has_power:
            self.stations.setdefault(power_table_name, {})[station_code] = True

    def make_conn(self):
        """Return the cached connection, reconnecting if it is missing or closed.

        Returns:
            The connection stored in ``self.current_conn``.
        """
        conn = self.current_conn
        if conn is None:
            stale = True
        elif self.db_type == "mssql":
            # NOTE(review): relies on a private pymssql attribute - verify
            # against the installed driver version.
            stale = not conn._conn.connected
        elif self.db_type == "pgsql":
            stale = bool(conn.closed)
        elif self.db_type == "mysql":
            # NOTE(review): relies on a private pymysql attribute - verify
            # against the installed driver version.
            stale = bool(conn._closed)
        else:
            # Unknown backend: keep whatever connection is already cached.
            stale = False

        if stale:
            self.current_conn = connect_db(self)
        return self.current_conn

    def make_test(self):
        """Run a trivial query to check that the database is reachable.

        Returns:
            The first column of ``SELECT CURRENT_TIMESTAMP`` on success, or
            ``False`` if connecting or querying raised an exception.
        """
        try:
            conn = self.make_conn()
            cursor = conn.cursor()
            try:
                cursor.execute("SELECT CURRENT_TIMESTAMP;")
                row = cursor.fetchone()
            finally:
                cursor.close()
            return row[0]
        except Exception:
            # Best-effort probe: report failure to the caller, but leave a
            # trace in the log instead of swallowing the error silently.
            logging.exception(
                "connection test failed for %s:%s/%s",
                self.host,
                self.port,
                self.database,
            )
            return False

2、业务同步脚本 tasks.py

#!/usr/bin/python3
import datetime
import logging
import math

import psycopg2
import pymssql
import pymysql

from synchronize_base import *


# Database the readings are fetched from.
# NOTE(review): credentials here are inline placeholders; confirm how real
# deployments are expected to supply them.
source_db = HydroDataSource(
    "pgsql", "127.0.0.1", 15432, "postgres", "xxxxx", "xxx"
)

# Database the readings are inserted into.
to_db = HydroDataSource(
    "pgsql", "127.0.0.1", 5432, "postgres", "xxxxx", "xxx"
)

# Length of one synchronisation window.
loop_interval_time = datetime.timedelta(hours=1)

# Current local time rounded up to a whole hour. The original called a bare
# ``ceil`` that was never imported, which raised NameError at start-up.
nowOfEpoch = datetime.datetime.fromtimestamp(
    math.ceil(datetime.datetime.now().astimezone().timestamp() / 3600) * 3600
)

# The run covers exactly one window ending at ``nowOfEpoch``.
param_from_time = nowOfEpoch - loop_interval_time
param_to_time = nowOfEpoch


def fetch_data(source, station_id, to_time):
    """Build one result row for ``station_id`` from its latest reading.

    Queries ``ST_TABLE_R`` for the most recent non-negative ``q`` whose ``tm``
    lies within the two hours ending at ``to_time``.

    Args:
        source: Object whose ``make_conn()`` returns a DB-API connection.
        station_id: Value matched against the ``stcd`` column.
        to_time: ``datetime`` used as the upper bound of the query window.

    Returns:
        ``[station_id, to_time, q, to_time]`` where ``q`` is ``None`` when no
        matching reading exists.
    """
    # NOTE(review): insert_data's statement has ten placeholders while this
    # sample builds four values; extend the row to match the target columns.
    sql = """
        SELECT q
        FROM ST_TABLE_R
        WHERE stcd = %s AND tm >= %s AND tm <= %s AND q >= 0
        ORDER BY tm DESC
        LIMIT 1;
    """
    window_start = to_time - datetime.timedelta(hours=2)

    conn = source.make_conn()
    cursor = conn.cursor()
    try:
        # Values are passed as parameters rather than formatted into the SQL.
        cursor.execute(sql, (station_id, window_start, to_time))
        row = cursor.fetchone()
    finally:
        cursor.close()

    # The row is consumed positionally, so a missing reading is recorded as
    # None instead of being skipped (which would shift later values).
    flow = row[0] if row is not None else None

    return [station_id, to_time, flow, to_time]

def insert_data(source, result_set):
    """Insert one row into ``b_water_instantaneous_flow`` and commit.

    Rows whose ``(pro_id, data_time)`` already exist are skipped by the
    statement's ``ON CONFLICT ... DO NOTHING`` clause.

    Args:
        source: Object whose ``make_conn()`` returns a DB-API connection.
        result_set: Sequence of ten values in the column order of the INSERT
            statement.

    Returns:
        Whatever the driver's ``cursor.execute`` returns.

    Raises:
        Exception: Any error from ``execute``/``commit`` is re-raised after
            the transaction has been rolled back.
    """
    # Collapse all whitespace so the statement does not depend on how the
    # literal happens to be indented.
    sql = " ".join(
        """
        INSERT INTO b_water_instantaneous_flow (pro_id,data_time,in_warehouse,out_warehouse,supply,irrigation,flood_discharge,ecology,power,create_time)
        VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
        ON CONFLICT(pro_id,data_time) DO NOTHING;
        """.split()
    )

    conn = source.make_conn()
    cursor = conn.cursor()
    try:
        rs = cursor.execute(sql, result_set)
        conn.commit()
    except Exception:
        # The connection is cached and reused by later calls; roll back so a
        # failed statement does not leave it inside a broken transaction.
        conn.rollback()
        raise
    finally:
        cursor.close()

    return rs


def synchronize(source_db, to_db, station_id="000000000000001"):
    """Copy readings from ``source_db`` into ``to_db`` one window at a time.

    Walks from the module-level ``param_from_time`` to ``param_to_time`` in
    steps of ``loop_interval_time``; for each window one row is fetched with
    ``fetch_data`` and written with ``insert_data``.

    Args:
        source_db: Data source passed to ``fetch_data``.
        to_db: Data source passed to ``insert_data``.
        station_id: Station to synchronise; defaults to the id that was
            previously hard-coded in this function.
    """
    time_format = "%Y-%m-%d %H:%M:%S"

    current_from = param_from_time
    current_to = current_from + loop_interval_time
    while current_to <= param_to_time:
        # Build the progress message once and send it to stdout and the log.
        message = "synchronize from {} to {}...".format(
            current_from.strftime(time_format),
            current_to.strftime(time_format),
        )
        print(message)
        logging.info(message)

        # fetch_data receives the window start as its upper time bound.
        row = fetch_data(source_db, station_id, current_from)
        insert_data(to_db, row)

        current_from = current_to
        current_to = current_from + loop_interval_time


# Run the synchronisation only when executed as a script (e.g. from cron),
# not when this module is imported.
if __name__ == "__main__":
    print("start synchronize...")
    try:
        synchronize(source_db, to_db)
    except Exception:
        # Record the failure in the run log before the process exits, so an
        # unattended cron run leaves a trace.
        logging.exception("synchronize failed")
        raise
    print("finish synchronize...")

说明：以上代码仅展示主要实现逻辑，具体请根据实际业务需求调整。当前示例查询的是中台中截至指定时间、往前两个小时内最新的一条数据；示例中 fetch_data 只返回了部分字段，实际使用时需按 insert_data 中 INSERT 语句的 10 个字段顺序补全返回值。

使用系统 crond 定时执行：运行 crontab -e 进入编辑模式，添加如下任务（脚本需具有可执行权限）

*/5 * * * * /storage/tasks.py

20个经典Crontab应用实例

实例1：每1分钟执行一次

* * * * * /mnt/backup.sh

实例2：每小时的第3和第15分钟执行一次

3,15 * * * * /mnt/backup.sh

实例3：每天的8点到11点的第3和第15分钟执行一次

3,15 8-11 * * * /mnt/backup.sh

实例4：每隔两天的上午8点到11点的第3和第15分钟执行一次

3,15 8-11 */2 * * /mnt/backup.sh

实例5：每周一上午8点到11点的第3和第15分钟执行一次

3,15 8-11 * * 1 /mnt/backup.sh

实例6：每晚的21:30执行一次

30 21 * * * /mnt/backup.sh

实例7：每月1、10、22日的4 : 45执行一次

45 4 1,10,22 * * /mnt/backup.sh

实例8：每周六、周日的1 : 10执行一次

10 1 * * 6,0 /mnt/backup.sh

实例9：每天18 : 00至23 : 00之间每隔30分钟执行一次

0,30 18-23 * * * /mnt/backup.sh

实例10：每星期六的晚上23: 00 pm执行一次

0 23 * * 6 /mnt/backup.sh

实例11：每一小时执行一次

0 */1 * * * /mnt/backup.sh

实例12：每天晚上23点到第二天7点之间，每隔一小时执行一次

0 23,0-7 * * * /mnt/backup.sh

实例13：每个星期的第一天执行一次（即每周日凌晨0点执行，等价于 0 0 * * 0）.

@weekly /mnt/backup.sh

实例14：每个月的15日11点执行一次.

0 11 15 * * /mnt/backup.sh

实例15：每个月的第一天执行一次（即每个月的1日凌晨0点开始执行）.

@monthly /mnt/backup.sh

实例16：在指定的月份执行一次（在1月,4月和 6月每天晚上0点执行一次）.

0 0 * jan,apr,jun * /mnt/backup.sh

实例17：重启后执行一次.

@reboot /mnt/backup.sh

实例18：定时任务执行后发一封邮件通知.

MAILTO="raj"
1 1 * * * /mnt/backup.sh

实例19：指定shell（cron 默认使用 /bin/sh）

SHELL=/bin/bash
1 1 * * * /mnt/backup.sh

实例20：指定环境变量.

PATH=/sbin:/bin:/usr/sbin:/usr/bin
1 1 * * * /mnt/backup.sh

参考链接：Linux Crontab命令定时任务基本语法 - 天宇轩-王 - 博客园

技术交流群QQ：707196135

python库对库数据同步脚本--适用于定时任务

20个经典Crontab应用实例

悦读