import schedule
import time
import pyodbc
import pandas as pd
from datetime import datetime, timedelta
from sqlalchemy import create_engine, text
import warnings
import logging
# Source databases to pull from. main_task() iterates this list; each entry
# is passed to connect_to_db() (server / database / username / password),
# 'database_name' is only used in log messages, and 'branch_id' is the value
# written into every row's branch column by process_data().
source_databases = [
    {
        'database_name': '',
        'server': '',
        'database': '',
        'username': '',
        'password': '',
        'branch_id': 0,
    },
]
# Destination database that receives the rows collected from every source.
# Same connection keys as a source entry, minus 'branch_id'.
target_database = {
    'database_name': '',
    'server': '',
    'database': '',
    'username': '',
    'password': '',
}
# Tables to synchronise, keyed by table name. For each table:
#   unique_field  - column used to locate a row when flagging it as uploaded
#   date_field    - column filtered on to select yesterday's rows
#   fendian_field - column overwritten with the source's branch_id
# NOTE(review): the values below look like placeholders ("unique identifier",
# "date field", "branch ID") rather than real column names - confirm before use.
tables = {
    'cmis_patientinfo': {
        'unique_field': '唯一标识',
        'date_field': '日期字段',
        'fendian_field': '分店ID',
    },
    'cmis_yuyue': {
        'unique_field': '唯一标识',
        'date_field': '日期字段',
        'fendian_field': '分店ID',
    },
}
def connect_to_db(config):
    """Open a pyodbc connection to the SQL Server described by *config*.

    Args:
        config: Mapping providing the keys ``server``, ``database``,
            ``username`` and ``password``.

    Returns:
        The connection object returned by ``pyodbc.connect``.
    """
    # Assemble the ODBC connection string as ';'-separated KEY=value pairs.
    parts = (
        "DRIVER={ODBC Driver 17 for SQL Server}",
        f"SERVER={config['server']}",
        f"DATABASE={config['database']}",
        f"UID={config['username']}",
        f"PWD={config['password']}",
    )
    return pyodbc.connect(";".join(parts))
def process_data(df, branch_id, fendian_field):
    """Stamp every row of *df* with the branch identifier.

    The column named *fendian_field* is created, or overwritten if it already
    exists, with the constant *branch_id*. The frame is modified in place and
    the same object is returned.

    Args:
        df: DataFrame to tag.
        branch_id: Value written into every row.
        fendian_field: Name of the branch column.

    Returns:
        The same DataFrame instance that was passed in.
    """
    df[fendian_field] = branch_id
    return df
def get_yesterday_data(connection, table, date_field):
    """Fetch yesterday's rows of *table* that are not yet flagged as uploaded.

    "Yesterday" is the half-open interval [yesterday 00:00:00, today 00:00:00)
    in local time. A half-open range is used instead of
    ``BETWEEN ... AND 23:59:59`` so that rows stamped within the last second
    of the day (e.g. 23:59:59.500) are not skipped.

    Args:
        connection: DB-API connection handed to ``pandas.read_sql``.
        table: Table name; interpolated into the SQL text, so it must come
            from trusted configuration.
        date_field: Name of the datetime column to filter on; also
            interpolated into the SQL text.

    Returns:
        DataFrame with every column of the matching rows whose ``upload``
        column is NULL or different from 5.
    """
    today_start = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
    yesterday_start = today_start - timedelta(days=1)
    query = (
        f"SELECT * FROM {table} "
        f"WHERE {date_field} >= ? AND {date_field} < ? "
        "AND (upload != 5 OR upload IS NULL)"
    )
    # pandas warns when given a raw DB-API connection instead of a SQLAlchemy
    # connectable; silence that warning for this call only rather than
    # installing a process-wide filter.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            'ignore',
            category=UserWarning,
            message="pandas only supports SQLAlchemy connectable",
        )
        return pd.read_sql(query, connection, params=[yesterday_start, today_start])
def get_target_columns(connection, table):
    """Return the column names of *table* as listed in INFORMATION_SCHEMA.

    Args:
        connection: DB-API connection whose rows expose a ``COLUMN_NAME``
            attribute (as pyodbc rows do).
        table: Table name to look up.

    Returns:
        List of column names in the order the database returns them; empty
        when no matching table is found.
    """
    cursor = connection.cursor()
    try:
        # Bind the table name as a parameter rather than splicing it into the
        # SQL text, so names containing quotes cannot break the statement.
        cursor.execute(
            "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = ?",
            (table,),
        )
        return [row.COLUMN_NAME for row in cursor.fetchall()]
    finally:
        # Release the cursor even when the query raises.
        cursor.close()
def convert_data_types(row):
    """Convert one row of values into a tuple of driver-friendly Python objects.

    Conversions applied to each value, in order:
      * null-like values (``None``, ``NaN``, ``NaT``) become ``None``;
      * ``pandas.Timestamp`` becomes ``datetime.datetime``;
      * bool / integer / float scalars, including NumPy scalar types, become
        the native ``bool`` / ``int`` / ``float``;
      * everything else is passed through unchanged.

    Args:
        row: Iterable of scalar values (for example a pandas Series).

    Returns:
        Tuple with one converted value per input value, in the same order.
    """
    converted = []
    for value in row:
        if pd.isnull(value):
            converted.append(None)
        elif isinstance(value, pd.Timestamp):
            converted.append(value.to_pydatetime())
        # The checks below also match NumPy scalars (np.bool_, np.int64,
        # np.float64, ...), which DB-API drivers such as pyodbc do not bind.
        elif pd.api.types.is_bool(value):
            converted.append(bool(value))
        elif pd.api.types.is_integer(value):
            converted.append(int(value))
        elif pd.api.types.is_float(value):
            converted.append(float(value))
        else:
            converted.append(value)
    return tuple(converted)
def insert_data_to_target(source_connection, target_connection, table, unique_field, df, db_config):
    """Copy the rows of *df* into *table* on the target and flag them as uploaded.

    Only columns present both in *df* and in the target table are inserted.
    For each row three statements are executed - insert into the target, set
    ``upload = 5`` on the target row, set ``upload = 5`` on the source row -
    and only when all three succeed are the target and then the source
    committed. If any statement fails, both connections are rolled back and
    the row is counted as a failure, so a half-processed row is not left
    committed on the target while still pending on the source.

    NOTE: the two commits are not a distributed transaction; a failure
    between the target commit and the source commit can still leave the
    source row unflagged.

    Args:
        source_connection: Open connection to the source database.
        target_connection: Open connection to the target database.
        table: Table name, identical on both sides.
        unique_field: Column identifying a row in the UPDATE statements.
        df: DataFrame holding the rows to copy.
        db_config: Source configuration; only ``database_name`` is read, for
            log messages.

    Returns:
        None. A summary line is printed, including the last error if any row
        failed.
    """
    print(f"{time.ctime()} —— {db_config['database_name']}:表-{table}: 数据插入中...")
    source_cursor = source_connection.cursor()
    target_cursor = target_connection.cursor()
    success_count = 0
    failure_count = 0
    error = ''
    try:
        target_columns = set(get_target_columns(target_connection, table))
        common_columns = [col for col in df.columns.tolist() if col in target_columns]

        # The statements are identical for every row, so build them once.
        columns = ', '.join(common_columns)
        placeholders = ', '.join(['?'] * len(common_columns))
        insert_query = f"INSERT INTO {table} ({columns}) VALUES ({placeholders})"
        mark_uploaded_query = f"UPDATE {table} SET upload = 5 WHERE {unique_field} = ?"

        for _, row in df.iterrows():
            try:
                values = convert_data_types(row[common_columns])
                # Selecting with a list yields a Series, so the key goes
                # through the same scalar conversion as the inserted values.
                (unique_value,) = convert_data_types(row[[unique_field]])

                target_cursor.execute(insert_query, values)
                target_cursor.execute(mark_uploaded_query, unique_value)
                source_cursor.execute(mark_uploaded_query, unique_value)

                # Commit only after every statement for this row succeeded.
                target_connection.commit()
                source_connection.commit()
                success_count += 1
            except Exception as e:
                failure_count += 1
                error = e
                target_connection.rollback()
                source_connection.rollback()
    finally:
        # Always release both cursors, even if the column lookup failed.
        try:
            source_cursor.close()
        finally:
            target_cursor.close()

    message = f"{time.ctime()} —— {db_config['database_name']}:表-{table}: 插入成功 {success_count} 条, 插入失败 {failure_count} 条"
    if failure_count > 0:
        message += f", 失败原因: {error}"
    print(message)
def main_task():
    """Synchronise yesterday's rows from every source database to the target.

    For each entry in ``source_databases`` a source connection and a target
    connection are opened, every table in ``tables`` is read with
    ``get_yesterday_data``, tagged with the branch id by ``process_data`` and
    written by ``insert_data_to_target``. A failure on one table is reported
    and the next table is processed; a failure on one source database is
    logged and the next database is processed. Both connections are closed
    at the end of each iteration.
    """
    for db_config in source_databases:
        # Reset per iteration: if connecting fails, ``finally`` must not try
        # to close the previous iteration's (already closed) connections.
        source_conn = None
        target_conn = None
        try:
            source_conn = connect_to_db(db_config)
            print(f"{time.ctime()} —— {db_config['database_name']}-数据库连接成功!")
            target_conn = connect_to_db(target_database)
            print(f"{time.ctime()} —— {target_database['database_name']}-数据库连接成功!")

            for table, fields in tables.items():
                unique_field = fields['unique_field']
                date_field = fields['date_field']
                fendian_field = fields['fendian_field']
                try:
                    df = get_yesterday_data(source_conn, table, date_field)
                    if not df.empty:
                        processed_df = process_data(df, db_config['branch_id'], fendian_field)
                        insert_data_to_target(source_conn, target_conn, table, unique_field, processed_df, db_config)
                    else:
                        print(f"{time.ctime()} —— {db_config['database_name']}:表-{table}: 暂无待处理的昨日数据!")
                except Exception as e:
                    # One bad table must not stop the remaining tables.
                    print(f"{time.ctime()} —— {db_config['database_name']}:表-{table}:处理数据失败!error: {e}")
        except Exception as e:
            logging.error(f"{time.ctime()} —— {db_config['database_name']}-数据库连接失败! error: {e}")
        finally:
            # Close each connection independently so an error closing one
            # does not prevent closing the other.
            try:
                if source_conn is not None:
                    source_conn.close()
                    print(f"{time.ctime()} —— 关闭数据库连接 {db_config['database_name']}")
            except Exception as e:
                logging.error(f"{time.ctime()} —— 关闭 {db_config['database_name']}-数据库连接时出错: {e}")
            try:
                if target_conn is not None:
                    target_conn.close()
                    print(f"{time.ctime()} —— 关闭数据库连接 {target_database['database_name']}")
            except Exception as e:
                logging.error(f"{time.ctime()} —— 关闭 {target_database['database_name']}-数据库连接时出错: {e}")
# Register main_task to run once a day at 03:00.
(
    schedule.every()
    .day.at("03:00")
    .do(main_task)
)

# Poll the scheduler forever, checking for due jobs every 40 seconds.
while True:
    schedule.run_pending()
    time.sleep(40)