# Patch for ClusterMonitor.py (blob 39ca278 -> b6686af).
#
# Summary of the hunks below:
#   * log format gains the source line number (%(lineno)d)
#   * new DatabaseConnectError exception; DB config gains connect_timeout = 5
#   * self.conn now starts as None; get_connection() makes 10 attempts,
#     sleeping 3 s after each failure, then raises DatabaseConnectError
#   * get_cluster_status() closes and reconnects without the former
#     try/except guard
#   * the database check re-raises DatabaseConnectError instead of swallowing it
#   * new to_standalone_run(); run() connects lazily and, on
#     DatabaseConnectError, calls to_standalone_run() and resets the connection
#
# NOTE(review): this patch was recovered from a copy whose line breaks and
# whitespace runs had been collapsed. Line counts match every @@ header, but
# indentation, blank context lines and spacing before inline comments are
# inferred from Python syntax -- verify against ClusterMonitor.py, or apply
# with `git apply --ignore-whitespace`. In particular, the placement of the
# final "reset connection" block inside the except handler of run() is an
# assumption.
# NOTE(review): the now-unguarded self.conn.close() calls can presumably raise
# pymysql's Error on an already-closed connection, which is not a
# DatabaseConnectError and would escape run() -- confirm.
# NOTE(review): `ps aux | grep mysqld` also lists the grep process itself, so
# the kill command receives at least one unintended PID.
diff --git a/ClusterMonitor.py b/ClusterMonitor.py
index 39ca278..b6686af 100644
--- a/ClusterMonitor.py
+++ b/ClusterMonitor.py
@@ -9,7 +9,7 @@ import urllib.request
 import pymysql
 from pymysql import OperationalError
 
-formatter = logging.Formatter('%(asctime)s|%(levelname)s|%(message)s')
+formatter = logging.Formatter('%(asctime)s|%(levelname)s|%(lineno)d|%(message)s')
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.DEBUG)
 file_handler = logging.FileHandler("debug.log")
@@ -32,6 +32,10 @@ SLOW_CONFIG = {
 }
 
 
+class DatabaseConnectError(Exception):
+    pass
+
+
 def get_db_conf():
     return {
         'host': "127.0.0.1",  # 只能控制本地数据库
@@ -39,6 +43,7 @@ def get_db_conf():
         'user': "root",
         'password': "Yanei!23",
         'db': "rms_ge_prod",
+        'connect_timeout': 5
     }
 
 
@@ -96,17 +101,19 @@ class Server:
         if self.db_conf is None:
             logger.error(f"db_config error: {self.db_conf}")
             raise ValueError("Database URL is None")
-        self.conn = self.get_connection()
+        self.conn = None
 
     def get_connection(self):
-        while True:
+        for i in range(10):
             try:
                 return pymysql.connect(**self.db_conf)
             except Exception as e:
                 logger.exception(e)
-                time.sleep(5)
+                time.sleep(3)
                 continue
 
+        raise DatabaseConnectError("Connection error")
+
     def get_cluster_status(self):
         while True:
             try:
@@ -117,12 +124,8 @@ class Server:
             except OperationalError as e:
                 logger.error(e)
 
-                try:
-                    self.conn.close()
-                    self.conn = self.get_connection()
-                except Exception as e:
-                    logger.exception(e)
-                    time.sleep(10)
+                self.conn.close()
+                self.conn = self.get_connection()
 
     def start_standalone_mode(self):
         logger.info("Start standalone mode...")
@@ -314,6 +317,8 @@ class Server:
                     cur.execute(f"use {get_db_conf()['db']}")
                     cur.execute(f"show tables")
                     logger.info("database is normal")
+            except DatabaseConnectError as e:
+                raise e
             except Exception as e:
                 logger.error(e)
                 time.sleep(1)
@@ -351,15 +356,43 @@ class Server:
         logger.info("end cluster_run...")
         return
 
+    def to_standalone_run(self):
+        """切换为单机模式"""
+        # 当前就是单机模式,只能重启尝试
+        if os.path.exists(f"{MYSQL_CONFIG}.cluster_bak"):
+            logger.info("is standalone and try restart")
+            os.system("kill -9 $(ps aux | grep mysqld | awk '{print $2}')")
+            logger.info("kill -9 mysql")
+            time.sleep(3)
+        # 当前是集群模式,切换为单机模式
+        else:
+            logger.info("is cluster and cluster to standalone")
+            self.start_standalone_mode()
+
     def run(self):
         while True:
-            now_status = self.get_cluster_status()
-            if int(now_status.get("wsrep_cluster_size") or 0) != 0:
-                self.cluster_run()
-            else:
-                self.standalone_run()
+            try:
+                if not self.conn:
+                    self.conn = self.get_connection()
 
-            time.sleep(5)
+                now_status = self.get_cluster_status()
+                if int(now_status.get("wsrep_cluster_size") or 0) != 0:
+                    self.cluster_run()
+                else:
+                    self.standalone_run()
+
+                time.sleep(5)
+            except DatabaseConnectError as e:
+                # 数据库异常,切换为单机模式
+                logger.error(e)
+                logger.info("database connect error, to standalone")
+                self.to_standalone_run()
+                time.sleep(5)
+
+                # 重置连接
+                if self.conn:
+                    self.conn.close()
+                    self.conn = None
 
 
 if __name__ == '__main__':