增加数据库异常重启处理

master
ghb 6 months ago
parent f4bef55a8b
commit 62cf311af3

@ -9,7 +9,7 @@ import urllib.request
import pymysql import pymysql
from pymysql import OperationalError from pymysql import OperationalError
formatter = logging.Formatter('%(asctime)s|%(levelname)s|%(message)s') formatter = logging.Formatter('%(asctime)s|%(levelname)s|%(lineno)d|%(message)s')
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG) logger.setLevel(logging.DEBUG)
file_handler = logging.FileHandler("debug.log") file_handler = logging.FileHandler("debug.log")
@ -32,6 +32,10 @@ SLOW_CONFIG = {
} }
class DatabaseConnectError(Exception):
    """Signal that a database connection could not be established.

    In this file it is raised with the message ``"Connection error"`` after
    the connection retry loop finishes without returning a connection, and it
    is caught by the main run loop, which then attempts recovery.
    """
def get_db_conf(): def get_db_conf():
return { return {
'host': "127.0.0.1", # 只能控制本地数据库 'host': "127.0.0.1", # 只能控制本地数据库
@ -39,6 +43,7 @@ def get_db_conf():
'user': "root", 'user': "root",
'password': "Yanei!23", 'password': "Yanei!23",
'db': "rms_ge_prod", 'db': "rms_ge_prod",
'connect_timeout': 5
} }
@ -96,17 +101,19 @@ class Server:
if self.db_conf is None: if self.db_conf is None:
logger.error(f"db_config error: {self.db_conf}") logger.error(f"db_config error: {self.db_conf}")
raise ValueError("Database URL is None") raise ValueError("Database URL is None")
self.conn = self.get_connection() self.conn = None
def get_connection(self): def get_connection(self):
while True: for i in range(10):
try: try:
return pymysql.connect(**self.db_conf) return pymysql.connect(**self.db_conf)
except Exception as e: except Exception as e:
logger.exception(e) logger.exception(e)
time.sleep(5) time.sleep(3)
continue continue
raise DatabaseConnectError("Connection error")
def get_cluster_status(self): def get_cluster_status(self):
while True: while True:
try: try:
@ -117,12 +124,8 @@ class Server:
except OperationalError as e: except OperationalError as e:
logger.error(e) logger.error(e)
try: self.conn.close()
self.conn.close() self.conn = self.get_connection()
self.conn = self.get_connection()
except Exception as e:
logger.exception(e)
time.sleep(10)
def start_standalone_mode(self): def start_standalone_mode(self):
logger.info("Start standalone mode...") logger.info("Start standalone mode...")
@ -314,6 +317,8 @@ class Server:
cur.execute(f"use {get_db_conf()['db']}") cur.execute(f"use {get_db_conf()['db']}")
cur.execute(f"show tables") cur.execute(f"show tables")
logger.info("database is normal") logger.info("database is normal")
except DatabaseConnectError as e:
raise e
except Exception as e: except Exception as e:
logger.error(e) logger.error(e)
time.sleep(1) time.sleep(1)
@ -351,15 +356,43 @@ class Server:
logger.info("end cluster_run...") logger.info("end cluster_run...")
return return
def to_standalone_run(self):
    """Switch the node to standalone mode, or force-kill MySQL if already there.

    The existence of ``<MYSQL_CONFIG>.cluster_bak`` is used as the marker for
    "already running standalone".
    NOTE(review): presumably that backup file is created when the cluster
    config is swapped out by ``start_standalone_mode`` -- confirm.
    """
    cluster_backup = f"{MYSQL_CONFIG}.cluster_bak"

    if not os.path.exists(cluster_backup):
        # No backup marker: the node is still in cluster mode, so switch it
        # over to standalone mode.
        logger.info("is cluster and cluster to standalone")
        self.start_standalone_mode()
        return

    # Already in standalone mode: the only thing left to try is a restart.
    # This method only kills the processes; nothing here starts MySQL again.
    # NOTE(review): presumably a supervisor brings mysqld back up -- confirm.
    logger.info("is standalone and try restart")
    # NOTE(review): `grep mysqld` matches every `ps` line containing
    # "mysqld", including the grep process itself; the exit status of the
    # command is not checked.
    kill_command = "kill -9 $(ps aux | grep mysqld | awk '{print $2}')"
    os.system(kill_command)
    logger.info("kill -9 mysql")
    # Short pause after the kill before the caller continues.
    time.sleep(3)
def run(self): def run(self):
while True: while True:
now_status = self.get_cluster_status() try:
if int(now_status.get("wsrep_cluster_size") or 0) != 0: if not self.conn:
self.cluster_run() self.conn = self.get_connection()
else:
self.standalone_run()
time.sleep(5) now_status = self.get_cluster_status()
if int(now_status.get("wsrep_cluster_size") or 0) != 0:
self.cluster_run()
else:
self.standalone_run()
time.sleep(5)
except DatabaseConnectError as e:
# 数据库异常,切换为单机模式
logger.error(e)
logger.info("database connect error, to standalone")
self.to_standalone_run()
time.sleep(5)
# 重置连接
if self.conn:
self.conn.close()
self.conn = None
if __name__ == '__main__': if __name__ == '__main__':

Loading…
Cancel
Save