随着集团MHA集群的日渐增长,MHA管理平台话越来越迫切.而MHA平台的建设第一步就是将这些成百上千套的MHA集群信息收集起来,便于查询和管理.
MHA主要信息如下:
(1)基础配置信息;
集团目前数据库的管理平台是在Archery的基础上打造,所以,需要将此功能嵌入到既有平台上.通过Archery系统进行查询展示.
简单来说,通过 Filebeat + Logstash + MySQL 架构 来收集保存各个集群的配置信息、启动及FailOver的log信息 和运行状态信息.
运行状态信息是通过一个小程序获取的,这个小程序每分钟执行一次,会把执行结果输出到文件中.当然这个文件是被failbeat监控的.
文件为mha_checkstatus.py
#!/usr/bin/python # -*- coding: UTF-8 -*- import os import io import re import ConfigParser Path='/etc/mha' #fout=open('输出文件名','w') for Name in os.listdir(Path) : Pathname= os.path.join(Path,Name) ## print(Pathname) ## print(Name) config =ConfigParser.ConfigParser() try: server_item = config.sections() server1_host = '' ##MHA cnf 配置文件中的节点1 server2_host = '' ##MHA cnf 配置文件中的节点2 server3_host = '' ##MHA cnf 配置文件中的节点3 mha_cnf_remark = '' if 'server1' in server_item: server1_host = config.get('server1','hostname') else: mha_cnf_remark = mha_cnf_remark + 'Server1未配置;' if 'server2' in server_item: server2_host = config.get('server2','hostname') else: mha_cnf_remark = mha_cnf_remark + 'Server2未配置;' if 'server3' in server_item: server3_host = config.get('server3','hostname') ##print(mha_cnf_remark) except Exception as e: print(e) mha_status_result ='' ###20190330 Name = Name.replace(".cnf", "") ###集群一主一从 if server1_host <> '' and server2_host <> '' and server3_host == '': cmd_mha_status ='/?/?/bin/masterha_check_status --conf='+Pathname mha_status_result = mha_status.read() if 'running(0:PING_OK)' in mha_status_result: print(Name+':::'+Pathname+':::0:::'+server1_host+':::'+mha_status_result) print(Name+':::'+Pathname+':::0:::'+server2_host+':::'+mha_status_result) if 'stopped(2:NOT_RUNNING)' in mha_status_result: print(Name+':::'+Pathname+':::None:::'+server1_host+':::'+mha_status_result) print(Name+':::'+Pathname+':::None:::'+server2_host+':::'+mha_status_result) ####集群一主两从 if server1_host <> '' and server2_host <> '' and server3_host <> '': cmd_mha_status ='/?/?/bin/masterha_check_status --conf='+Pathname mha_status_result = mha_status.read() if 'running(0:PING_OK)' in mha_status_result: print(Name+':::'+Pathname+':::0:::'+server1_host+':::'+mha_status_result) print(Name+':::'+Pathname+':::0:::'+server2_host+':::'+mha_status_result) print(Name+':::'+Pathname+':::0:::'+server3_host+':::'+mha_status_result) if 'stopped(2:NOT_RUNNING)' in mha_status_result: print(Name+':::'+Pathname+':::None:::'+server1_host+':::'+mha_status_result) print(Name+':::'+Pathname+':::None:::'+server2_host+':::'+mha_status_result) print(Name+':::'+Pathname+':::None:::'+server3_host+':::'+mha_status_result)
概况说明,就是到存放MHA配置的文件夹,根据每个集群的配置文档,去逐一执行下masterha_check_status,把结果格式化,输出到指定的文件中.这个就是每个集群的状态数据.通过filebeat实时汇报上去.
触发的方式可以是crontab,每分钟执行一次.再本案中是输出到?/?/checkmhastatus/masterha_check_status.log 中.
形式类似如下:
*/1 * * * * python /?/?/mha_checkstatus.py >> /?/?/masterha_check_status.log
CREATE TABLE +dbmha_status+ ( +id+ int NOT NULL AUTO_INCREMENT, +host+ varchar(100) NOT NULL, +clustername+ varchar(200) NOT NULL, +logpath+ varchar(500) NOT NULL, +confpath+ varchar(500) NOT NULL, +mhstatus+ varchar(100) NOT NULL, +serverip+ varchar(100) NOT NULL, +info+ varchar(2000) NOT NULL, +create_time+ datetime DEFAULT CURRENT_TIMESTAMP COMMENT '收集时间', PRIMARY KEY (+id+), KEY +idx_createtime+ (+create_time+) ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
CREATE TABLE +dbmha_log+ ( +id+ int NOT NULL AUTO_INCREMENT, +host+ varchar(100) NOT NULL, +clustername+ varchar(200) NOT NULL, +filename+ varchar(200) NOT NULL, +logpath+ varchar(500) NOT NULL, +message+ longtext NOT NULL, +create_time+ datetime DEFAULT CURRENT_TIMESTAMP COMMENT '收集时间', PRIMARY KEY (+id+) ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
CREATE TABLE +dbmha_conf_info+ ( +id+ int NOT NULL AUTO_INCREMENT, +host+ varchar(100) NOT NULL, +clustername+ varchar(200) NOT NULL DEFAULT '', +confpath+ varchar(500) NOT NULL DEFAULT '', +manager_log+ varchar(500) NOT NULL DEFAULT '', +manager_workdir+ varchar(500) NOT NULL DEFAULT '', +master_binlog_dir+ varchar(500) NOT NULL DEFAULT '', +failover_script+ varchar(500) NOT NULL DEFAULT '', +online_change_script+ varchar(500) NOT NULL DEFAULT '', +password+ varchar(128) NOT NULL DEFAULT '', +ping_interval+ varchar(100) NOT NULL DEFAULT '', +remote_workdir+ varchar(100) NOT NULL DEFAULT '', +repl_password+ varchar(128) NOT NULL DEFAULT '', +repl_user+ varchar(20) NOT NULL DEFAULT '', +ssh_user+ varchar(20) NOT NULL DEFAULT '', +user+ varchar(20) NOT NULL DEFAULT '', +serverip1+ varchar(100) NOT NULL DEFAULT '', +port1+ varchar(10) NOT NULL DEFAULT '', +candidate_master1+ varchar(5) NOT NULL DEFAULT '', +check_repl_delay1+ varchar(20) NOT NULL DEFAULT '', +serverip2+ varchar(100) NOT NULL DEFAULT '', +port2+ varchar(10) NOT NULL DEFAULT '', +candidate_master2+ varchar(5) NOT NULL DEFAULT '', +check_repl_delay2+ varchar(20) NOT NULL DEFAULT '', +serverip3+ varchar(100) NOT NULL DEFAULT '', +port3+ varchar(10) NOT NULL DEFAULT '', +candidate_master3+ varchar(5) NOT NULL DEFAULT '', +check_repl_delay3+ varchar(20) NOT NULL DEFAULT '', +info+ longtext NOT NULL, +create_time+ datetime DEFAULT CURRENT_TIMESTAMP COMMENT '收集时间', PRIMARY KEY (+id+), KEY +idx_createtime+ (+create_time+) ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
.............. - type: log paths: - /?/?/masterha_check_status.log fields: log_type: mha-status db_host: 111.111.XXX.1XX ###这个IP为mha Mnaager所在serverip - type: log paths: - /?/mhaconf/*.cnf fields: - /?/?/mha/*/*.log fields: log_type: mysql-mha db_host: 111.111.XXX.XXX ................
# Sample Logstash configuration for creating a simple # Beats -> Logstash -> Elasticsearch pipeline. input { beats { port => 5044 } } filter { if [fields][log_type] == "mysql-mha" { match => ["message", "(?m)^%{TIMESTAMP_ISO8601:timestamp} %{BASE10NUM} \[%{WORD:error_level}\] %{GREEDYDATA:error_msg}$"] match => { "[log][file][path]" => ".*(\\|\/).*(\\|\/)(?.*)(\\|\/).* "} match => { "[log][file][path]" => ".*(\\|\/).*(\\|\/).*(\\|\/)(?.*) "} date { match=> ["timestamp", "ISO8601"] remove_field => ["timestamp"] copy => { "[log][file][path]" => "logpath" "[fields][db_host]" => "manager_ip" } remove_field => ["@version", "beat", "input", "offset", "prospector", "source", "tags"] if [fields][log_type] == "mha-cnf" { split => ["message","server"] add_field => {"message1" => "%{[message][1]}"} add_field => {"messages1" => "%{[message][2]}"} add_field => {"messages2" => "%{[message][3]}"} add_field => {"messages3" => "%{[message][4]}"} add_field => {"dft_password" => "*********"} add_field => {"dft_repl_password" => "*********"} source => "message1" field_split => "\n" include_keys => ["manager_log", "manager_workdir", "master_binlog_dir", "master_ip_failover_script", "master_ip_online_change_script", "ping_interval", "remote_workdir", "repl_user", "ssh_user", "user" ] prefix => "dft_" remove_char_value => "<>\[\]," source => "messages1" field_split => "\n" include_keys => ["candidate_master", "check_repl_delay", "hostname", "port" ] prefix => "s1_" source => "messages2" field_split => "\n" default_keys => [ "s2_candidate_master", "", "s2_check_repl_delay", "", "s2_hostname","", "s2_port","" include_keys => ["candidate_master", "check_repl_delay", "hostname", "port" ] prefix => "s2_" source => "messages3" field_split => "\n" default_keys => [ "s3_candidate_master", "", "s3_check_repl_delay", "", "s3_hostname","", "s3_port","" include_keys => ["candidate_master", "check_repl_delay", "hostname", "port" ] prefix => "s3_" match => { "[log][file][path]" => ".*(\\|\/).*(\\|\/)(?.*)(\\|\/).* "} match => { "[log][file][path]" => ".*(\\|\/).*(\\|\/).*(\\|\/)(?.*) "} copy => { "[fields][db_host]" => "manager_ip" } copy => { "[log][file][path]" => "conf_path" } gsub => [ "message", "需要加密的***密***码", "*********", "message", "需要加密的其他字符", "*********" date { match=> ["timestamp", "ISO8601"] remove_field => ["timestamp"] remove_field => ["@version", "beat", "input", "offset", "prospector", "source", "tags"] if [fields][log_type] == "mha-status" { split => ["message",":::"] add_field => {"cluster_name" => "%{[message][0]}"} add_field => {"conf_path" => "%{[message][1]}"} add_field => {"masterha_check_status" => "%{[message][2]}"} add_field => {"server" => "%{[message][3]}"} add_field => {"info" => "%{[message][4]}"} match => { "[log][file][path]" => ".*(\\|\/).*(\\|\/).*(\\|\/)(?.*) "} copy => { "[fields][db_host]" => "manager_ip" } date { match=> ["timestamp", "ISO8601"] remove_field => ["timestamp"] remove_field => ["@version", "beat", "input", "offset", "prospector", "source", "tags"] if [fields][log_type] == "mysql-mha" { driver_jar_path => "/?/?/logstash-⑦⑥0/vendor/jar/jdbc/mysql-connector-java-⑤1.4⑦jar" driver_class => "com.mysql.jdbc.Driver" connection_string => "jdbc:mysql://120.120.XXX.XXX:3306/archery?user=ts23_dbhaclusterpassword=??" statement => ["INSERT INTO dbmha_log (host,clustername,filename,logpath, message) VALUES(?, ?, ?, ?, ?)","%{manager_ip}","%{product}","%{filename}","%{logpath}","%{message}"] if [fields][log_type] == "mha-status" { driver_jar_path => "/?/?/logstash-⑦⑥0/vendor/jar/jdbc/mysql-connector-java-⑤1.4⑦jar