Skip to content

Instantly share code, notes, and snippets.

@freemandealer
Last active June 25, 2024 06:50
Show Gist options
  • Save freemandealer/e54252777d3333a9ace2f7594faa00b6 to your computer and use it in GitHub Desktop.
Save freemandealer/e54252777d3333a9ace2f7594faa00b6 to your computer and use it in GitHub Desktop.
通过性能平台的 stepid 获取对应测试的 log 和指定 sql 的 profile
#/bin/env python3
# -*- coding: utf-8 -*-
# @Time : 2024/06/01
# @Author : zhengyu (freeman.zhang1992@gmail.com)
# @Description: 通过性能平台的 stepid 获取对应测试的 log 和指定 sql 的 profile
# 用法:python3 perf.py --stepid=stepid --sql=sql --idx=idx --output=output
# 必要参数:
# stepid: stepid
# sql: 如 q09、q19 (tpch格式)、query01、query68 (tpcds格式)
# 可选参数:
# idx: 如果sql被执行多次,可以指定idx。可以为:0(代表冷读)、1,2,3,默认 0
# output: 输出文件名的前缀,可以自由指定,默认值空
import paramiko
import re
from bs4 import BeautifulSoup
import requests
import argparse
import mysql.connector
from mysql.connector import Error
# MySQL 数据库配置信息
dbhost = '172.20.48.32' # 例如 'localhost' 或数据库服务器的 IP 地址
database = 'platform' # 你要连接的数据库名
dbuser = 'root' # 数据库用户名
dbpassword = 'xxx' # 数据库用户密码
dbport = 3306
# log server
host = '172.20.48.32'
username = "root"
password = "xxx"
def query_meta_base(query):
try:
connection = mysql.connector.connect(host=dbhost,
database=database,
user=dbuser,
password=dbpassword,
port=dbport)
if connection.is_connected():
cursor = connection.cursor()
cursor.execute(query)
results = cursor.fetchall()
for row in results:
print(row)
cursor.close()
except Error as e:
print(f"Error: {e}")
finally:
if connection.is_connected():
connection.close()
print("MySQL connection is closed")
return results
def get_workflowid_and_date_from_stepid(stepid):
query_template = '''
SELECT
`steps`.`workflow_uid` AS `workflow_uid`,
`steps`.`start_at` AS `start_at`
FROM
`steps`
WHERE
`steps`.`uid` = '{stepid}'
LIMIT
100
'''
query = query_meta_base(query_template.format(stepid=stepid))
if (query == None) or (len(query) == 0) or (query[0] == None) or (query[0][0] == None) or (query[0][1] == None):
return None, None
return query[0][0], query[0][1].date() # workflowid, date (yy-mm-dd no hour and minute)
def get_workflow_log_location(date, workflowid, stepid):
location_template = "/mnt/hdd01/performance/logs/{date}/{workflowid}/{stepid}"
return location_template.format(date=date, workflowid=workflowid, stepid=stepid)
def download_file(server_ip, username, password, remote_file_path, local_file_path):
ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
ssh.connect(server_ip, username=username, password=password)
ftp = ssh.open_sftp()
ftp.get(remote_file_path, local_file_path)
ftp.close()
def download_log_file(remote_log_location, stepid):
save_location = "/tmp/{stepid}.log".format(stepid=stepid)
download_file(host, username, password, remote_log_location, save_location)
return save_location
def get_profile_from_log(save_location, sql, idx):
ret_url = ""
with open(save_location, "r") as f:
lines = f.readlines()
for line in lines:
if f"{sql}.sql_{idx}_profile.html" in line:
print(line)
ret_url = line
break
if ret_url == "":
return ret_url
else:
url_pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
url = re.findall(url_pattern, ret_url)
return url[0]
def download_profile(url, output_path):
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
# 获取网页的可视化文本
visible_text = soup.get_text('\n')
# 将文本保存到文件
with open(output_path, "w") as f:
f.write(visible_text)
def main(stepid, sql, idx="0", output=""):
workflowid, date = get_workflowid_and_date_from_stepid(stepid)
if (workflowid == None) or (date == None):
print("Workflow ID not found, bad stepid?")
return
else:
print("workflowid:", workflowid, " date:", date)
remote_log_location = get_workflow_log_location(date, workflowid, stepid)
print("log location:", remote_log_location)
save_location = download_log_file(remote_log_location, stepid)
print("Downloaded log file to:", save_location)
profile_url = get_profile_from_log(save_location, sql, idx)
if profile_url == "":
print("Profile URL not found")
return
else:
print("Profile URL:", profile_url)
output_path = "/tmp/{output}_{stepid}_{sql}_{idx}.txt".format(output=output,
stepid=stepid,
sql=sql,
idx=idx)
download_profile(profile_url, output_path)
print("Downloaded profile to:", output_path)
return
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Process input parameters.')
parser.add_argument('--stepid', required=True, help='Step ID')
parser.add_argument('--sql', required=True, help='Query SQL')
parser.add_argument('--idx', default='0', help='Query Idx of the same SQL (optional)')
parser.add_argument('--output', default='', help='Output (optional)')
args = parser.parse_args()
main(args.stepid, args.sql, args.idx, args.output)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment