hf_text-generation-inference/load_tests/parse_load_test.py

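"""Parse k6 load-test summary.json files and plot comparison charts.

Walks results/constant_vus and results/constant_arrival_rate (see main() below),
builds one DataFrame row per run, and saves a 3x2 grid of latency, throughput
and error-rate plots to <test_type>.png.
"""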
import json
import os
import re
from enum import Enum
import pandas as pd
from loguru import logger
from matplotlib import pyplot as plt
import scienceplots
plt.style.use('science')


class TestType(Enum):
    CONSTANT_VUS = "constant_vus"
    CONSTANT_ARRIVAL_RATE = "constant_arrival_rate"


def get_nested(obj, path, default=None):
    """Safely walk a dot-separated path through nested dicts and lists."""
    for key in path.split("."):
        if obj is None:
            return default
        if isinstance(obj, list):
            try:
                key = int(key)
            except ValueError:
                return default
            obj = obj[key]
        elif isinstance(obj, dict):
            obj = obj.get(key, default)
    return obj
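

# Example: get_nested() walks a dot-separated path across nested dicts and lists,
# falling back to `default` whenever a key or index is missing, e.g.
#   get_nested({"root_group": {"checks": [{"passes": 3}]}}, "root_group.checks.0.passes") -> 3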


def parse_json_files(directory: str, test_type: TestType) -> pd.DataFrame:
    """Collect every k6 *summary.json under `directory` into one DataFrame row per run."""
    metrics_to_keep = {'inter_token_latency': {'y': 'Time (ms)'}, 'end_to_end_latency': {'y': 'Time (ms)'},
                       'time_to_first_token': {'y': 'Time (ms)'}, 'tokens_throughput': {'y': 'Tokens/s'},
                       'tokens_received': {'y': 'Count'}}
    df = pd.DataFrame()
    for file in os.listdir(directory):
        if file.endswith("summary.json"):
            filepath = os.path.join(directory, file)
            with open(filepath, 'r') as f:
                data = json.load(f)
            if test_type == TestType.CONSTANT_VUS:
                entry = {
                    "vus": data['k6_config']['vus'],
                    "duration": data['k6_config']['duration']
                }
            elif test_type == TestType.CONSTANT_ARRIVAL_RATE:
                entry = {
                    'pre_allocated_vus': data['k6_config']['pre_allocated_vus'],
                    'rate': data['k6_config']['rate'],
                    'duration': data['k6_config']['duration']
                }
            entry['input_type'] = data['k6_config']['input_type']
            entry['test_duration'] = data['state']['testRunDurationMs'] / 1000.
            entry['requests_ok'] = get_nested(data, 'root_group.checks.0.passes', 0)
            entry['requests_fail'] = get_nested(data, 'root_group.checks.0.fails', 0)
            entry['dropped_iterations'] = get_nested(data, 'metrics.dropped_iterations.values.count', 0)
            # failed checks plus dropped iterations give the total number of dropped requests
            entry['dropped_requests'] = entry['requests_fail'] + entry['dropped_iterations']
            total_requests = entry['requests_ok'] + entry['dropped_requests']
            # guard against empty runs to avoid a division by zero
            entry['error_rate'] = entry['dropped_requests'] / total_requests * 100.0 if total_requests else 0.0
            entry['name'] = data['k6_config']['name']
            for metric, values in sorted(data['metrics'].items()):
                if metric in metrics_to_keep:
                    for value_key, value in values['values'].items():
                        if value_key == 'p(90)' or value_key == 'count':  # keep p(90) for trends, count for counters
                            entry[metric] = value
            # normalise the token count by the run duration and rescale inter-token latency
            if 'tokens_throughput' in entry and 'test_duration' in entry:
                entry['tokens_throughput'] = entry['tokens_throughput'] / entry['test_duration']
            if 'inter_token_latency' in entry:
                entry['inter_token_latency'] = entry['inter_token_latency'] / 1000.
            df = pd.concat([df, pd.DataFrame(entry, index=[0])])
    return df
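

# For reference, the subset of a k6 summary.json that the parser above relies on
# looks roughly like this (shape inferred from the lookups above; real files carry
# many more fields, and the values shown are illustrative only):
#
# {
#   "k6_config": {"name": "tgi", "input_type": "...", "duration": "60s",
#                 "vus": 1, "pre_allocated_vus": 100, "rate": 10},
#   "state": {"testRunDurationMs": 60000},
#   "root_group": {"checks": [{"passes": 100, "fails": 0}]},
#   "metrics": {"inter_token_latency": {"values": {"p(90)": 42.0}},
#               "tokens_throughput": {"values": {"count": 12345}},
#               "dropped_iterations": {"values": {"count": 0}}}
# }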


def plot_metrics(model_name: str, df: pd.DataFrame, test_type: TestType, save_name: str):
    """Plot a 3x2 grid of P90 latency, throughput and error metrics, one line per engine."""
    vus_param = 'vus' if test_type == TestType.CONSTANT_VUS else 'rate'
    fig, axs = plt.subplots(3, 2, figsize=(15, 20))
    fig.tight_layout(pad=6.0)
    fig.subplots_adjust(hspace=0.2, wspace=0.2, bottom=0.15, top=0.92)
    names = sorted(df['name'].unique())
    metrics = {'inter_token_latency': {'y': 'Time (ms)'}, 'time_to_first_token': {'y': 'Time (ms)'},
               'end_to_end_latency': {'y': 'Time (ms)'}, 'tokens_throughput': {'y': 'Tokens/s'},
               'requests_ok': {'y': 'Count'}, 'error_rate': {'y': 'Count'}}
    titles = ['Inter Token Latency P90 (lower is better)', 'TTFT P90 (lower is better)',
              'End to End Latency P90 (lower is better)', 'Request Output Throughput P90 (higher is better)',
              'Successful requests (higher is better)', 'Error rate (lower is better)']
    labels = ['Time (ms)', 'Time (ms)', 'Time (ms)', 'Tokens/s', 'Count', '%']
    colors = ['#FF9D00', '#2F5BA1']
    # Plot each metric in its respective subplot
    for ax, metric, title, label in zip(axs.flatten(), metrics, titles, labels):
        for i, name in enumerate(names):
            df_sorted = df[df['name'] == name].sort_values(by=vus_param)
            # cycle through the palette in case more than two engines are compared
            ax.plot(df_sorted[vus_param], df_sorted[metric], marker='o', label=f"{name}",
                    color=colors[i % len(colors)])
        ax.set_title(title)
        ax.tick_params(axis='x', rotation=0)
        ax.set_ylabel(label)
        if test_type == TestType.CONSTANT_VUS:
            ax.set_xlabel('VUS')
        else:
            ax.set_xlabel('Requests/s')
        # Add grid lines for better readability
        ax.grid(True, which='both', axis='y', linestyle='--', linewidth=0.5)
        ax.set_axisbelow(True)  # Ensure grid lines are drawn below the plotted lines
        ax.legend(title='Engine', loc='upper right')
    # show the test type and model name on top of the figure
    if test_type == TestType.CONSTANT_VUS:
        plt.suptitle(f'Constant VUs Load Test\n{model_name}', fontsize=16)
    elif test_type == TestType.CONSTANT_ARRIVAL_RATE:
        plt.suptitle(f'Constant Arrival Rate Load Test\n{model_name}', fontsize=16)
    logger.info(f"Saving plot to {save_name}.png")
    plt.savefig(f"{save_name}.png")
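

# A hypothetical standalone call, mirroring what main() below does for each test
# type (the model name string only appears in the figure title):
#
#   df = parse_json_files("results/constant_arrival_rate", TestType.CONSTANT_ARRIVAL_RATE)
#   plot_metrics("my-model", df, TestType.CONSTANT_ARRIVAL_RATE, "constant_arrival_rate")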


def main():
    # Model name shown in the plot titles, taken from the MODEL_ID env var here (assumption).
    model_name = os.getenv("MODEL_ID", "unknown model")
    for test_type in [TestType.CONSTANT_VUS, TestType.CONSTANT_ARRIVAL_RATE]:
        directory = f"results/{test_type.value.lower()}"
        df = parse_json_files(directory, test_type)
        plot_metrics(model_name, df, test_type, test_type.value.lower())


if __name__ == "__main__":
    main()