hf_text-generation-inference/load_tests/parse_load_test.py

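"""Parse k6 load-test summary.json files and plot comparison charts.

Walks results/constant_vus and results/constant_arrival_rate (see main() below),
builds one DataFrame row per run, and saves a 3x2 grid of latency, throughput
and error-rate plots to <test_type>.png.
"""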
import json
import os
import re
from enum import Enum
import pandas as pd
from loguru import logger
from matplotlib import pyplot as plt
import scienceplots
plt.style.use('science')


class TestType(Enum):
    CONSTANT_VUS = "constant_vus"
    CONSTANT_ARRIVAL_RATE = "constant_arrival_rate"


def get_nested(obj, path, default=None):
    """Safely walk a dot-separated path through nested dicts and lists."""
    for key in path.split("."):
        if obj is None:
            return default
        if isinstance(obj, list):
            try:
                key = int(key)
            except ValueError:
                return default
            obj = obj[key]
        elif isinstance(obj, dict):
            obj = obj.get(key, default)
    return obj
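

# Example: get_nested() walks a dot-separated path across nested dicts and lists,
# falling back to `default` whenever a key or index is missing, e.g.
#   get_nested({"root_group": {"checks": [{"passes": 3}]}}, "root_group.checks.0.passes") -> 3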


def parse_json_files(directory: str, test_type: TestType) -> pd.DataFrame:
    """Collect every k6 *summary.json under `directory` into one DataFrame row per run."""
    metrics_to_keep = {'inter_token_latency': {'y': 'Time (ms)'}, 'end_to_end_latency': {'y': 'Time (ms)'},
                       'time_to_first_token': {'y': 'Time (ms)'}, 'tokens_throughput': {'y': 'Tokens/s'},
                       'tokens_received': {'y': 'Count'}}
    df = pd.DataFrame()
    for file in os.listdir(directory):
        if file.endswith("summary.json"):
            filepath = os.path.join(directory, file)
            with open(filepath, 'r') as f:
                data = json.load(f)
            if test_type == TestType.CONSTANT_VUS:
                entry = {
                    "vus": data['k6_config']['vus'],
                    "duration": data['k6_config']['duration']
                }
            elif test_type == TestType.CONSTANT_ARRIVAL_RATE:
                entry = {
                    'pre_allocated_vus': data['k6_config']['pre_allocated_vus'],
                    'rate': data['k6_config']['rate'],
                    'duration': data['k6_config']['duration']
                }
            entry['input_type'] = data['k6_config']['input_type']
            entry['test_duration'] = data['state']['testRunDurationMs'] / 1000.
            entry['requests_ok'] = get_nested(data, 'root_group.checks.0.passes', 0)
            entry['requests_fail'] = get_nested(data, 'root_group.checks.0.fails', 0)
            entry['dropped_iterations'] = get_nested(data, 'metrics.dropped_iterations.values.count', 0)
            # failed checks plus dropped iterations give the total number of dropped requests
            entry['dropped_requests'] = entry['requests_fail'] + entry['dropped_iterations']
            total_requests = entry['requests_ok'] + entry['dropped_requests']
            # guard against empty runs to avoid a division by zero
            entry['error_rate'] = entry['dropped_requests'] / total_requests * 100.0 if total_requests else 0.0
            entry['name'] = data['k6_config']['name']
            for metric, values in sorted(data['metrics'].items()):
                if metric in metrics_to_keep:
                    for value_key, value in values['values'].items():
                        if value_key == 'p(90)' or value_key == 'count':  # keep p(90) for trends, count for counters
                            entry[metric] = value
            # normalise the token count by the run duration and rescale inter-token latency
            if 'tokens_throughput' in entry and 'test_duration' in entry:
                entry['tokens_throughput'] = entry['tokens_throughput'] / entry['test_duration']
            if 'inter_token_latency' in entry:
                entry['inter_token_latency'] = entry['inter_token_latency'] / 1000.
            df = pd.concat([df, pd.DataFrame(entry, index=[0])])
    return df
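

# For reference, the subset of a k6 summary.json that the parser above relies on
# looks roughly like this (shape inferred from the lookups above; real files carry
# many more fields, and the values shown are illustrative only):
#
# {
#   "k6_config": {"name": "tgi", "input_type": "...", "duration": "60s",
#                 "vus": 1, "pre_allocated_vus": 100, "rate": 10},
#   "state": {"testRunDurationMs": 60000},
#   "root_group": {"checks": [{"passes": 100, "fails": 0}]},
#   "metrics": {"inter_token_latency": {"values": {"p(90)": 42.0}},
#               "tokens_throughput": {"values": {"count": 12345}},
#               "dropped_iterations": {"values": {"count": 0}}}
# }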


def plot_metrics(model_name: str, df: pd.DataFrame, test_type: TestType, save_name: str):
    """Plot a 3x2 grid of P90 latency, throughput and error metrics, one line per engine."""
    vus_param = 'vus' if test_type == TestType.CONSTANT_VUS else 'rate'
    fig, axs = plt.subplots(3, 2, figsize=(15, 20))
    fig.tight_layout(pad=6.0)
    fig.subplots_adjust(hspace=0.2, wspace=0.2, bottom=0.15, top=0.92)
    names = sorted(df['name'].unique())
    metrics = {'inter_token_latency': {'y': 'Time (ms)'}, 'time_to_first_token': {'y': 'Time (ms)'},
               'end_to_end_latency': {'y': 'Time (ms)'}, 'tokens_throughput': {'y': 'Tokens/s'},
               'requests_ok': {'y': 'Count'}, 'error_rate': {'y': 'Count'}}
    titles = ['Inter Token Latency P90 (lower is better)', 'TTFT P90 (lower is better)',
              'End to End Latency P90 (lower is better)', 'Request Output Throughput P90 (higher is better)',
              'Successful requests (higher is better)', 'Error rate (lower is better)']
    labels = ['Time (ms)', 'Time (ms)', 'Time (ms)', 'Tokens/s', 'Count', '%']
    colors = ['#FF9D00', '#2F5BA1']
    # Plot each metric in its respective subplot
    for ax, metric, title, label in zip(axs.flatten(), metrics, titles, labels):
        for i, name in enumerate(names):
            df_sorted = df[df['name'] == name].sort_values(by=vus_param)
            # cycle through the palette in case more than two engines are compared
            ax.plot(df_sorted[vus_param], df_sorted[metric], marker='o', label=f"{name}",
                    color=colors[i % len(colors)])
        ax.set_title(title)
        ax.tick_params(axis='x', rotation=0)
        ax.set_ylabel(label)
        if test_type == TestType.CONSTANT_VUS:
            ax.set_xlabel('VUS')
        else:
            ax.set_xlabel('Requests/s')
        # Add grid lines for better readability
        ax.grid(True, which='both', axis='y', linestyle='--', linewidth=0.5)
        ax.set_axisbelow(True)  # Ensure grid lines are drawn below the plotted lines
        ax.legend(title='Engine', loc='upper right')
    # show the test type and model name on top of the figure
    if test_type == TestType.CONSTANT_VUS:
        plt.suptitle(f'Constant VUs Load Test\n{model_name}', fontsize=16)
    elif test_type == TestType.CONSTANT_ARRIVAL_RATE:
        plt.suptitle(f'Constant Arrival Rate Load Test\n{model_name}', fontsize=16)
    logger.info(f"Saving plot to {save_name}.png")
    plt.savefig(f"{save_name}.png")
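

# A hypothetical standalone call, mirroring what main() below does for each test
# type (the model name string only appears in the figure title):
#
#   df = parse_json_files("results/constant_arrival_rate", TestType.CONSTANT_ARRIVAL_RATE)
#   plot_metrics("my-model", df, TestType.CONSTANT_ARRIVAL_RATE, "constant_arrival_rate")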


def main():
    # Model name shown in the plot titles, taken from the MODEL_ID env var here (assumption).
    model_name = os.getenv("MODEL_ID", "unknown model")
    for test_type in [TestType.CONSTANT_VUS, TestType.CONSTANT_ARRIVAL_RATE]:
        directory = f"results/{test_type.value.lower()}"
        df = parse_json_files(directory, test_type)
        plot_metrics(model_name, df, test_type, test_type.value.lower())


if __name__ == "__main__":
    main()