-
Notifications
You must be signed in to change notification settings - Fork 7
/
gpu-benchmarks.json
69 lines (69 loc) · 2 KB
/
gpu-benchmarks.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
[
{
"date": "20240814-155812",
"provider": "scaleway",
"gpu": "NVIDIA H100",
"cost_per_hour": 3.02,
"backend": "sglang",
"model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"tokenizer_id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"traffic_request_rate": 200,
"successful_requests": 1000,
"benchmark_duration": 35.45,
"total_input_tokens": 215196,
"total_generated_tokens": 198343,
"total_generated_tokens_ret": 197967,
"request_throughput": 28.21,
"input_throughput": 6070.48,
"output_throughput": 5595.07,
"mean_e2e_latency": 14328.17,
"median_e2e_latency": 13759.53,
"mean_ttft": 773.1,
"median_ttft": 193.72,
"p99_ttft": 4118.65,
"mean_tpot": 145.31,
"median_tpot": 81.55,
"p99_tpot": 915.34,
"mean_itl": 71.79,
"median_itl": 49.35,
"p99_itl": 291.88
},
{
"date": "20240820-143000",
"provider": "aws",
"gpu": "NVIDIA A10G",
"cost_per_hour": 2.84,
"backend": "vllm",
"model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"tokenizer_id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"num_prompts": 1000,
"duration": 97.65,
"completed": 1000,
"total_input_tokens": 215196,
"total_output_tokens": 43140,
"request_throughput": 10.24,
"input_throughput": 2203.64,
"output_throughput": 441.76,
"issue": "https://github.com/arc53/llm-price-compass/issues/6"
},
{
"date": "20240813-143958",
"provider": "scaleway",
"gpu": "NVIDIA L4",
"cost_per_hour": 0.84,
"backend": "vllm",
"model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"tokenizer_id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"best_of": 1,
"use_beam_search": false,
"num_prompts": 1000,
"request_rate": "inf",
"duration": 385.30312793600024,
"completed": 1000,
"total_input_tokens": 215196,
"total_output_tokens": 197739,
"request_throughput": 2.5953591536015312,
"input_throughput": 558.5109084184351,
"output_throughput": 513.2037236740132
}
]