Keeping the benchmark somewhere

This commit is contained in:
Daniël de Kok 2024-08-06 12:36:15 +00:00
parent 7865851c02
commit f230da8d63
5 changed files with 82 additions and 9 deletions

View File

@ -35,8 +35,14 @@ serde = "1.0.188"
serde_json = "1.0.107" serde_json = "1.0.107"
slotmap = "1.0.7" slotmap = "1.0.7"
thiserror = "1.0.48" thiserror = "1.0.48"
tokenizers = { workspace = true} tokenizers = { workspace = true }
tokio = { version = "1.32.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] } tokio = { version = "1.32.0", features = [
"rt",
"rt-multi-thread",
"parking_lot",
"signal",
"sync",
] }
tokio-stream = "0.1.14" tokio-stream = "0.1.14"
tower-http = { version = "0.5.1", features = ["cors"] } tower-http = { version = "0.5.1", features = ["cors"] }
tracing = "0.1.37" tracing = "0.1.37"
@ -44,7 +50,9 @@ tracing-opentelemetry = "0.21.0"
tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] } tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] }
utoipa = { version = "4.2.0", features = ["axum_extras"] } utoipa = { version = "4.2.0", features = ["axum_extras"] }
utoipa-swagger-ui = { version = "6.0.0", features = ["axum"] } utoipa-swagger-ui = { version = "6.0.0", features = ["axum"] }
init-tracing-opentelemetry = { version = "0.14.1", features = ["opentelemetry-otlp"] } init-tracing-opentelemetry = { version = "0.14.1", features = [
"opentelemetry-otlp",
] }
minijinja = { version = "2.0.2" } minijinja = { version = "2.0.2" }
minijinja-contrib = { version = "2.0.2", features = ["pycompat"] } minijinja-contrib = { version = "2.0.2", features = ["pycompat"] }
futures-util = "0.3.30" futures-util = "0.3.30"
@ -60,8 +68,16 @@ tower = "^0.4"
tonic-build = "0.10.1" tonic-build = "0.10.1"
prost-build = "0.12.1" prost-build = "0.12.1"
[dev-dependencies]
criterion = "0.3"
itertools = "0.13"
[features] [features]
default = ["ngrok"] default = ["ngrok"]
ngrok = ["text-generation-router/ngrok"] ngrok = ["text-generation-router/ngrok"]
google = ["text-generation-router/google"] google = ["text-generation-router/google"]
kserve = ["text-generation-router/kserve"] kserve = ["text-generation-router/kserve"]
[[bench]]
name = "prefix_cache"
harness = false

View File

@ -0,0 +1,45 @@
use std::sync::Arc;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use itertools::Itertools;
use rand::seq::SliceRandom;
use rand::Rng;
use text_generation_router_v3::block_allocator::{Allocator, RadixAllocator};
/// Benchmarks one allocate/free round trip on a [`RadixAllocator`].
///
/// Each iteration is handed a freshly generated prefill of random token ids.
/// The prefill is built in the `iter_batched` setup closure so that input
/// generation is kept out of the timed routine; only `allocate` (and `free`,
/// when the allocation succeeds) run inside the measured closure.
///
/// The same allocator instance is reused across all iterations, so state left
/// behind by earlier iterations is visible to later ones.
fn prefix_cache_benchmark(c: &mut Criterion) {
    // Number of token ids in each generated prefill.
    const PREFILL_TOKENS: usize = 7936;
    // Exclusive upper bound of the generated token ids.
    const TOKEN_ID_BOUND: u32 = 1024;
    // Tokens requested on top of the prefill length.
    // NOTE(review): presumably stands in for the decode budget — confirm.
    const EXTRA_TOKENS: u32 = 13;

    // NOTE(review): the arguments are presumably block size, number of blocks
    // and window size — confirm against `RadixAllocator::new`.
    let mut cache = RadixAllocator::new(1, 262144, None);

    c.bench_function("radix_allocator_allocate_free", |b| {
        b.iter_batched(
            || {
                // Fetch the thread-local generator once per input rather than
                // once per generated token.
                let mut rng = rand::thread_rng();
                (0..PREFILL_TOKENS)
                    .map(|_| rng.gen_range(0..TOKEN_ID_BOUND))
                    .collect::<Vec<u32>>()
            },
            |prefill| {
                // The cast cannot truncate: the prefill always holds
                // `PREFILL_TOKENS` entries, far below `u32::MAX`.
                let tokens = prefill.len() as u32 + EXTRA_TOKENS;
                if let Some(alloc) = cache.allocate(tokens, Some(Arc::new(prefill))) {
                    // NOTE(review): fields 0 and 3 of the allocation are
                    // presumably the blocks and the allocation id — confirm
                    // against `Allocator::free`.
                    cache.free(alloc.0, alloc.3);
                }
            },
            criterion::BatchSize::SmallInput,
        );
    });
}
// Register `prefix_cache_benchmark` in a Criterion benchmark group named `benches`.
criterion_group!(benches, prefix_cache_benchmark);
// Generate the `main` entry point that runs the `benches` group.
criterion_main!(benches);

View File

@ -204,7 +204,7 @@ impl Allocator for SimpleAllocator {
} }
} }
struct RadixAllocator { pub struct RadixAllocator {
allocation_id: u64, allocation_id: u64,
allocations: HashMap<u64, RadixAllocation>, allocations: HashMap<u64, RadixAllocation>,

View File

@ -1,5 +1,5 @@
mod backend; mod backend;
mod block_allocator; pub mod block_allocator;
mod client; mod client;
mod queue; mod queue;
mod radix; mod radix;

View File

@ -27,8 +27,14 @@ reqwest = { version = "0.11.20", features = [] }
serde = "1.0.188" serde = "1.0.188"
serde_json = "1.0.107" serde_json = "1.0.107"
thiserror = "1.0.48" thiserror = "1.0.48"
tokenizers = { workspace = true} tokenizers = { workspace = true }
tokio = { version = "1.32.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] } tokio = { version = "1.32.0", features = [
"rt",
"rt-multi-thread",
"parking_lot",
"signal",
"sync",
] }
tokio-stream = "0.1.14" tokio-stream = "0.1.14"
tower-http = { version = "0.5.1", features = ["cors"] } tower-http = { version = "0.5.1", features = ["cors"] }
tracing = "0.1.40" tracing = "0.1.40"
@ -37,7 +43,9 @@ tracing-subscriber = { version = "0.3.18", features = ["json", "env-filter"] }
utoipa = { version = "4.2.0", features = ["axum_extras"] } utoipa = { version = "4.2.0", features = ["axum_extras"] }
utoipa-swagger-ui = { version = "6.0.0", features = ["axum"] } utoipa-swagger-ui = { version = "6.0.0", features = ["axum"] }
ngrok = { version = "0.13.1", features = ["axum"], optional = true } ngrok = { version = "0.13.1", features = ["axum"], optional = true }
init-tracing-opentelemetry = { version = "0.14.1", features = ["opentelemetry-otlp"] } init-tracing-opentelemetry = { version = "0.14.1", features = [
"opentelemetry-otlp",
] }
minijinja = { version = "2.0.2" } minijinja = { version = "2.0.2" }
minijinja-contrib = { version = "2.0.2", features = ["pycompat"] } minijinja-contrib = { version = "2.0.2", features = ["pycompat"] }
futures-util = "0.3.30" futures-util = "0.3.30"
@ -46,7 +54,11 @@ once_cell = "1.19.0"
image = "0.25.1" image = "0.25.1"
base64 = { workspace = true } base64 = { workspace = true }
sysinfo = "0.30.13" sysinfo = "0.30.13"
uuid = { version = "1.9.1", default-features = false, features = ["v4", "fast-rng", "macro-diagnostics"] } uuid = { version = "1.9.1", default-features = false, features = [
"v4",
"fast-rng",
"macro-diagnostics",
] }
csv = "1.3.0" csv = "1.3.0"
ureq = "=2.9" ureq = "=2.9"