Keeping the benchmark somewhere
This commit is contained in:
parent
7865851c02
commit
f230da8d63
|
@ -35,8 +35,14 @@ serde = "1.0.188"
|
||||||
serde_json = "1.0.107"
|
serde_json = "1.0.107"
|
||||||
slotmap = "1.0.7"
|
slotmap = "1.0.7"
|
||||||
thiserror = "1.0.48"
|
thiserror = "1.0.48"
|
||||||
tokenizers = { workspace = true}
|
tokenizers = { workspace = true }
|
||||||
tokio = { version = "1.32.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] }
|
tokio = { version = "1.32.0", features = [
|
||||||
|
"rt",
|
||||||
|
"rt-multi-thread",
|
||||||
|
"parking_lot",
|
||||||
|
"signal",
|
||||||
|
"sync",
|
||||||
|
] }
|
||||||
tokio-stream = "0.1.14"
|
tokio-stream = "0.1.14"
|
||||||
tower-http = { version = "0.5.1", features = ["cors"] }
|
tower-http = { version = "0.5.1", features = ["cors"] }
|
||||||
tracing = "0.1.37"
|
tracing = "0.1.37"
|
||||||
|
@ -44,7 +50,9 @@ tracing-opentelemetry = "0.21.0"
|
||||||
tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] }
|
tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] }
|
||||||
utoipa = { version = "4.2.0", features = ["axum_extras"] }
|
utoipa = { version = "4.2.0", features = ["axum_extras"] }
|
||||||
utoipa-swagger-ui = { version = "6.0.0", features = ["axum"] }
|
utoipa-swagger-ui = { version = "6.0.0", features = ["axum"] }
|
||||||
init-tracing-opentelemetry = { version = "0.14.1", features = ["opentelemetry-otlp"] }
|
init-tracing-opentelemetry = { version = "0.14.1", features = [
|
||||||
|
"opentelemetry-otlp",
|
||||||
|
] }
|
||||||
minijinja = { version = "2.0.2" }
|
minijinja = { version = "2.0.2" }
|
||||||
minijinja-contrib = { version = "2.0.2", features = ["pycompat"] }
|
minijinja-contrib = { version = "2.0.2", features = ["pycompat"] }
|
||||||
futures-util = "0.3.30"
|
futures-util = "0.3.30"
|
||||||
|
@ -60,8 +68,16 @@ tower = "^0.4"
|
||||||
tonic-build = "0.10.1"
|
tonic-build = "0.10.1"
|
||||||
prost-build = "0.12.1"
|
prost-build = "0.12.1"
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
criterion = "0.3"
|
||||||
|
itertools = "0.13"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["ngrok"]
|
default = ["ngrok"]
|
||||||
ngrok = ["text-generation-router/ngrok"]
|
ngrok = ["text-generation-router/ngrok"]
|
||||||
google = ["text-generation-router/google"]
|
google = ["text-generation-router/google"]
|
||||||
kserve = ["text-generation-router/kserve"]
|
kserve = ["text-generation-router/kserve"]
|
||||||
|
|
||||||
|
[[bench]]
|
||||||
|
name = "prefix_cache"
|
||||||
|
harness = false
|
||||||
|
|
|
@ -0,0 +1,45 @@
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||||
|
use itertools::Itertools;
|
||||||
|
use rand::seq::SliceRandom;
|
||||||
|
use rand::Rng;
|
||||||
|
|
||||||
|
use text_generation_router_v3::block_allocator::{Allocator, RadixAllocator};
|
||||||
|
|
||||||
|
fn prefix_cache_benchmark(c: &mut Criterion) {
|
||||||
|
let prefixes: Vec<Vec<u32>> = (0..8192)
|
||||||
|
.chunks(256)
|
||||||
|
.into_iter()
|
||||||
|
.map(|c| c.collect())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let mut cache = RadixAllocator::new(1, 262144, None);
|
||||||
|
|
||||||
|
c.bench_function("fib 20", |b| {
|
||||||
|
b.iter_batched(
|
||||||
|
|| {
|
||||||
|
//prefixes
|
||||||
|
// .choose_multiple(&mut rand::thread_rng(), 5)
|
||||||
|
// .fold(Vec::new(), |mut v, s| {
|
||||||
|
// v.extend(s);
|
||||||
|
// v
|
||||||
|
// })
|
||||||
|
|
||||||
|
(0..7936)
|
||||||
|
.map(|_| rand::thread_rng().gen_range(0..1024))
|
||||||
|
.collect::<Vec<u32>>()
|
||||||
|
},
|
||||||
|
|prefill| {
|
||||||
|
let alloc = cache.allocate(prefill.len() as u32 + 13, Some(Arc::new(prefill)));
|
||||||
|
if let Some(alloc) = alloc {
|
||||||
|
cache.free(alloc.0, alloc.3);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
criterion::BatchSize::SmallInput,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
criterion_group!(benches, prefix_cache_benchmark);
|
||||||
|
criterion_main!(benches);
|
|
@ -204,7 +204,7 @@ impl Allocator for SimpleAllocator {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct RadixAllocator {
|
pub struct RadixAllocator {
|
||||||
allocation_id: u64,
|
allocation_id: u64,
|
||||||
|
|
||||||
allocations: HashMap<u64, RadixAllocation>,
|
allocations: HashMap<u64, RadixAllocation>,
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
mod backend;
|
mod backend;
|
||||||
mod block_allocator;
|
pub mod block_allocator;
|
||||||
mod client;
|
mod client;
|
||||||
mod queue;
|
mod queue;
|
||||||
mod radix;
|
mod radix;
|
||||||
|
|
|
@ -27,8 +27,14 @@ reqwest = { version = "0.11.20", features = [] }
|
||||||
serde = "1.0.188"
|
serde = "1.0.188"
|
||||||
serde_json = "1.0.107"
|
serde_json = "1.0.107"
|
||||||
thiserror = "1.0.48"
|
thiserror = "1.0.48"
|
||||||
tokenizers = { workspace = true}
|
tokenizers = { workspace = true }
|
||||||
tokio = { version = "1.32.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] }
|
tokio = { version = "1.32.0", features = [
|
||||||
|
"rt",
|
||||||
|
"rt-multi-thread",
|
||||||
|
"parking_lot",
|
||||||
|
"signal",
|
||||||
|
"sync",
|
||||||
|
] }
|
||||||
tokio-stream = "0.1.14"
|
tokio-stream = "0.1.14"
|
||||||
tower-http = { version = "0.5.1", features = ["cors"] }
|
tower-http = { version = "0.5.1", features = ["cors"] }
|
||||||
tracing = "0.1.40"
|
tracing = "0.1.40"
|
||||||
|
@ -37,7 +43,9 @@ tracing-subscriber = { version = "0.3.18", features = ["json", "env-filter"] }
|
||||||
utoipa = { version = "4.2.0", features = ["axum_extras"] }
|
utoipa = { version = "4.2.0", features = ["axum_extras"] }
|
||||||
utoipa-swagger-ui = { version = "6.0.0", features = ["axum"] }
|
utoipa-swagger-ui = { version = "6.0.0", features = ["axum"] }
|
||||||
ngrok = { version = "0.13.1", features = ["axum"], optional = true }
|
ngrok = { version = "0.13.1", features = ["axum"], optional = true }
|
||||||
init-tracing-opentelemetry = { version = "0.14.1", features = ["opentelemetry-otlp"] }
|
init-tracing-opentelemetry = { version = "0.14.1", features = [
|
||||||
|
"opentelemetry-otlp",
|
||||||
|
] }
|
||||||
minijinja = { version = "2.0.2" }
|
minijinja = { version = "2.0.2" }
|
||||||
minijinja-contrib = { version = "2.0.2", features = ["pycompat"] }
|
minijinja-contrib = { version = "2.0.2", features = ["pycompat"] }
|
||||||
futures-util = "0.3.30"
|
futures-util = "0.3.30"
|
||||||
|
@ -46,7 +54,11 @@ once_cell = "1.19.0"
|
||||||
image = "0.25.1"
|
image = "0.25.1"
|
||||||
base64 = { workspace = true }
|
base64 = { workspace = true }
|
||||||
sysinfo = "0.30.13"
|
sysinfo = "0.30.13"
|
||||||
uuid = { version = "1.9.1", default-features = false, features = ["v4", "fast-rng", "macro-diagnostics"] }
|
uuid = { version = "1.9.1", default-features = false, features = [
|
||||||
|
"v4",
|
||||||
|
"fast-rng",
|
||||||
|
"macro-diagnostics",
|
||||||
|
] }
|
||||||
csv = "1.3.0"
|
csv = "1.3.0"
|
||||||
ureq = "=2.9"
|
ureq = "=2.9"
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue