Keeping the benchmark somewhere
This commit is contained in:
parent
7865851c02
commit
f230da8d63
|
@ -36,7 +36,13 @@ serde_json = "1.0.107"
|
|||
slotmap = "1.0.7"
|
||||
thiserror = "1.0.48"
|
||||
tokenizers = { workspace = true }
|
||||
tokio = { version = "1.32.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] }
|
||||
tokio = { version = "1.32.0", features = [
|
||||
"rt",
|
||||
"rt-multi-thread",
|
||||
"parking_lot",
|
||||
"signal",
|
||||
"sync",
|
||||
] }
|
||||
tokio-stream = "0.1.14"
|
||||
tower-http = { version = "0.5.1", features = ["cors"] }
|
||||
tracing = "0.1.37"
|
||||
|
@ -44,7 +50,9 @@ tracing-opentelemetry = "0.21.0"
|
|||
tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] }
|
||||
utoipa = { version = "4.2.0", features = ["axum_extras"] }
|
||||
utoipa-swagger-ui = { version = "6.0.0", features = ["axum"] }
|
||||
init-tracing-opentelemetry = { version = "0.14.1", features = ["opentelemetry-otlp"] }
|
||||
init-tracing-opentelemetry = { version = "0.14.1", features = [
|
||||
"opentelemetry-otlp",
|
||||
] }
|
||||
minijinja = { version = "2.0.2" }
|
||||
minijinja-contrib = { version = "2.0.2", features = ["pycompat"] }
|
||||
futures-util = "0.3.30"
|
||||
|
@ -60,8 +68,16 @@ tower = "^0.4"
|
|||
tonic-build = "0.10.1"
|
||||
prost-build = "0.12.1"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.3"
|
||||
itertools = "0.13"
|
||||
|
||||
[features]
|
||||
default = ["ngrok"]
|
||||
ngrok = ["text-generation-router/ngrok"]
|
||||
google = ["text-generation-router/google"]
|
||||
kserve = ["text-generation-router/kserve"]
|
||||
|
||||
[[bench]]
|
||||
name = "prefix_cache"
|
||||
harness = false
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
use itertools::Itertools;
|
||||
use rand::seq::SliceRandom;
|
||||
use rand::Rng;
|
||||
|
||||
use text_generation_router_v3::block_allocator::{Allocator, RadixAllocator};
|
||||
|
||||
fn prefix_cache_benchmark(c: &mut Criterion) {
|
||||
let prefixes: Vec<Vec<u32>> = (0..8192)
|
||||
.chunks(256)
|
||||
.into_iter()
|
||||
.map(|c| c.collect())
|
||||
.collect();
|
||||
|
||||
let mut cache = RadixAllocator::new(1, 262144, None);
|
||||
|
||||
c.bench_function("fib 20", |b| {
|
||||
b.iter_batched(
|
||||
|| {
|
||||
//prefixes
|
||||
// .choose_multiple(&mut rand::thread_rng(), 5)
|
||||
// .fold(Vec::new(), |mut v, s| {
|
||||
// v.extend(s);
|
||||
// v
|
||||
// })
|
||||
|
||||
(0..7936)
|
||||
.map(|_| rand::thread_rng().gen_range(0..1024))
|
||||
.collect::<Vec<u32>>()
|
||||
},
|
||||
|prefill| {
|
||||
let alloc = cache.allocate(prefill.len() as u32 + 13, Some(Arc::new(prefill)));
|
||||
if let Some(alloc) = alloc {
|
||||
cache.free(alloc.0, alloc.3);
|
||||
}
|
||||
},
|
||||
criterion::BatchSize::SmallInput,
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
// Collect the benchmark functions of this file into a Criterion group named `benches`.
criterion_group!(benches, prefix_cache_benchmark);
|
||||
// Expand to the `main` function that runs the `benches` group; the bench
// target is declared with `harness = false`, so no default harness supplies one.
criterion_main!(benches);
|
|
@ -204,7 +204,7 @@ impl Allocator for SimpleAllocator {
|
|||
}
|
||||
}
|
||||
|
||||
struct RadixAllocator {
|
||||
pub struct RadixAllocator {
|
||||
allocation_id: u64,
|
||||
|
||||
allocations: HashMap<u64, RadixAllocation>,
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
mod backend;
|
||||
mod block_allocator;
|
||||
pub mod block_allocator;
|
||||
mod client;
|
||||
mod queue;
|
||||
mod radix;
|
||||
|
|
|
@ -28,7 +28,13 @@ serde = "1.0.188"
|
|||
serde_json = "1.0.107"
|
||||
thiserror = "1.0.48"
|
||||
tokenizers = { workspace = true }
|
||||
tokio = { version = "1.32.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] }
|
||||
tokio = { version = "1.32.0", features = [
|
||||
"rt",
|
||||
"rt-multi-thread",
|
||||
"parking_lot",
|
||||
"signal",
|
||||
"sync",
|
||||
] }
|
||||
tokio-stream = "0.1.14"
|
||||
tower-http = { version = "0.5.1", features = ["cors"] }
|
||||
tracing = "0.1.40"
|
||||
|
@ -37,7 +43,9 @@ tracing-subscriber = { version = "0.3.18", features = ["json", "env-filter"] }
|
|||
utoipa = { version = "4.2.0", features = ["axum_extras"] }
|
||||
utoipa-swagger-ui = { version = "6.0.0", features = ["axum"] }
|
||||
ngrok = { version = "0.13.1", features = ["axum"], optional = true }
|
||||
init-tracing-opentelemetry = { version = "0.14.1", features = ["opentelemetry-otlp"] }
|
||||
init-tracing-opentelemetry = { version = "0.14.1", features = [
|
||||
"opentelemetry-otlp",
|
||||
] }
|
||||
minijinja = { version = "2.0.2" }
|
||||
minijinja-contrib = { version = "2.0.2", features = ["pycompat"] }
|
||||
futures-util = "0.3.30"
|
||||
|
@ -46,7 +54,11 @@ once_cell = "1.19.0"
|
|||
image = "0.25.1"
|
||||
base64 = { workspace = true }
|
||||
sysinfo = "0.30.13"
|
||||
uuid = { version = "1.9.1", default-features = false, features = ["v4", "fast-rng", "macro-diagnostics"] }
|
||||
uuid = { version = "1.9.1", default-features = false, features = [
|
||||
"v4",
|
||||
"fast-rng",
|
||||
"macro-diagnostics",
|
||||
] }
|
||||
csv = "1.3.0"
|
||||
ureq = "=2.9"
|
||||
|
||||
|
|
Loading…
Reference in New Issue