diff --git a/Cargo.lock b/Cargo.lock
index 1dcc7c97..62896a51 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -249,6 +249,12 @@ version = "3.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535"
 
+[[package]]
+name = "bytecount"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c"
+
 [[package]]
 name = "byteorder"
 version = "1.4.3"
@@ -1706,6 +1712,17 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
 
+[[package]]
+name = "papergrid"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fdfe703c51ddc52887ad78fc69cd2ea78d895ffcd6e955c9d03566db8ab5bb1"
+dependencies = [
+ "bytecount",
+ "fnv",
+ "unicode-width",
+]
+
 [[package]]
 name = "parking_lot"
 version = "0.12.1"
@@ -2490,6 +2507,30 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "tabled"
+version = "0.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da1a2e56bbf7bfdd08aaa7592157a742205459eff774b73bc01809ae2d99dc2a"
+dependencies = [
+ "papergrid",
+ "tabled_derive",
+ "unicode-width",
+]
+
+[[package]]
+name = "tabled_derive"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "99f688a08b54f4f02f0a3c382aefdb7884d3d69609f785bd253dc033243e3fe4"
+dependencies = [
+ "heck",
+ "proc-macro-error",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
 [[package]]
 name = "tar"
 version = "0.4.38"
@@ -2525,6 +2566,7 @@ dependencies = [
  "ratatui",
  "serde",
  "serde_json",
+ "tabled",
  "text-generation-client",
  "thiserror",
  "tokenizers",
diff --git a/benchmark/Cargo.toml b/benchmark/Cargo.toml
index a4e215fc..67e04f0a 100644
--- a/benchmark/Cargo.toml
+++ b/benchmark/Cargo.toml
@@ -20,6 +20,7 @@ crossterm = "0.26"
 float-ord = "0.3.2"
 serde = {version = "1.0.142", features = ["derive"]}
 serde_json = "1.0"
+tabled = "0.12.0"
 text-generation-client = { path = "../router/client" }
 thiserror = "1.0.38"
 tokenizers = "0.13.3"
diff --git a/benchmark/src/app.rs b/benchmark/src/app.rs
index 85026a33..6a9881fb 100644
--- a/benchmark/src/app.rs
+++ b/benchmark/src/app.rs
@@ -15,6 +15,7 @@ use tui::{symbols, Frame};
 /// TUI powered App
 pub(crate) struct App {
     pub(crate) running: bool,
+    pub(crate) data: Data,
     completed_runs: Vec<usize>,
     completed_batch: usize,
     current_batch: usize,
@@ -22,12 +23,10 @@ pub(crate) struct App {
     touched_tab: bool,
     zoom: bool,
     is_error: bool,
-    data: Data,
     tokenizer_name: String,
     sequence_length: u32,
     decode_length: u32,
     n_run: usize,
-    batch_size: Vec<u32>,
     receiver: mpsc::Receiver<Result<Message, ClientError>>,
 }
 
@@ -40,7 +39,6 @@ impl App {
         n_run: usize,
         batch_size: Vec<u32>,
     ) -> Self {
-        let data = Data::new(n_run, batch_size.len());
         let current_tab = 0;
 
         let completed_runs: Vec<usize> = (0..batch_size.len()).map(|_| 0).collect();
@@ -48,8 +46,11 @@ impl App {
         let current_batch = 0;
         let is_error = false;
 
+        let data = Data::new(n_run, batch_size);
+
         Self {
             running: true,
+            data,
             completed_runs,
             completed_batch,
             current_batch,
@@ -57,12 +58,10 @@ impl App {
             touched_tab: false,
             zoom: false,
             is_error,
-            data,
             tokenizer_name,
             sequence_length,
             decode_length,
             n_run,
-            batch_size,
             receiver,
         }
     }
@@ -79,7 +78,7 @@ impl App {
                 code: KeyCode::Tab,
                 ..
             } => {
                 self.touched_tab = true;
-                self.current_tab = (self.current_tab + 1) % self.batch_size.len();
+                self.current_tab = (self.current_tab + 1) % self.data.batch_size.len();
             }
             // Decrease and wrap tab
             KeyEvent {
@@ -90,7 +89,7 @@ impl App {
                 if self.current_tab > 0 {
                     self.current_tab -= 1;
                 } else {
-                    self.current_tab = self.batch_size.len() - 1;
+                    self.current_tab = self.data.batch_size.len() - 1;
                 }
             }
             // Zoom on throughput/latency fig
@@ -137,7 +136,7 @@ impl App {
                 self.data.end_batch(self.current_batch);
                 self.completed_batch += 1;
 
-                if self.current_batch < self.batch_size.len() - 1 {
+                if self.current_batch < self.data.batch_size.len() - 1 {
                     // Only go to next tab if the user never touched the tab keys
                     if !self.touched_tab {
                         self.current_tab += 1;
@@ -156,7 +155,7 @@ impl App {
     /// Render frame
     pub fn render<B: Backend>(&mut self, f: &mut Frame<'_, B>) {
         let batch_progress =
-            (self.completed_batch as f64 / self.batch_size.len() as f64).clamp(0.0, 1.0);
+            (self.completed_batch as f64 / self.data.batch_size.len() as f64).clamp(0.0, 1.0);
         let run_progress =
             (self.completed_runs[self.current_batch] as f64 / self.n_run as f64).clamp(0.0, 1.0);
 
@@ -241,6 +240,7 @@ impl App {
         // Batch tabs
         let titles = self
+            .data
             .batch_size
             .iter()
             .map(|b| {
@@ -269,7 +269,7 @@ impl App {
         };
         let batch_gauge = progress_gauge(
             "Total Progress",
-            format!("{} / {}", self.completed_batch, self.batch_size.len()),
+            format!("{} / {}", self.completed_batch, self.data.batch_size.len()),
             batch_progress,
             color,
         );
@@ -347,7 +347,7 @@ impl App {
         // Prefill latency/throughput chart
         let prefill_latency_throughput_chart = latency_throughput_chart(
             &self.data.prefill_batch_latency_throughput,
-            &self.batch_size,
+            &self.data.batch_size,
             self.zoom,
             "Prefill",
         );
@@ -356,7 +356,7 @@ impl App {
         // Decode latency/throughput chart
         let decode_latency_throughput_chart = latency_throughput_chart(
             &self.data.decode_batch_latency_throughput,
-            &self.batch_size,
+            &self.data.batch_size,
             self.zoom,
             "Decode",
         );
@@ -365,31 +365,35 @@ impl App {
 }
 
 /// App internal data struct
-struct Data {
-    prefill_latencies: Vec<Vec<f64>>,
-    prefill_throughputs: Vec<Vec<f64>>,
-    decode_latencies: Vec<Vec<f64>>,
-    decode_token_latencies: Vec<Vec<f64>>,
-    decode_throughputs: Vec<Vec<f64>>,
-    prefill_batch_latency_throughput: Vec<(f64, f64)>,
-    decode_batch_latency_throughput: Vec<(f64, f64)>,
+pub(crate) struct Data {
+    pub(crate) batch_size: Vec<u32>,
+    pub(crate) prefill_latencies: Vec<Vec<f64>>,
+    pub(crate) prefill_throughputs: Vec<Vec<f64>>,
+    pub(crate) decode_latencies: Vec<Vec<f64>>,
+    pub(crate) decode_token_latencies: Vec<Vec<f64>>,
+    pub(crate) decode_throughputs: Vec<Vec<f64>>,
+    pub(crate) prefill_batch_latency_throughput: Vec<(f64, f64)>,
+    pub(crate) decode_batch_latency_throughput: Vec<(f64, f64)>,
 }
 
 impl Data {
-    fn new(n_run: usize, n_batch: usize) -> Self {
-        let prefill_latencies: Vec<Vec<f64>> =
-            (0..n_batch).map(|_| Vec::with_capacity(n_run)).collect();
+    fn new(n_run: usize, batch_size: Vec<u32>) -> Self {
+        let prefill_latencies: Vec<Vec<f64>> = (0..batch_size.len())
+            .map(|_| Vec::with_capacity(n_run))
+            .collect();
         let prefill_throughputs: Vec<Vec<f64>> = prefill_latencies.clone();
 
         let decode_latencies: Vec<Vec<f64>> = prefill_latencies.clone();
         let decode_token_latencies: Vec<Vec<f64>> = decode_latencies.clone();
         let decode_throughputs: Vec<Vec<f64>> = prefill_throughputs.clone();
 
-        let prefill_batch_latency_throughput: Vec<(f64, f64)> = Vec::with_capacity(n_batch);
+        let prefill_batch_latency_throughput: Vec<(f64, f64)> =
+            Vec::with_capacity(batch_size.len());
         let decode_batch_latency_throughput: Vec<(f64, f64)> =
             prefill_batch_latency_throughput.clone();
 
         Self {
+            batch_size,
             prefill_latencies,
             prefill_throughputs,
             decode_latencies,
@@ -401,14 +405,14 @@ impl Data {
     }
 
     fn push_prefill(&mut self, prefill: Prefill, batch_idx: usize) {
-        let latency = prefill.latency.as_millis() as f64;
+        let latency = prefill.latency.as_micros() as f64 / 1000.0;
         self.prefill_latencies[batch_idx].push(latency);
         self.prefill_throughputs[batch_idx].push(prefill.throughput);
     }
 
     fn push_decode(&mut self, decode: Decode, batch_idx: usize) {
-        let latency = decode.latency.as_millis() as f64;
-        let token_latency = decode.token_latency.as_millis() as f64;
+        let latency = decode.latency.as_micros() as f64 / 1000.0;
+        let token_latency = decode.token_latency.as_micros() as f64 / 1000.0;
         self.decode_latencies[batch_idx].push(latency);
         self.decode_token_latencies[batch_idx].push(token_latency);
         self.decode_throughputs[batch_idx].push(decode.throughput);
diff --git a/benchmark/src/generation.rs b/benchmark/src/generation.rs
index d40a2e8d..17c72d26 100644
--- a/benchmark/src/generation.rs
+++ b/benchmark/src/generation.rs
@@ -39,6 +39,7 @@ pub(crate) async fn generation_task(
     decode_length: u32,
     n_runs: usize,
     warmups: usize,
+    parameters: NextTokenChooserParameters,
     client: ShardedClient,
     run_sender: mpsc::Sender<Result<Message, ClientError>>,
     mut shutdown_receiver: broadcast::Receiver<()>,
@@ -47,7 +48,7 @@ pub(crate) async fn generation_task(
     // End task if a message is received on shutdown_receiver
     // _shutdown_guard_sender will be dropped once the task is finished
     tokio::select! {
-        res = generate_runs(tokenizer, batch_size, sequence_length, decode_length, n_runs, warmups, client, run_sender.clone()) => {
+        res = generate_runs(tokenizer, batch_size, sequence_length, decode_length, n_runs, warmups, parameters, client, run_sender.clone()) => {
            if let Err(err) = res {
                run_sender.send(Err(err)).await.unwrap_or(());
            }
@@ -65,6 +66,7 @@ async fn generate_runs(
     decode_length: u32,
     n_runs: usize,
     warmups: usize,
+    parameters: NextTokenChooserParameters,
     mut client: ShardedClient,
     run_sender: mpsc::Sender<Result<Message, ClientError>>,
 ) -> Result<(), ClientError> {
@@ -79,6 +81,7 @@ async fn generate_runs(
             sequence_length,
             b,
             decode_length,
+            parameters.clone(),
             &mut client,
         )
         .await?;
@@ -93,6 +96,7 @@ async fn generate_runs(
                 sequence_length,
                 b,
                 decode_length,
+                parameters.clone(),
                 &mut client,
             )
             .await?;
@@ -125,6 +129,7 @@ async fn prefill(
     sequence_length: u32,
     batch_size: u32,
     decode_length: u32,
+    parameters: NextTokenChooserParameters,
     client: &mut ShardedClient,
 ) -> Result<(Prefill, CachedBatch), ClientError> {
     // Create requests
@@ -133,16 +138,7 @@
             id: id.into(),
             inputs: sequence.clone(),
             truncate: sequence_length,
-            parameters: Some(NextTokenChooserParameters {
-                temperature: 1.0,
-                top_k: 0,
-                top_p: 1.0,
-                typical_p: 1.0,
-                do_sample: false,
-                seed: 0,
-                repetition_penalty: 1.0,
-                watermark: false,
-            }),
+            parameters: Some(parameters.clone()),
             stopping_parameters: Some(StoppingCriteriaParameters {
                 max_new_tokens: decode_length,
                 stop_sequences: vec![],
diff --git a/benchmark/src/lib.rs b/benchmark/src/lib.rs
index 4da0b573..fcad400c 100644
--- a/benchmark/src/lib.rs
+++ b/benchmark/src/lib.rs
@@ -1,13 +1,14 @@
 mod app;
 mod event;
 mod generation;
+mod table;
 mod utils;
 
 use crate::app::App;
 use crate::event::Event;
 use crossterm::ExecutableCommand;
 use std::io;
-use text_generation_client::ShardedClient;
+use text_generation_client::{NextTokenChooserParameters, ShardedClient};
 use tokenizers::Tokenizer;
 use tokio::sync::{broadcast, mpsc};
 use tui::backend::CrosstermBackend;
@@ -23,8 +24,26 @@ pub async fn run(
     decode_length: u32,
     n_runs: usize,
     warmups: usize,
+    temperature: Option<f32>,
+    top_k: Option<u32>,
+    top_p: Option<f32>,
+    typical_p: Option<f32>,
+    repetition_penalty: Option<f32>,
+    watermark: bool,
+    do_sample: bool,
     client: ShardedClient,
 ) -> Result<(), crossterm::ErrorKind> {
+    let parameters = NextTokenChooserParameters {
+        temperature: temperature.unwrap_or(1.0),
+        top_k: top_k.unwrap_or(0),
+        top_p: top_p.unwrap_or(1.0),
+        typical_p: typical_p.unwrap_or(1.0),
+        do_sample,
+        seed: 0,
+        repetition_penalty: repetition_penalty.unwrap_or(1.0),
+        watermark,
+    };
+
     // Initialize terminal properties
     crossterm::terminal::enable_raw_mode()?;
     io::stdout().execute(crossterm::terminal::EnterAlternateScreen)?;
@@ -53,6 +72,7 @@ pub async fn run(
         decode_length,
         n_runs,
         warmups,
+        parameters,
         client,
         run_sender,
         shutdown_sender.subscribe(),
@@ -73,7 +93,7 @@ pub async fn run(
     // Create App
     let mut app = App::new(
         run_receiver,
-        tokenizer_name,
+        tokenizer_name.clone(),
         sequence_length,
         decode_length,
         n_runs,
@@ -106,5 +126,27 @@ pub async fn run(
     crossterm::terminal::disable_raw_mode()?;
     io::stdout().execute(crossterm::cursor::Show)?;
 
+    let parameters_table = table::parameters_table(
+        tokenizer_name,
+        sequence_length,
+        decode_length,
+        n_runs,
+        warmups,
+        temperature,
+        top_k,
+        top_p,
+        typical_p,
+        repetition_penalty,
+        watermark,
+        do_sample,
+    );
+    println!("\n{parameters_table}\n");
+
+    let latency_table = table::latency_table(&app.data);
+    println!("\n{latency_table}\n");
+
+    let throughput_table = table::throughput_table(&app.data);
+    println!("\n{throughput_table}\n");
+
     Ok(())
 }
diff --git a/benchmark/src/main.rs b/benchmark/src/main.rs
index 03f61dcd..6172d377 100644
--- a/benchmark/src/main.rs
+++ b/benchmark/src/main.rs
@@ -28,11 +28,27 @@ struct Args {
     runs: usize,
     #[clap(default_value = "1", short, long, env)]
     warmups: usize,
+    #[clap(long, env)]
+    temperature: Option<f32>,
+    #[clap(long, env)]
+    top_k: Option<u32>,
+    #[clap(long, env)]
+    top_p: Option<f32>,
+    #[clap(long, env)]
+    typical_p: Option<f32>,
+    #[clap(long, env)]
+    repetition_penalty: Option<f32>,
+    #[clap(long, env)]
+    watermark: bool,
+    #[clap(long, env)]
+    do_sample: bool,
     #[clap(default_value = "/tmp/text-generation-server-0", short, long, env)]
     master_shard_uds_path: String,
 }
 
 fn main() -> Result<(), Box<dyn std::error::Error>> {
+    init_logging();
+
     // Get args
     let args = Args::parse();
     // Pattern match configuration
@@ -44,13 +60,18 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         decode_length,
         runs,
         warmups,
+        temperature,
+        top_k,
+        top_p,
+        typical_p,
+        repetition_penalty,
+        watermark,
+        do_sample,
         master_shard_uds_path,
     } = args;
 
     let batch_size = batch_size.unwrap_or(vec![1, 2, 4, 8, 16, 32]);
 
-    init_logging();
-
     // Tokenizer instance
     // This will only be used to validate payloads
     tracing::info!("Loading tokenizer");
@@ -105,6 +126,13 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
             decode_length,
             runs,
             warmups,
+            temperature,
+            top_k,
+            top_p,
+            typical_p,
+            repetition_penalty,
+            watermark,
+            do_sample,
             sharded_client,
         )
         .await
diff --git a/benchmark/src/table.rs b/benchmark/src/table.rs
new file mode 100644
index 00000000..6b74bc36
--- /dev/null
+++ b/benchmark/src/table.rs
@@ -0,0 +1,175 @@
+use crate::app::Data;
+use tabled::settings::Merge;
+use tabled::{builder::Builder, settings::Style, Table};
+
+#[allow(clippy::too_many_arguments)]
+pub(crate) fn parameters_table(
+    tokenizer_name: String,
+    sequence_length: u32,
+    decode_length: u32,
+    n_runs: usize,
+    warmups: usize,
+    temperature: Option<f32>,
+    top_k: Option<u32>,
+    top_p: Option<f32>,
+    typical_p: Option<f32>,
+    repetition_penalty: Option<f32>,
+    watermark: bool,
+    do_sample: bool,
+) -> Table {
+    let mut builder = Builder::default();
+
+    builder.set_header(["Parameter", "Value"]);
+
+    builder.push_record(["Model", &tokenizer_name]);
+    builder.push_record(["Sequence Length", &sequence_length.to_string()]);
+    builder.push_record(["Decode Length", &decode_length.to_string()]);
+    builder.push_record(["N Runs", &n_runs.to_string()]);
+    builder.push_record(["Warmups", &warmups.to_string()]);
+    builder.push_record(["Temperature", &format!("{temperature:?}")]);
+    builder.push_record(["Top K", &format!("{top_k:?}")]);
+    builder.push_record(["Top P", &format!("{top_p:?}")]);
+    builder.push_record(["Typical P", &format!("{typical_p:?}")]);
+    builder.push_record(["Repetition Penalty", &format!("{repetition_penalty:?}")]);
+    builder.push_record(["Watermark", &watermark.to_string()]);
+    builder.push_record(["Do Sample", &do_sample.to_string()]);
+
+    let mut table = builder.build();
+    table.with(Style::markdown());
+    table
+}
+
+pub(crate) fn latency_table(data: &Data) -> Table {
+    let mut builder = Builder::default();
+
+    builder.set_header([
+        "Step",
+        "Batch Size",
+        "Average",
+        "Lowest",
+        "Highest",
+        "p50",
+        "p90",
+        "p99",
+    ]);
+
+    add_latencies(
+        &mut builder,
+        "Prefill",
+        &data.batch_size,
+        &data.prefill_latencies,
+    );
+    add_latencies(
+        &mut builder,
+        "Decode (token)",
+        &data.batch_size,
+        &data.decode_token_latencies,
+    );
+    add_latencies(
+        &mut builder,
+        "Decode (total)",
+        &data.batch_size,
+        &data.decode_latencies,
+    );
+
+    let mut table = builder.build();
+    table.with(Style::markdown()).with(Merge::vertical());
+    table
+}
+
+pub(crate) fn throughput_table(data: &Data) -> Table {
+    let mut builder = Builder::default();
+
+    builder.set_header(["Step", "Batch Size", "Average", "Lowest", "Highest"]);
+
+    add_throughputs(
+        &mut builder,
+        "Prefill",
+        &data.batch_size,
+        &data.prefill_throughputs,
+    );
+    add_throughputs(
+        &mut builder,
+        "Decode",
+        &data.batch_size,
+        &data.decode_throughputs,
+    );
+
+    let mut table = builder.build();
+    table.with(Style::markdown()).with(Merge::vertical());
+    table
+}
+
+fn add_latencies(
+    builder: &mut Builder,
+    step: &'static str,
+    batch_size: &[u32],
+    batch_latencies: &[Vec<f64>],
+) {
+    for (i, b) in batch_size.iter().enumerate() {
+        let latencies = &batch_latencies[i];
+        let (avg, min, max) = avg_min_max(latencies);
+
+        let row = [
+            step,
+            &b.to_string(),
+            &format_value(avg, "ms"),
+            &format_value(min, "ms"),
+            &format_value(max, "ms"),
+            &format_value(px(latencies, 50), "ms"),
+            &format_value(px(latencies, 90), "ms"),
+            &format_value(px(latencies, 99), "ms"),
+        ];
+
+        builder.push_record(row);
+    }
+}
+
+fn add_throughputs(
+    builder: &mut Builder,
+    step: &'static str,
+    batch_size: &[u32],
+    batch_throughputs: &[Vec<f64>],
+) {
+    for (i, b) in batch_size.iter().enumerate() {
+        let throughputs = &batch_throughputs[i];
+        let (avg, min, max) = avg_min_max(throughputs);
+
+        let row = [
+            step,
+            &b.to_string(),
+            &format_value(avg, "tokens/sec"),
+            &format_value(min, "tokens/sec"),
+            &format_value(max, "tokens/sec"),
+        ];
+
+        builder.push_record(row);
+    }
+}
+
+fn avg_min_max(data: &[f64]) -> (f64, f64, f64) {
+    let average = data.iter().sum::<f64>() / data.len() as f64;
+    let min = data
+        .iter()
+        .min_by(|a, b| a.total_cmp(b))
+        .unwrap_or(&std::f64::NAN);
+    let max = data
+        .iter()
+        .max_by(|a, b| a.total_cmp(b))
+        .unwrap_or(&std::f64::NAN);
+    (average, *min, *max)
+}
+
+/// Nearest-rank percentile; the series is recorded in run order, so it
+/// has to be sorted before indexing into it
+fn px(data: &[f64], p: u32) -> f64 {
+    let mut sorted = data.to_vec();
+    sorted.sort_by(|a, b| a.total_cmp(b));
+    let i = (f64::from(p) / 100.0 * sorted.len() as f64) as usize;
+    *sorted.get(i).unwrap_or(&std::f64::NAN)
+}
+
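+/// Format a float to two decimal places followed by its unit, e.g. "12.34 ms"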
+fn format_value(value: f64, unit: &'static str) -> String {
+    format!("{value:.2} {unit}")
+}
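With this change the benchmark can exercise sampling from the command line. A typical invocation might look as follows (illustrative values; the long flag names follow from the clap fields added above, and the binary name text-generation-benchmark is assumed from the crate's bin target):

    text-generation-benchmark \
        --tokenizer-name bigscience/bloom-560m \
        --sequence-length 10 \
        --decode-length 8 \
        --runs 10 \
        --warmups 1 \
        --temperature 0.9 \
        --top-k 50 \
        --top-p 0.9 \
        --repetition-penalty 1.2 \
        --do-sample

On exit, the parameters, latency, and throughput tables are printed to stdout as markdown (Style::markdown), so they can be pasted directly into an issue or PR description.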