parent e3e487dc71
commit 942005386a

--- a/integration-tests/conftest.py
+++ b/integration-tests/conftest.py
@@ -231,8 +231,11 @@ def launcher(event_loop):
         if quantize:
             args.append("--quantize")
 
+        env = os.environ
+        env["LOG_LEVEL"] = "info,text_generation_router=debug"
+
         with subprocess.Popen(
-            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env
         ) as process:
             yield ProcessLauncherHandle(process, port)
 
@@ -271,7 +274,7 @@ def launcher(event_loop):
 
         gpu_count = num_shard if num_shard is not None else 1
 
-        env = {}
+        env = {"LOG_LEVEL": "info,text_generation_router=debug"}
         if HUGGING_FACE_HUB_TOKEN is not None:
             env["HUGGING_FACE_HUB_TOKEN"] = HUGGING_FACE_HUB_TOKEN
 
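Both conftest.py hunks inject the same filter string. `info,text_generation_router=debug` is `tracing`-style directive syntax: a default level of `info`, raised to `debug` for the `text_generation_router` target. A minimal sketch of how a router might build its subscriber from such a variable using `tracing_subscriber`'s `EnvFilter` — an assumption about the logging setup, not code from this commit:

```rust
// Requires the tracing-subscriber crate with its "env-filter" feature.
use tracing_subscriber::EnvFilter;

fn init_logging() {
    // "info,text_generation_router=debug": every target logs at `info`,
    // while the `text_generation_router` target is raised to `debug`.
    let filter = EnvFilter::try_from_env("LOG_LEVEL")
        .unwrap_or_else(|_| EnvFilter::new("info")); // fallback if unset or invalid
    tracing_subscriber::fmt().with_env_filter(filter).init();
}
```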
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -52,7 +52,7 @@ use utoipa_swagger_ui::SwaggerUi;
     example = json ! ({"error": "Incomplete generation"})),
     )
 )]
-#[instrument(skip(infer))]
+#[instrument(skip(infer, req))]
 async fn compat_generate(
     default_return_full_text: Extension<bool>,
     infer: Extension<Infer>,
@@ -133,8 +133,9 @@ async fn health(mut health: Extension<Health>) -> Result<(), (StatusCode, Json<E
     )
 )]
 #[instrument(
-    skip(infer),
+    skip_all,
     fields(
+        parameters = ?req.0.parameters,
         total_time,
         validation_time,
         queue_time,
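In the `#[instrument]` hunks (this one for `generate`, an identical one below for `generate_stream`), `skip(infer)` becomes `skip_all`, and the request parameters are opted back into the span explicitly via `fields(parameters = ?req.0.parameters)`. A self-contained sketch of that pattern; the handler name and types here are hypothetical, not the router's real signature:

```rust
use tracing::instrument;

#[derive(Debug)]
struct Parameters {
    temperature: f32,
}

// `skip_all` keeps every argument out of the span; `fields(...)` then
// records exactly what we want, using `?` for Debug-formatted capture.
#[instrument(skip_all, fields(parameters = ?params))]
fn handle(inputs: String, params: Parameters) {
    tracing::debug!("Input: {}", inputs);
}

fn main() {
    tracing_subscriber::fmt()
        .with_max_level(tracing::Level::DEBUG)
        .init();
    handle("Hello".into(), Parameters { temperature: 0.7 });
}
```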
@@ -151,6 +152,8 @@ async fn generate(
     let start_time = Instant::now();
     metrics::increment_counter!("tgi_request_count");
 
+    tracing::debug!("Input: {}", req.0.inputs);
+
     let compute_characters = req.0.inputs.chars().count();
     let mut add_prompt = None;
     if req.0.parameters.return_full_text.unwrap_or(false) {
@@ -282,7 +285,8 @@ async fn generate(
         output_text = prompt + &output_text;
     }
 
-    tracing::info!("Output: {}", output_text);
+    tracing::debug!("Output: {}", output_text);
+    tracing::info!("Success");
 
     let response = GenerateResponse {
         generated_text: output_text,
@@ -315,8 +319,9 @@ async fn generate(
     )
 )]
 #[instrument(
-    skip(infer),
+    skip_all,
     fields(
+        parameters = ?req.0.parameters,
         total_time,
         validation_time,
         queue_time,
@@ -336,6 +341,8 @@ async fn generate_stream(
     let start_time = Instant::now();
     metrics::increment_counter!("tgi_request_count");
 
+    tracing::debug!("Input: {}", req.0.inputs);
+
     let compute_characters = req.0.inputs.chars().count();
 
     let mut headers = HeaderMap::new();
@@ -370,6 +377,8 @@ async fn generate_stream(
                         InferStreamResponse::Prefill(_) => {}
                         // Yield event for every new token
                         InferStreamResponse::Token(token) => {
+                            tracing::debug!(parent: &span, "Token: {:?}", token);
+
                             // StreamResponse
                             let stream_token = StreamResponse {
                                 token,
@@ -428,7 +437,8 @@ async fn generate_stream(
                                 output_text = prompt + &output_text;
                             }
 
-                            tracing::info!(parent: &span, "Output: {}", output_text);
+                            tracing::debug!(parent: &span, "Output: {}", output_text);
+                            tracing::info!(parent: &span, "Success");
 
                             let stream_token = StreamResponse {
                                 token,
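Note that the streaming events name an explicit `parent: &span`: they fire from inside the response stream, outside the instrumented function's scope, so they would not attach to the request span automatically. A freestanding sketch of the pattern, with hypothetical names:

```rust
use tracing::{info_span, Span};

fn emit_tokens(tokens: &[&str]) {
    // Create the span once, then attach each event to it explicitly,
    // as the generate_stream hunks do with `parent: &span`.
    let span: Span = info_span!("generate_stream");
    for token in tokens {
        tracing::debug!(parent: &span, "Token: {:?}", token);
    }
    tracing::info!(parent: &span, "Success");
}

fn main() {
    tracing_subscriber::fmt()
        .with_max_level(tracing::Level::DEBUG)
        .init();
    emit_tokens(&["Hello", " world"]);
}
```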