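// Load-test scenario: builds text-generation request payloads and hands them
// to the shared runner imported from ./common.js.
// NOTE(review): appears to be a k6 script (uses `__ENV`, an exported `options`
// object and a default-export entry point) — confirm.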
import { get_options, run } from "./common.js";
// Reference latency in milliseconds; passed to get_options() to build the
// exported `options`. How it is interpreted is defined in ./common.js.
const reference_latency_ms = 30;

// Target server as "host:port". Taken from the HOST environment variable when
// it is set to a non-empty value, otherwise the local default is used.
const host = __ENV.HOST || '127.0.0.1:8000';

// Generation length requested per call; sent in each payload's `parameters`
// and also passed to run().
const max_new_tokens = 50;
/**
 * Build the request payload for one dataset record.
 *
 * The prompt is the `value` of the record's first `conversations` entry.
 * NOTE(review): presumably a ShareGPT-style record supplied by run() in
 * ./common.js — verify against the caller.
 *
 * @param {object} gpt - Record with a non-empty `conversations` array whose
 *   first element has a `value` property.
 * @returns {{inputs: *, parameters: {max_new_tokens: number, temperature: number}}}
 *   Payload carrying the prompt and the generation parameters.
 * @throws {TypeError} If `gpt.conversations` or its first entry is missing.
 */
function generate_payload(gpt) {
    const input = gpt.conversations[0].value;
    return {
        inputs: input,
        parameters: { max_new_tokens, temperature: 0.5 },
    };
}
// Exported run options, built by get_options() (from ./common.js) from the
// reference latency above.
// NOTE(review): presumably read by the k6 runner as the script's options;
// what get_options() derives from the latency is not visible here — confirm.
export const options = get_options(reference_latency_ms);
/**
 * Default-export entry point: delegates to run() from ./common.js with the
 * target host, the payload builder, and the requested generation length.
 * NOTE(review): presumably invoked once per iteration by the k6 runner — confirm.
 */
export default function () {
    run(host, generate_payload, max_new_tokens);
}