Init
commit 295831a481
@ -0,0 +1,37 @@
# BLOOM Inference

A Rust router and Python gRPC server for BLOOM inference.

## Install

```shell
cd server
pip install .
```

```shell
cd router
cargo build --release
```

## Run

```shell
python server/bloom_inference/main.py bigscience/bloom --num-gpus 8 --shard-directory /dev/shm/models
```

```shell
./router/target/release/router
```

## TODO:

- [ ] Improve model download
    - Store "shardable" layers separately and layer by layer
- [ ] Add batching args to router CLI
- [ ] Add docstrings + comments everywhere as the codebase is fairly complicated
- [ ] Add tests
- [ ] Add shutdown logic in router and server
- [ ] Improve multi-processing logic in server
- [ ] Improve error handling everywhere
- [ ] Improve past key layer indexing?
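Once both processes are up, the router (its source appears later in this commit) exposes a single `POST /generate` route on `127.0.0.1:3000`. A minimal query sketch, assuming the Python `requests` package is available; the prompt and parameter values are illustrative:

```python
import requests

response = requests.post(
    "http://127.0.0.1:3000/generate",
    json={
        "inputs": "Hello, I am a language model and",
        "parameters": {
            "temperature": 1.0,
            "top_k": 0,
            "top_p": 1.0,
            "do_sample": False,
            "max_new_tokens": 20,
        },
    },
)
print(response.json()["generated_text"])
```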
@ -0,0 +1,83 @@
syntax = "proto3";

package generate.v1;

service TextGeneration {
    /// Service discovery
    rpc ServiceDiscovery(Empty) returns (ServiceDiscoveryResponse) {}
    /// Empties batch cache
    rpc ClearCache(Empty) returns (Empty);
    /// Generate tokens for a batch without cache
    rpc Generate(Batch) returns (Response);
    /// Generate tokens for a batch with cache
    rpc GenerateWithCache(BatchCached) returns (Response);
}

message ServiceDiscoveryResponse {
    repeated string urls = 1;
}

message LogitsWarperParameters {
    float temperature = 1;
    uint32 top_k = 2;
    float top_p = 3;
    bool do_sample = 4;
}

message Request {
    /// Request ID
    uint64 id = 1;
    /// The generation context
    string inputs = 2;
    /// Logits Warper Parameters
    LogitsWarperParameters parameters = 3;
    /// Stopping criteria
    uint32 max_new_tokens = 4;
}

message Batch {
    /// Batch ID
    uint64 id = 1;
    /// Individual requests
    repeated Request requests = 2;
}

message BatchCached {
    /// Batch ID
    uint64 id = 1;
    /// Request IDs within the cache
    repeated uint64 request_ids = 2;
    /// Cache IDs
    repeated uint64 batch_cached_ids = 3;
    /// Batch size (sum of the batch sizes of all cache entries)
    uint32 total_batch_size = 4;
    /// Max sequence length
    uint32 max_sequence_length = 5;
}

message FinishedGeneration {
    /// ID of the original request
    uint64 id = 1;
    /// Output
    string output = 2;
}

message CacheEntry {
    /// Cache ID; same as batch ID
    uint64 id = 1;
    /// Requests present in the cache entry
    repeated uint64 request_ids = 2;
    /// Sequence length
    uint32 sequence_length = 3;
}

message Response {
    /// Finished requests (optional)
    repeated FinishedGeneration finished = 1;
    /// Cache entry (optional)
    optional CacheEntry cache_entry = 2;
}

// Represent an empty message.
message Empty {}
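The four RPCs map onto the generation loop: `Generate` prefills a fresh batch, `GenerateWithCache` decodes one more token for batches whose past key values are cached server side, and `ClearCache` resets that cache. A minimal sketch of calling the service from Python, assuming the stubs generated by the server Makefile (`make gen-server`) are importable and a server is listening on `/tmp/bloom-inference-0`, the unix socket the router dials:

```python
import grpc

from bloom_inference.pb import generate_pb2, generate_pb2_grpc

channel = grpc.insecure_channel("unix:///tmp/bloom-inference-0")
stub = generate_pb2_grpc.TextGenerationStub(channel)

# One request, greedy decoding, 20 new tokens (all values illustrative)
batch = generate_pb2.Batch(
    id=0,
    requests=[
        generate_pb2.Request(
            id=0,
            inputs="Hello",
            parameters=generate_pb2.LogitsWarperParameters(
                temperature=1.0, top_k=0, top_p=1.0, do_sample=False
            ),
            max_new_tokens=20,
        )
    ],
)

# Each call decodes a single token for every request in the batch;
# finished generations come back in `response.finished`.
response = stub.Generate(batch)
print(response.finished, response.cache_entry)
```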
@ -0,0 +1 @@
/target
File diff suppressed because it is too large
@ -0,0 +1,28 @@
[package]
name = "bloom-inference"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
bloom-inference-client = { path = "client" }
futures = "0.3.24"
parking_lot = "0.12.1"
poem = "1.3.45"
serde = "1.0.145"
serde_json = "1.0.85"
tokenizers = "0.13.0"
tokio = { version = "1.21.1", features = ["rt-multi-thread", "parking_lot", "sync"] }
tracing = "0.1.36"
tracing-subscriber = "0.3.15"

[workspace]
members = [
    "client",
]

[profile.release]
debug = 1
incremental = true
lto = "off"
@ -0,0 +1,19 @@
[package]
name = "bloom-inference-client"
version = "0.1.0"
edition = "2021"

[dependencies]
futures = "0.3.24"
#grpc-error-details = { path = "../../grpc-error-details" }
#grpc-metadata = { path = "../../grpc-metadata" }
prost = "^0.9"
thiserror = "1.0.37"
tokio = { version = "1.21.2", features = ["sync"] }
tonic = "^0.6"
tower = "^0.4"
tracing = "^0.1"
tracing-error = "^0.2"

[build-dependencies]
tonic-build = "0.6.2"
@ -0,0 +1,14 @@
use std::fs;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // The directory may already exist: ignore the error in that case
    fs::create_dir("src/pb").unwrap_or(());
    tonic_build::configure()
        .build_client(true)
        .build_server(false)
        .out_dir("src/pb")
        .include_file("mod.rs")
        .compile(&["../../proto/generate.proto"], &["../../proto"])
        .unwrap_or_else(|e| panic!("protobuf compilation failed: {}", e));

    Ok(())
}
@ -0,0 +1,104 @@
use crate::pb::generate::v1::text_generation_client::TextGenerationClient;
use crate::pb::generate::v1::*;
use crate::Result;
use std::time::Duration;
use tonic::transport::{Channel, Uri};
use tower::timeout::Timeout;
use tracing::*;

/// BLOOM Inference gRPC client
#[derive(Clone)]
pub struct Client {
    stub: TextGenerationClient<Timeout<Channel>>,
}

impl Client {
    /// Returns a client connected to the given url. Requests exceeding timeout will fail.
    pub async fn connect(uri: Uri, timeout: Duration) -> Self {
        let channel = Channel::builder(uri)
            .connect()
            .await
            .expect("Transport error");
        let timeout_channel = Timeout::new(channel, timeout);

        Self {
            stub: TextGenerationClient::new(timeout_channel),
        }
    }

    /// Returns a client connected to the given unix socket. Requests exceeding timeout will fail.
    pub async fn connect_uds(path: String, timeout: Duration) -> Self {
        // The URL below is never dialed: the custom connector always opens the unix socket
        let channel = Channel::from_static("http://[::]:50051")
            .connect_with_connector(tower::service_fn(move |_: Uri| {
                tokio::net::UnixStream::connect(path.clone())
            }))
            .await
            .expect("Transport error");
        let timeout_channel = Timeout::new(channel, timeout);

        Self {
            stub: TextGenerationClient::new(timeout_channel),
        }
    }

    #[instrument(skip(self))]
    pub async fn service_discovery(&mut self) -> Result<Vec<String>> {
        let request = tonic::Request::new(Empty {});
        let response = self
            .stub
            .service_discovery(request)
            .instrument(info_span!("service_discovery"))
            .await?;
        let urls = response
            .into_inner()
            .urls
            .into_iter()
            // Strip the unix socket prefix so the paths can be dialed directly
            .map(|url| match url.strip_prefix("unix://") {
                None => url,
                Some(stripped_url) => stripped_url.to_string(),
            })
            .collect();
        Ok(urls)
    }

    #[instrument(skip(self))]
    pub async fn clear_cache(&mut self) -> Result<()> {
        let request = tonic::Request::new(Empty {});
        self.stub
            .clear_cache(request)
            .instrument(info_span!("clear_cache"))
            .await?;
        Ok(())
    }

    #[instrument(skip(self))]
    pub async fn generate(
        &mut self,
        request: Batch,
    ) -> Result<(Vec<FinishedGeneration>, Option<CacheEntry>)> {
        let request = tonic::Request::new(request);
        let response = self
            .stub
            .generate(request)
            .instrument(info_span!("generate"))
            .await?
            .into_inner();
        Ok((response.finished, response.cache_entry))
    }

    #[instrument(skip(self))]
    pub async fn generate_with_cache(
        &mut self,
        request: BatchCached,
    ) -> Result<(Vec<FinishedGeneration>, Option<CacheEntry>)> {
        let request = tonic::Request::new(request);
        let response = self
            .stub
            .generate_with_cache(request)
            .instrument(info_span!("generate_with_cache"))
            .await?
            .into_inner();
        Ok((response.finished, response.cache_entry))
    }
}
@ -0,0 +1,32 @@
//! BLOOM Inference gRPC client library

mod client;
mod pb;
mod sharded_client;

pub use client::Client;
pub use pb::generate::v1::{
    Batch, BatchCached, CacheEntry, FinishedGeneration, LogitsWarperParameters, Request,
};
pub use sharded_client::ShardedClient;
use thiserror::Error;
pub use tonic::transport::Uri;
use tonic::Status;

#[derive(Error, Debug, Clone)]
#[error("Text generation client error: {msg:?}")]
pub struct ClientError {
    msg: String,
    // source: Status,
}

impl From<Status> for ClientError {
    fn from(err: Status) -> Self {
        Self {
            msg: err.to_string(),
            // source: err,
        }
    }
}

pub type Result<T> = std::result::Result<T, ClientError>;
@ -0,0 +1 @@
*.rs
@ -0,0 +1,106 @@
use crate::Result;
use crate::{Batch, BatchCached, CacheEntry, Client, FinishedGeneration};
use futures::future::join_all;
use std::time::Duration;
use tokio::sync::{broadcast, mpsc};
use tonic::transport::Uri;

#[derive(Clone, Debug)]
enum Command {
    Generate(
        Batch,
        mpsc::Sender<Result<(Vec<FinishedGeneration>, Option<CacheEntry>)>>,
    ),
    GenerateWithCache(
        BatchCached,
        mpsc::Sender<Result<(Vec<FinishedGeneration>, Option<CacheEntry>)>>,
    ),
    ClearCache(mpsc::Sender<Result<()>>),
}

/// Relays broadcast commands to a single shard
async fn client_task(mut client: Client, mut request_subscriber: broadcast::Receiver<Command>) {
    while let Ok(message) = request_subscriber.recv().await {
        match message {
            Command::Generate(batch, response_tx) => {
                let result = client.generate(batch).await;
                // The receiver may already have a response from another shard:
                // ignore send errors
                response_tx.try_send(result).unwrap_or(());
            }
            Command::GenerateWithCache(batch_cached, response_tx) => {
                let result = client.generate_with_cache(batch_cached).await;
                response_tx.try_send(result).unwrap_or(());
            }
            Command::ClearCache(response_tx) => {
                let result = client.clear_cache().await;
                response_tx.try_send(result).unwrap_or(());
            }
        };
    }
}

/// Broadcasts every command to all shards and keeps the first response
pub struct ShardedClient {
    request_tx: broadcast::Sender<Command>,
}

impl ShardedClient {
    fn new(mut clients: Vec<Client>) -> Self {
        let (request_tx, _) = broadcast::channel(1);

        for client in clients.drain(..) {
            let request_subscriber = request_tx.subscribe();
            tokio::spawn(client_task(client, request_subscriber));
        }

        Self { request_tx }
    }

    /// Creates one client per shard discovered through the master client
    async fn from_master_client(mut master_client: Client) -> Self {
        let uris = master_client.service_discovery().await.unwrap();
        let futures = uris
            .into_iter()
            .map(|path| Client::connect_uds(path, Duration::from_secs(5)));
        let clients = join_all(futures).await;
        Self::new(clients)
    }

    /// Returns a client connected to the given url. Requests exceeding timeout will fail.
    pub async fn connect(uri: Uri, timeout: Duration) -> Self {
        let master_client = Client::connect(uri, timeout).await;
        Self::from_master_client(master_client).await
    }

    /// Returns a client connected to the given unix socket. Requests exceeding timeout will fail.
    pub async fn connect_uds(path: String, timeout: Duration) -> Self {
        let master_client = Client::connect_uds(path, timeout).await;
        Self::from_master_client(master_client).await
    }

    pub async fn generate(
        &self,
        batch: Batch,
    ) -> Result<(Vec<FinishedGeneration>, Option<CacheEntry>)> {
        let (response_tx, mut response_rx) = mpsc::channel(1);
        self.request_tx
            .send(Command::Generate(batch, response_tx))
            .unwrap();
        response_rx.recv().await.unwrap()
    }

    pub async fn generate_with_cache(
        &self,
        batch_cached: BatchCached,
    ) -> Result<(Vec<FinishedGeneration>, Option<CacheEntry>)> {
        let (response_tx, mut response_rx) = mpsc::channel(1);
        self.request_tx
            .send(Command::GenerateWithCache(batch_cached, response_tx))
            .unwrap();
        response_rx.recv().await.unwrap()
    }

    pub async fn clear_cache(&self) -> Result<()> {
        let (response_tx, mut response_rx) = mpsc::channel(1);
        self.request_tx
            .send(Command::ClearCache(response_tx))
            .unwrap();
        response_rx.recv().await.unwrap()
    }
}
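Every shard must receive every command, because the tensor-parallel shards jointly compute each forward pass; all of them return the same result, so the caller keeps only the first reply and drops the rest (hence the `try_send(..).unwrap_or(())` above). A rough asyncio analogue of that fan-out, purely illustrative and not part of the codebase:

```python
import asyncio

async def shard_task(name: str, inbox: asyncio.Queue, outbox: asyncio.Queue):
    # Every shard receives every command (broadcast), like client_task above
    while True:
        command = await inbox.get()
        outbox.put_nowait(f"{name} handled {command}")

async def main():
    outbox = asyncio.Queue()
    inboxes = [asyncio.Queue() for _ in range(2)]
    for i, inbox in enumerate(inboxes):
        asyncio.create_task(shard_task(f"shard-{i}", inbox, outbox))

    # Broadcast one command to all shards, keep only the first reply
    # (the Rust version uses a bounded channel and try_send for this)
    for inbox in inboxes:
        inbox.put_nowait("generate")
    print(await outbox.get())

asyncio.run(main())
```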
@ -0,0 +1,129 @@
/// This code is heavily inspired by Tokio mini-redis
use crate::GenerateRequest;
use bloom_inference_client::{Batch, ClientError, LogitsWarperParameters, Request};
use parking_lot::RwLock;
use std::collections::BTreeMap;
use std::sync::Arc;
use tokio::sync::oneshot::Sender;

#[derive(Debug, Clone)]
pub(crate) struct Db {
    pub shared: Arc<Shared>,
}

#[derive(Debug)]
pub struct Shared {
    state: RwLock<State>,
}

#[derive(Debug)]
struct State {
    /// Pending requests, keyed by request ID, with the channel used to send
    /// the generated text back to the waiting HTTP handler
    entries: BTreeMap<u64, (Request, Sender<Result<String, ClientError>>)>,

    /// Identifier to assign to the next request
    next_id: u64,

    /// Identifier to assign to the next batch
    next_batch_id: u64,

    /// Request ID at which the next batch starts
    next_batch_start_id: u64,
}

impl Db {
    pub(crate) fn new() -> Self {
        let shared = Arc::new(Shared {
            state: RwLock::new(State {
                entries: BTreeMap::new(),
                next_id: 0,
                next_batch_id: 0,
                next_batch_start_id: 0,
            }),
        });

        Self { shared }
    }

    pub(crate) fn append(
        &self,
        request: GenerateRequest,
        sender: Sender<Result<String, ClientError>>,
    ) {
        let mut state = self.shared.state.write();

        let id = state.next_id;
        state.next_id += 1;

        let parameters = Some(LogitsWarperParameters {
            temperature: request.parameters.temperature,
            top_k: request.parameters.top_k,
            top_p: request.parameters.top_p,
            do_sample: request.parameters.do_sample,
        });
        let request = Request {
            id,
            inputs: request.inputs,
            parameters,
            max_new_tokens: request.parameters.max_new_tokens,
        };
        state.entries.insert(id, (request, sender));
    }

    pub(crate) fn remove(&self, id: &u64) -> Option<(Request, Sender<Result<String, ClientError>>)> {
        let mut state = self.shared.state.write();
        state.entries.remove(id)
    }

    pub(crate) fn len(&self) -> usize {
        let state = self.shared.state.read();
        state.entries.len()
    }

    fn next_requests(&self, max_size: usize) -> Option<(u64, Vec<Request>)> {
        let state = self.shared.state.read();

        let requests: Vec<Request> = state
            .entries
            .range(state.next_batch_start_id..)
            .take(max_size)
            .map(|(_, (request, _))| request.clone())
            .collect();

        if requests.is_empty() {
            None
        } else {
            let last_id = requests.last().unwrap().id;
            Some((last_id, requests))
        }
    }

    pub(crate) fn next_batch(&self, max_size: usize) -> Option<Batch> {
        if let Some((last_id, requests)) = self.next_requests(max_size) {
            let mut state = self.shared.state.write();
            let batch = Batch {
                id: state.next_batch_id,
                requests,
            };
            state.next_batch_start_id = last_id + 1;
            state.next_batch_id += 1;
            return Some(batch);
        }
        None
    }

    pub(crate) fn next_batch_minimum_size(
        &self,
        min_size: usize,
        max_size: usize,
    ) -> Option<Batch> {
        if let Some((last_id, requests)) = self.next_requests(max_size) {
            if requests.len() >= min_size {
                let mut state = self.shared.state.write();
                let batch = Batch {
                    id: state.next_batch_id,
                    requests,
                };
                state.next_batch_start_id = last_id + 1;
                state.next_batch_id += 1;
                return Some(batch);
            }
        }
        None
    }
}
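`next_batch_start_id` is the heart of the scheduling: requests already handed out in a batch stay in `entries` until they finish, but later `next_batch` calls skip past them. A tiny Python illustration of that cursor (not part of the codebase):

```python
entries = {0: "req0", 1: "req1", 2: "req2", 3: "req3"}
next_batch_start_id = 0

def next_batch(max_size):
    global next_batch_start_id
    # Take up to max_size requests at or after the cursor, like
    # state.entries.range(state.next_batch_start_id..).take(max_size)
    batch = [(i, r) for i, r in sorted(entries.items()) if i >= next_batch_start_id]
    batch = batch[:max_size]
    if not batch:
        return None
    next_batch_start_id = batch[-1][0] + 1
    return batch

print(next_batch(2))  # [(0, 'req0'), (1, 'req1')]
print(next_batch(5))  # [(2, 'req2'), (3, 'req3')]
```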
@ -0,0 +1,130 @@
use crate::{Db, GenerateRequest};
use bloom_inference_client::{
    Batch, BatchCached, CacheEntry, ClientError, FinishedGeneration, ShardedClient,
};
use std::sync::Arc;
use tokio::sync::{oneshot, Notify};

const MAX_LENGTH: usize = 128;

pub struct InferError {}

#[derive(Clone)]
pub(crate) struct Infer {
    db: Db,
    shared: Arc<Shared>,
}

struct Shared {
    batching_task: Notify,
}

impl Infer {
    pub(crate) fn new(client: ShardedClient) -> Self {
        let db = Db::new();
        let shared = Arc::new(Shared {
            batching_task: Notify::new(),
        });

        tokio::spawn(batching_task(client, db.clone(), shared.clone()));

        Self { db, shared }
    }

    pub(crate) async fn infer(&self, request: GenerateRequest) -> Result<String, InferError> {
        if self.db.len() > MAX_LENGTH {
            return Err(InferError {});
        }
        let (request_tx, request_rx) = oneshot::channel();
        self.db.append(request, request_tx);
        self.shared.batching_task.notify_waiters();
        match request_rx.await.unwrap() {
            Ok(output) => Ok(output),
            Err(_) => Err(InferError {}),
        }
    }
}

async fn batching_task(client: ShardedClient, db: Db, shared: Arc<Shared>) {
    loop {
        shared.batching_task.notified().await;

        if let Some(batch) = db.next_batch(32) {
            let mut cache_entry = infer_batch(batch, &client, &db).await;

            loop {
                if let Some(entry) = cache_entry {
                    let mut batch_cached_ids = vec![entry.id];
                    let mut total_batch_size = entry.request_ids.len();
                    let mut max_sequence_length = entry.sequence_length;
                    let mut request_ids = entry.request_ids;

                    // If the in-flight batch is small, try to fold a second
                    // prefill into the next decoding step
                    if total_batch_size <= 16 {
                        if let Some(batch) = db.next_batch_minimum_size(16, 48) {
                            let other_cache_entry = infer_batch(batch, &client, &db).await;

                            if let Some(entry) = other_cache_entry {
                                batch_cached_ids.push(entry.id);
                                total_batch_size += entry.request_ids.len();
                                max_sequence_length =
                                    max_sequence_length.max(entry.sequence_length);
                                request_ids.extend(entry.request_ids.into_iter());
                            }
                        }
                    }

                    let batch_cached = BatchCached {
                        id: entry.id,
                        batch_cached_ids,
                        total_batch_size: total_batch_size as u32,
                        max_sequence_length,
                        request_ids,
                    };
                    cache_entry = infer_batch_cached(batch_cached, &client, &db).await;
                } else {
                    break;
                }
            }
        }
    }
}

async fn infer_batch_cached(
    batch: BatchCached,
    client: &ShardedClient,
    db: &Db,
) -> Option<CacheEntry> {
    match client.generate_with_cache(batch.clone()).await {
        Ok((finished, cache_entry)) => {
            send_finished(finished, db);
            cache_entry
        }
        Err(err) => {
            println!("{:?}", err);
            send_error(err, batch.request_ids, db);
            None
        }
    }
}

async fn infer_batch(batch: Batch, client: &ShardedClient, db: &Db) -> Option<CacheEntry> {
    match client.generate(batch.clone()).await {
        Ok((finished, cache_entry)) => {
            send_finished(finished, db);
            cache_entry
        }
        Err(err) => {
            println!("{:?}", err);
            send_error(
                err,
                batch.requests.into_iter().map(|req| req.id).collect(),
                db,
            );
            None
        }
    }
}

fn send_error(error: ClientError, request_ids: Vec<u64>, db: &Db) {
    request_ids.into_iter().for_each(|id| {
        let (_, response_tx) = db.remove(&id).unwrap();
        response_tx.send(Err(error.clone())).unwrap_or(());
    });
}

fn send_finished(finished: Vec<FinishedGeneration>, db: &Db) {
    finished.into_iter().for_each(|output| {
        let (_, response_tx) = db.remove(&output.id).unwrap();
        response_tx.send(Ok(output.output)).unwrap_or(());
    });
}
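The batching task folds a second prefill into the next decoding step whenever the running batch holds at most 16 requests and at least 16 more are waiting (up to 48). The `BatchCached` it then sends is just bookkeeping over the returned cache entries; a sketch with made-up sizes, assuming the generated Python stubs are importable:

```python
from bloom_inference.pb import generate_pb2

entry_a = generate_pb2.CacheEntry(id=0, request_ids=[0, 1], sequence_length=12)
entry_b = generate_pb2.CacheEntry(id=1, request_ids=[2], sequence_length=7)

# Mirrors how batching_task accumulates its fields over cache entries
batch_cached = generate_pb2.BatchCached(
    id=entry_a.id,
    batch_cached_ids=[entry_a.id, entry_b.id],
    request_ids=list(entry_a.request_ids) + list(entry_b.request_ids),
    total_batch_size=len(entry_a.request_ids) + len(entry_b.request_ids),
    max_sequence_length=max(entry_a.sequence_length, entry_b.sequence_length),
)
```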
@ -0,0 +1,125 @@
use tokio::time::Instant;

use poem::http::StatusCode;
use poem::middleware::AddData;
use poem::web::Data;
use poem::{handler, listener::TcpListener, post, web::Json, EndpointExt, Result, Route, Server};

use bloom_inference_client::ShardedClient;
use serde::Deserialize;
use std::time::Duration;
use tracing::instrument;

mod db;

use db::Db;

mod infer;

use infer::Infer;

#[derive(Clone, Debug, Deserialize)]
struct GenerateParameters {
    #[serde(default = "default_temperature")]
    temperature: f32,
    #[serde(default = "default_top_k")]
    top_k: u32,
    #[serde(default = "default_top_p")]
    top_p: f32,
    #[serde(default = "default_do_sample")]
    do_sample: bool,
    #[serde(default = "default_max_new_tokens")]
    max_new_tokens: u32,
}

fn default_temperature() -> f32 {
    1.0
}

fn default_top_k() -> u32 {
    0
}

fn default_top_p() -> f32 {
    1.0
}

fn default_do_sample() -> bool {
    false
}

fn default_max_new_tokens() -> u32 {
    20
}

#[derive(Clone, Debug, Deserialize)]
struct GenerateRequest {
    inputs: String,
    #[serde(default = "default_parameters")]
    parameters: GenerateParameters,
}

fn default_parameters() -> GenerateParameters {
    GenerateParameters {
        temperature: default_temperature(),
        top_k: default_top_k(),
        top_p: default_top_p(),
        do_sample: default_do_sample(),
        max_new_tokens: default_max_new_tokens(),
    }
}

#[handler]
#[instrument(skip(infer), fields(time, time_per_token))]
async fn generate(
    infer: Data<&Infer>,
    req: Json<GenerateRequest>,
) -> Result<Json<serde_json::Value>> {
    let start = Instant::now();

    let output = infer
        .infer(GenerateRequest {
            inputs: req.inputs.clone(),
            parameters: req.parameters.clone(),
        })
        .await;

    match output {
        Ok(generated_text) => {
            tracing::Span::current().record("time", format!("{:?}", start.elapsed()));
            tracing::Span::current().record(
                "time_per_token",
                format!("{:?}", start.elapsed() / req.parameters.max_new_tokens),
            );
            tracing::info!("response: {}", generated_text);

            Ok(Json(serde_json::json!({
                "generated_text": generated_text,
            })))
        }
        Err(_) => Err(poem::Error::from_status(StatusCode::INTERNAL_SERVER_ERROR)),
    }
}

#[tokio::main]
async fn main() -> Result<(), std::io::Error> {
    tracing_subscriber::fmt::init();

    let sharded_client =
        ShardedClient::connect_uds("/tmp/bloom-inference-0".to_string(), Duration::from_secs(5))
            .await;
    sharded_client
        .clear_cache()
        .await
        .expect("Unable to clear cache");
    tracing::info!("Connected");

    let infer = Infer::new(sharded_client);

    let app = Route::new()
        .at("/generate", post(generate))
        .with(AddData::new(infer));
    Server::new(TcpListener::bind("127.0.0.1:3000"))
        .run(app)
        .await
}
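Every field of `parameters` carries a serde default, so a request body may omit any or all of them; the two payloads below are handled identically (values taken from the `default_*` functions above):

```python
# Minimal body: the router fills in the defaults via default_parameters()
minimal = {"inputs": "Hello"}

# Fully explicit equivalent
explicit = {
    "inputs": "Hello",
    "parameters": {
        "temperature": 1.0,    # default_temperature
        "top_k": 0,            # default_top_k
        "top_p": 1.0,          # default_top_p
        "do_sample": False,    # default_do_sample
        "max_new_tokens": 20,  # default_max_new_tokens
    },
}
```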
Binary file not shown.
@ -0,0 +1,20 @@
gen-server:
	mkdir bloom_inference/pb || true
	python -m grpc_tools.protoc -I../proto --python_out=bloom_inference/pb --grpc_python_out=bloom_inference/pb ../proto/generate.proto
	find bloom_inference/pb/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \;
	touch bloom_inference/pb/__init__.py

unit-tests:
	python -m pytest --cov=bloom_inference tests

unit-tests-reporting:
	python -m pytest --junitxml=report.xml --cov=bloom_inference tests

pip-install:
	pip install grpcio-tools
	make gen-server
	pip install .

install:
	poetry install
	make gen-server
@ -0,0 +1,15 @@
# BLOOM Inference Python gRPC Server

A Python gRPC server for BLOOM Inference

## Local Install (with poetry)

```shell
make install
```

## Local Install (with pip)

```shell
make pip-install
```
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,48 @@
import torch

from dataclasses import dataclass
from typing import Dict, Optional, List

from bloom_inference.pb import generate_pb2
from bloom_inference.utils import NextTokenChooser, StoppingCriteria


@dataclass
class CacheEntry:
    batch_id: int
    request_ids: List[int]
    input_ids: Dict[str, torch.Tensor]
    all_input_ids: List[torch.Tensor]
    next_token_choosers: List[NextTokenChooser]
    stopping_criterias: List[StoppingCriteria]

    def __len__(self):
        return len(self.request_ids)

    def to_pb(self):
        return generate_pb2.CacheEntry(
            id=self.batch_id,
            request_ids=self.request_ids,
            sequence_length=max(len(entry) for entry in self.all_input_ids),
        )


class Cache:
    def __init__(self):
        self.cache: Dict[int, CacheEntry] = {}

    def pop(self, batch_id: int) -> Optional[CacheEntry]:
        return self.cache.pop(batch_id, None)

    def set(self, entry: CacheEntry):
        if entry is not None:
            self.cache[entry.batch_id] = entry

    def delete(self, batch_id: int):
        del self.cache[batch_id]

    def clear(self):
        self.cache.clear()

    def __len__(self):
        return len(self.cache.keys())
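A small sketch of the `Cache` contract, assuming the package is importable: entries are keyed by `batch_id`, and `pop` both returns and removes them, so a cached batch is consumed exactly once by the next generation step.

```python
from bloom_inference.cache import Cache, CacheEntry

# A structurally valid but empty entry, just to exercise the container
entry = CacheEntry(
    batch_id=0,
    request_ids=[0],
    input_ids={},
    all_input_ids=[],
    next_token_choosers=[],
    stopping_criterias=[],
)

cache = Cache()
cache.set(entry)
assert cache.pop(0) is entry
assert cache.pop(0) is None  # already consumed
```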
@ -0,0 +1,30 @@
import typer

from pathlib import Path
from torch.distributed.launcher import launch_agent, LaunchConfig
from typing import Optional

from bloom_inference.server import serve


def main(
    model_name: str,
    num_gpus: int = 1,
    shard_directory: Optional[Path] = None,
):
    if num_gpus == 1:
        serve(model_name, False, shard_directory)

    else:
        config = LaunchConfig(
            min_nodes=1,
            max_nodes=1,
            nproc_per_node=num_gpus,
            rdzv_backend="c10d",
            max_restarts=0,
        )
        launch_agent(config, serve, [model_name, True, shard_directory])


if __name__ == "__main__":
    typer.run(main)
@ -0,0 +1,428 @@
import torch
import torch.distributed

from dataclasses import dataclass
from pathlib import Path
from typing import List, Tuple, Optional, Dict

from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
from transformers.modeling_utils import no_init_weights

from bloom_inference.cache import CacheEntry
from bloom_inference.pb import generate_pb2
from bloom_inference.shard_model import shard_model, match_suffix
from bloom_inference.utils import (
    StoppingCriteria,
    NextTokenChooser,
    initialize_torch_distributed,
    set_default_dtype,
)

torch.manual_seed(0)


@dataclass
class Batch:
    batch_id: int
    request_ids: List[int]
    input_ids: Dict[str, torch.Tensor]
    all_input_ids: List[torch.Tensor]
    next_token_choosers: List[NextTokenChooser]
    stopping_criterias: List[StoppingCriteria]

    @classmethod
    def from_batch_pb(
        cls, pb: generate_pb2.Batch, tokenizer: AutoTokenizer, device: torch.device
    ) -> "Batch":
        request_ids = []
        inputs = []
        next_token_choosers = []
        stopping_criterias = []

        # Parse batch
        for r in pb.requests:
            request_ids.append(r.id)
            inputs.append(r.inputs)
            next_token_choosers.append(
                NextTokenChooser(
                    temperature=r.parameters.temperature,
                    top_k=r.parameters.top_k,
                    top_p=r.parameters.top_p,
                    do_sample=r.parameters.do_sample,
                )
            )
            stopping_criterias.append(StoppingCriteria(max_new_tokens=r.max_new_tokens))

        input_ids = tokenizer(inputs, return_tensors="pt", padding=True).to(device)
        all_input_ids = input_ids["input_ids"].unsqueeze(-1)

        return cls(
            pb.id,
            request_ids,
            input_ids,
            all_input_ids,
            next_token_choosers,
            stopping_criterias,
        )

    @classmethod
    def from_cache_entry(cls, cache_entry: CacheEntry) -> "Batch":
        return cls(
            cache_entry.batch_id,
            cache_entry.request_ids,
            cache_entry.input_ids,
            cache_entry.all_input_ids,
            cache_entry.next_token_choosers,
            cache_entry.stopping_criterias,
        )

    @classmethod
    def from_batch_cached_pb(cls, pb: generate_pb2.BatchCached, cache) -> "Batch":
        if len(pb.batch_cached_ids) == 1:
            cache_entry = cache.pop(pb.batch_cached_ids[0])
            if cache_entry is None:
                raise ValueError(
                    f"Batch ID {pb.batch_cached_ids[0]} not found in cache"
                )
            return cls.from_cache_entry(cache_entry)

        total_batch_size = pb.total_batch_size
        max_sequence_length = pb.max_sequence_length
        input_ids = {"input_ids": None, "attention_mask": None, "past_key_values": []}
        request_ids = []
        all_input_ids = []
        next_token_choosers = []
        stopping_criterias = []
        start_index = 0
        for i, batch_id in enumerate(pb.batch_cached_ids):
            cache_entry = cache.pop(batch_id)
            if cache_entry is None:
                raise ValueError(f"Batch ID {batch_id} not found in cache")
            request_ids.extend(cache_entry.request_ids)
            all_input_ids.extend(cache_entry.all_input_ids)
            next_token_choosers.extend(cache_entry.next_token_choosers)
            stopping_criterias.extend(cache_entry.stopping_criterias)

            batch_size = len(cache_entry.request_ids)
            end_index = start_index + batch_size
            sequence_length = max(len(entry) for entry in cache_entry.all_input_ids)

            if input_ids["input_ids"] is None:
                input_ids["input_ids"] = torch.empty(
                    (total_batch_size, 1),
                    dtype=cache_entry.input_ids["input_ids"].dtype,
                    device=cache_entry.input_ids["input_ids"].device,
                )

            input_ids["input_ids"][start_index:end_index] = cache_entry.input_ids[
                "input_ids"
            ]

            if input_ids["attention_mask"] is None:
                input_ids["attention_mask"] = torch.zeros(
                    (total_batch_size, max_sequence_length),
                    dtype=cache_entry.input_ids["attention_mask"].dtype,
                    device=cache_entry.input_ids["attention_mask"].device,
                )

            input_ids["attention_mask"][
                start_index:end_index, -sequence_length:
            ] = cache_entry.input_ids["attention_mask"][:, -sequence_length:]

            for j, past in enumerate(cache_entry.input_ids["past_key_values"]):
                # TODO: this could be done without the views by using indices
                past_keys = past[0]
                past_values = past[1]

                _, head_dim, padded_sequence_length = past_keys.shape

                past_keys = past_keys.view(
                    batch_size, -1, head_dim, padded_sequence_length
                )
                past_values = past_values.view(
                    batch_size, -1, padded_sequence_length, head_dim
                )
                num_heads = past_keys.shape[1]

                if j == len(input_ids["past_key_values"]):
                    padded_past_keys = torch.zeros(
                        (
                            total_batch_size,
                            num_heads,
                            head_dim,
                            max_sequence_length - 1,
                        ),
                        dtype=past_keys.dtype,
                        device=past_keys.device,
                    )
                    padded_past_values = torch.zeros(
                        (
                            total_batch_size,
                            num_heads,
                            max_sequence_length - 1,
                            head_dim,
                        ),
                        dtype=past_values.dtype,
                        device=past_values.device,
                    )
                    input_ids["past_key_values"].append(
                        [padded_past_keys, padded_past_values]
                    )

                input_ids["past_key_values"][j][0][
                    start_index:end_index, :, :, -(sequence_length - 1):
                ] = past_keys[:, :, :, -(sequence_length - 1):]

                input_ids["past_key_values"][j][1][
                    start_index:end_index, :, -(sequence_length - 1):, :
                ] = past_values[:, :, -(sequence_length - 1):, :]

                if (i + 1) == len(pb.batch_cached_ids):
                    input_ids["past_key_values"][j][0] = input_ids["past_key_values"][
                        j
                    ][0].view(total_batch_size * num_heads, head_dim, -1)
                    input_ids["past_key_values"][j][1] = input_ids["past_key_values"][
                        j
                    ][1].view(total_batch_size * num_heads, -1, head_dim)

            start_index += batch_size

        assert pb.request_ids == request_ids

        return cls(
            pb.id,
            request_ids,
            input_ids,
            all_input_ids,
            next_token_choosers,
            stopping_criterias,
        )


@dataclass
class FinishedGeneration:
    request_id: str
    output: str

    def to_pb(self) -> generate_pb2.FinishedGeneration:
        return generate_pb2.FinishedGeneration(id=self.request_id, output=self.output)


class BLOOM:
    def __init__(self, model_name: str):
        if torch.cuda.is_available():
            self.device = torch.device("cuda")
        else:
            self.device = torch.device("cpu")

        self.tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
        self.model = (
            AutoModelForCausalLM.from_pretrained(model_name).eval().to(self.device)
        )
        self.num_heads = self.model.base_model.num_heads

    def forward(self, input_ids, attention_mask, past_key_values: Optional = None):
        # Model Forward
        return self.model.forward(
            input_ids=input_ids,
            attention_mask=attention_mask,
            past_key_values=past_key_values,
            use_cache=True,
        )

    def generate_token(
        self, batch: Batch
    ) -> Tuple[List[FinishedGeneration], Optional[CacheEntry]]:
        with torch.no_grad():
            outputs = self.forward(**batch.input_ids)

        # List of indices to cache
        cache_indices = []
        cache_past_indices = []

        # New input_ids for next forward; keep in cache
        cache_next_input_ids = []
        cache_all_input_ids = []

        # Finished requests
        finished_generations: List[FinishedGeneration] = []

        # Zipped iterator
        iterator = zip(
            batch.request_ids,
            outputs.logits,
            batch.next_token_choosers,
            batch.stopping_criterias,
            batch.all_input_ids,
        )

        # For each member of the batch
        for i, (
            request_id,
            logits,
            next_token_chooser,
            stopping_criteria,
            all_tokens,
        ) in enumerate(iterator):
            # Select next token
            next_token = next_token_chooser(all_tokens, logits.unsqueeze(0)[:, -1])

            # Append next token to all tokens
            all_tokens = torch.cat([all_tokens, next_token])

            # Evaluate stopping criteria
            if stopping_criteria(all_tokens):
                # Decode all tokens
                output = self.tokenizer.decode(
                    all_tokens.squeeze(-1), skip_special_tokens=True
                )
                # Add to the list of finished generations with the original request id
                finished_generations.append(FinishedGeneration(request_id, output))
            # must be added to the cache
            else:
                cache_indices.append(i)
                cache_past_indices.extend(
                    [j for j in range(i * self.num_heads, (i + 1) * self.num_heads)]
                )
                cache_next_input_ids.append(next_token)
                cache_all_input_ids.append(all_tokens)

        # No cache is needed, we finished all generations in the batch
        if not cache_indices:
            return finished_generations, None

        cache_input_ids = {"input_ids": torch.cat(cache_next_input_ids, dim=0)}
        # If we finished at least one generation, evict the finished requests
        # from the cached tensors
        if finished_generations:
            # Apply indices to attention mask, past key values and other items that need to be cached
            cache_input_ids["attention_mask"] = batch.input_ids["attention_mask"][
                cache_indices
            ]
            cache_input_ids["past_key_values"] = [
                (keys[cache_past_indices], values[cache_past_indices])
                for keys, values in outputs["past_key_values"]
            ]
            cache_request_ids = [batch.request_ids[i] for i in cache_indices]
            cache_next_token_choosers = [
                batch.next_token_choosers[i] for i in cache_indices
            ]
            cache_stopping_criterias = [
                batch.stopping_criterias[i] for i in cache_indices
            ]
        else:
            cache_input_ids["attention_mask"] = batch.input_ids["attention_mask"]
            cache_input_ids["past_key_values"] = outputs["past_key_values"]
            cache_request_ids = batch.request_ids
            cache_next_token_choosers = batch.next_token_choosers
            cache_stopping_criterias = batch.stopping_criterias

        # Update attention_mask with padding as we added a new token to input_ids
        cache_input_ids["attention_mask"] = torch.cat(
            [
                cache_input_ids["attention_mask"],
                torch.ones((cache_input_ids["attention_mask"].shape[0], 1)).to(
                    cache_input_ids["attention_mask"].device
                ),
            ],
            dim=1,
        )

        cache_entry = CacheEntry(
            batch.batch_id,
            cache_request_ids,
            cache_input_ids,
            cache_all_input_ids,
            cache_next_token_choosers,
            cache_stopping_criterias,
        )
        return finished_generations, cache_entry


class BLOOMSharded(BLOOM):
    def __init__(self, model_name: str, shard_directory: Path):
        # Intentionally skip BLOOM.__init__: the sharded setup below replaces it
        super(BLOOM, self).__init__()
        self.process_group, self.rank, self.world_size = initialize_torch_distributed()
        self.master = self.rank == 0
        if torch.cuda.is_available():
            self.device = torch.device(f"cuda:{self.rank}")
            dtype = torch.bfloat16
        else:
            self.device = torch.device("cpu")
            dtype = torch.float32

        self.tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")

        # shard state_dict
        if self.master:
            # TODO @thomasw21 do some caching
            shard_state_dict_paths = shard_model(
                model_name, shard_directory, tp_world_size=self.world_size, dtype=dtype
            )
            shard_state_dict_paths = [
                str(path.absolute()) for path in shard_state_dict_paths
            ]
        else:
            shard_state_dict_paths = [None] * self.world_size

        torch.distributed.broadcast_object_list(
            shard_state_dict_paths, src=0, group=self.process_group
        )
        shard_state_dict_path = shard_state_dict_paths[self.rank]

        config = AutoConfig.from_pretrained(
            model_name, slow_but_exact=False, tp_parallel=True
        )
        config.pad_token_id = 3

        # The flag below controls whether to allow TF32 on matmul. This flag defaults to False
        # in PyTorch 1.12 and later.
        torch.backends.cuda.matmul.allow_tf32 = True

        # The flag below controls whether to allow TF32 on cuDNN. This flag defaults to True.
        torch.backends.cudnn.allow_tf32 = True

        with set_default_dtype(dtype):
            with no_init_weights():
                # we can probably set the device to `meta` here?
                model = AutoModelForCausalLM.from_config(config).to(dtype)

        torch.distributed.barrier(group=self.process_group)
        # print_rank_0(f"Initialized model")
        state_dict = torch.load(shard_state_dict_path)
        # TODO @thomasw21: HACK in order to transpose all weight prior
        for key in state_dict.keys():
            do_transpose = False
            if not match_suffix(key, "weight"):
                continue

            for potential_suffix in [
                "self_attention.query_key_value.weight",
                "self_attention.dense.weight",
                "dense_h_to_4h.weight",
                "dense_4h_to_h.weight",
            ]:
                if match_suffix(key, potential_suffix):
                    do_transpose = True

            if do_transpose:
                state_dict[key] = state_dict[key].transpose(1, 0).contiguous()

        model.load_state_dict(state_dict)
        self.model = model.to(self.device).eval()
        self.num_heads = config.n_head // self.process_group.size()
        torch.distributed.barrier(group=self.process_group)

    def forward(self, input_ids, attention_mask, past_key_values: Optional = None):
        outputs = self.model.forward(
            input_ids=input_ids,
            attention_mask=attention_mask,
            past_key_values=past_key_values,
            use_cache=True,
        )

        # Logits are sharded over the vocabulary: gather them from all ranks
        logits_shard = outputs.logits[:, -1, :].contiguous()

        batch_size, vocab_shard_size = logits_shard.shape
        vocab_size = self.world_size * vocab_shard_size
        logits = [torch.empty_like(logits_shard) for _ in range(self.world_size)]
        torch.distributed.all_gather(logits, logits_shard, group=self.process_group)
        logits = torch.cat(logits, dim=1).view(batch_size, 1, vocab_size)

        outputs.logits = logits
        return outputs
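The trickiest part above is the `past_key_values` merge in `from_batch_cached_pb`: BLOOM stores past keys as `(batch_size * num_heads, head_dim, seq)` and past values as `(batch_size * num_heads, seq, head_dim)`, so each cache entry is viewed per request, right-aligned into a zero-padded buffer of length `max_sequence_length - 1`, then flattened back. A standalone shape sketch with made-up sizes:

```python
import torch

batch_size, num_heads, head_dim = 2, 4, 8
sequence_length, max_sequence_length = 5, 9

# Past keys as BLOOM returns them: (batch * heads, head_dim, seq)
past_keys = torch.randn(batch_size * num_heads, head_dim, sequence_length)

# View per request, pad to the target length, flatten back
keys = past_keys.view(batch_size, num_heads, head_dim, sequence_length)
padded = torch.zeros(batch_size, num_heads, head_dim, max_sequence_length - 1)
padded[:, :, :, -(sequence_length - 1):] = keys[:, :, :, -(sequence_length - 1):]
merged = padded.view(batch_size * num_heads, head_dim, -1)
print(merged.shape)  # torch.Size([8, 8, 8])
```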
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: generate.proto
"""Generated protocol buffer code."""
from google.protobuf.internal import builder as _builder
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()




DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0egenerate.proto\x12\x0bgenerate.v1\"(\n\x18ServiceDiscoveryResponse\x12\x0c\n\x04urls\x18\x01 \x03(\t\"^\n\x16LogitsWarperParameters\x12\x13\n\x0btemperature\x18\x01 \x01(\x02\x12\r\n\x05top_k\x18\x02 \x01(\r\x12\r\n\x05top_p\x18\x03 \x01(\x02\x12\x11\n\tdo_sample\x18\x04 \x01(\x08\"v\n\x07Request\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0e\n\x06inputs\x18\x02 \x01(\t\x12\x37\n\nparameters\x18\x03 \x01(\x0b\x32#.generate.v1.LogitsWarperParameters\x12\x16\n\x0emax_new_tokens\x18\x04 \x01(\r\";\n\x05\x42\x61tch\x12\n\n\x02id\x18\x01 \x01(\x04\x12&\n\x08requests\x18\x02 \x03(\x0b\x32\x14.generate.v1.Request\"\x7f\n\x0b\x42\x61tchCached\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x13\n\x0brequest_ids\x18\x02 \x03(\x04\x12\x18\n\x10\x62\x61tch_cached_ids\x18\x03 \x03(\x04\x12\x18\n\x10total_batch_size\x18\x04 \x01(\r\x12\x1b\n\x13max_sequence_length\x18\x05 \x01(\r\"0\n\x12\x46inishedGeneration\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0e\n\x06output\x18\x02 \x01(\t\"F\n\nCacheEntry\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x13\n\x0brequest_ids\x18\x02 \x03(\x04\x12\x17\n\x0fsequence_length\x18\x03 \x01(\r\"\x80\x01\n\x08Response\x12\x31\n\x08\x66inished\x18\x01 \x03(\x0b\x32\x1f.generate.v1.FinishedGeneration\x12\x31\n\x0b\x63\x61\x63he_entry\x18\x02 \x01(\x0b\x32\x17.generate.v1.CacheEntryH\x00\x88\x01\x01\x42\x0e\n\x0c_cache_entry\"\x07\n\x05\x45mpty2\x94\x02\n\x0eTextGeneration\x12O\n\x10ServiceDiscovery\x12\x12.generate.v1.Empty\x1a%.generate.v1.ServiceDiscoveryResponse\"\x00\x12\x34\n\nClearCache\x12\x12.generate.v1.Empty\x1a\x12.generate.v1.Empty\x12\x35\n\x08Generate\x12\x12.generate.v1.Batch\x1a\x15.generate.v1.Response\x12\x44\n\x11GenerateWithCache\x12\x18.generate.v1.BatchCached\x1a\x15.generate.v1.Responseb\x06proto3')

_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'generate_pb2', globals())
if _descriptor._USE_C_DESCRIPTORS == False:

  DESCRIPTOR._options = None
  _SERVICEDISCOVERYRESPONSE._serialized_start=31
  _SERVICEDISCOVERYRESPONSE._serialized_end=71
  _LOGITSWARPERPARAMETERS._serialized_start=73
  _LOGITSWARPERPARAMETERS._serialized_end=167
  _REQUEST._serialized_start=169
  _REQUEST._serialized_end=287
  _BATCH._serialized_start=289
  _BATCH._serialized_end=348
  _BATCHCACHED._serialized_start=350
  _BATCHCACHED._serialized_end=477
  _FINISHEDGENERATION._serialized_start=479
  _FINISHEDGENERATION._serialized_end=527
  _CACHEENTRY._serialized_start=529
  _CACHEENTRY._serialized_end=599
  _RESPONSE._serialized_start=602
  _RESPONSE._serialized_end=730
  _EMPTY._serialized_start=732
  _EMPTY._serialized_end=739
  _TEXTGENERATION._serialized_start=742
  _TEXTGENERATION._serialized_end=1018
# @@protoc_insertion_point(module_scope)
@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: generate.proto
"""Generated protocol buffer code."""
from google.protobuf.internal import builder as _builder
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()




DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0egenerate.proto\x12\x0bgenerate.v1\"(\n\x18ServiceDiscoveryResponse\x12\x0c\n\x04urls\x18\x01 \x03(\t\"^\n\x16LogitsWarperParameters\x12\x13\n\x0btemperature\x18\x01 \x01(\x02\x12\r\n\x05top_k\x18\x02 \x01(\r\x12\r\n\x05top_p\x18\x03 \x01(\x02\x12\x11\n\tdo_sample\x18\x04 \x01(\x08\"v\n\x07Request\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0e\n\x06inputs\x18\x02 \x01(\t\x12\x37\n\nparameters\x18\x03 \x01(\x0b\x32#.generate.v1.LogitsWarperParameters\x12\x16\n\x0emax_new_tokens\x18\x04 \x01(\r\";\n\x05\x42\x61tch\x12\n\n\x02id\x18\x01 \x01(\x04\x12&\n\x08requests\x18\x02 \x03(\x0b\x32\x14.generate.v1.Request\"\x7f\n\x0b\x42\x61tchCached\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x13\n\x0brequest_ids\x18\x02 \x03(\x04\x12\x18\n\x10\x62\x61tch_cached_ids\x18\x03 \x03(\x04\x12\x18\n\x10total_batch_size\x18\x04 \x01(\r\x12\x1b\n\x13max_sequence_length\x18\x05 \x01(\r\"0\n\x12\x46inishedGeneration\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0e\n\x06output\x18\x02 \x01(\t\"F\n\nCacheEntry\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x13\n\x0brequest_ids\x18\x02 \x03(\x04\x12\x17\n\x0fsequence_length\x18\x03 \x01(\r\"\x80\x01\n\x08Response\x12\x31\n\x08\x66inished\x18\x01 \x03(\x0b\x32\x1f.generate.v1.FinishedGeneration\x12\x31\n\x0b\x63\x61\x63he_entry\x18\x02 \x01(\x0b\x32\x17.generate.v1.CacheEntryH\x00\x88\x01\x01\x42\x0e\n\x0c_cache_entry\"\x07\n\x05\x45mpty2\x94\x02\n\x0eTextGeneration\x12O\n\x10ServiceDiscovery\x12\x12.generate.v1.Empty\x1a%.generate.v1.ServiceDiscoveryResponse\"\x00\x12\x34\n\nClearCache\x12\x12.generate.v1.Empty\x1a\x12.generate.v1.Empty\x12\x35\n\x08Generate\x12\x12.generate.v1.Batch\x1a\x15.generate.v1.Response\x12\x44\n\x11GenerateWithCache\x12\x18.generate.v1.BatchCached\x1a\x15.generate.v1.Responseb\x06proto3')

_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'generate_pb2', globals())
if _descriptor._USE_C_DESCRIPTORS == False:

  DESCRIPTOR._options = None
  _SERVICEDISCOVERYRESPONSE._serialized_start=31
  _SERVICEDISCOVERYRESPONSE._serialized_end=71
  _LOGITSWARPERPARAMETERS._serialized_start=73
  _LOGITSWARPERPARAMETERS._serialized_end=167
  _REQUEST._serialized_start=169
  _REQUEST._serialized_end=287
  _BATCH._serialized_start=289
  _BATCH._serialized_end=348
  _BATCHCACHED._serialized_start=350
  _BATCHCACHED._serialized_end=477
  _FINISHEDGENERATION._serialized_start=479
  _FINISHEDGENERATION._serialized_end=527
  _CACHEENTRY._serialized_start=529
  _CACHEENTRY._serialized_end=599
  _RESPONSE._serialized_start=602
  _RESPONSE._serialized_end=730
  _EMPTY._serialized_start=732
  _EMPTY._serialized_end=739
  _TEXTGENERATION._serialized_start=742
  _TEXTGENERATION._serialized_end=1018
# @@protoc_insertion_point(module_scope)
@ -0,0 +1,169 @@
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc

from . import generate_pb2 as generate__pb2


class TextGenerationStub(object):
    """Missing associated documentation comment in .proto file."""

    def __init__(self, channel):
        """Constructor.

        Args:
            channel: A grpc.Channel.
        """
        self.ServiceDiscovery = channel.unary_unary(
                '/generate.v1.TextGeneration/ServiceDiscovery',
                request_serializer=generate__pb2.Empty.SerializeToString,
                response_deserializer=generate__pb2.ServiceDiscoveryResponse.FromString,
                )
        self.ClearCache = channel.unary_unary(
                '/generate.v1.TextGeneration/ClearCache',
                request_serializer=generate__pb2.Empty.SerializeToString,
                response_deserializer=generate__pb2.Empty.FromString,
                )
        self.Generate = channel.unary_unary(
                '/generate.v1.TextGeneration/Generate',
                request_serializer=generate__pb2.Batch.SerializeToString,
                response_deserializer=generate__pb2.Response.FromString,
                )
        self.GenerateWithCache = channel.unary_unary(
                '/generate.v1.TextGeneration/GenerateWithCache',
                request_serializer=generate__pb2.BatchCached.SerializeToString,
                response_deserializer=generate__pb2.Response.FromString,
                )


class TextGenerationServicer(object):
    """Missing associated documentation comment in .proto file."""

    def ServiceDiscovery(self, request, context):
        """/ Service discovery
        """
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def ClearCache(self, request, context):
        """/ Empties batch cache
        """
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def Generate(self, request, context):
        """/ Generate tokens for a batch without cache
        """
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def GenerateWithCache(self, request, context):
        """/ Generate tokens for a batch with cache
        """
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')


def add_TextGenerationServicer_to_server(servicer, server):
    rpc_method_handlers = {
            'ServiceDiscovery': grpc.unary_unary_rpc_method_handler(
                    servicer.ServiceDiscovery,
                    request_deserializer=generate__pb2.Empty.FromString,
                    response_serializer=generate__pb2.ServiceDiscoveryResponse.SerializeToString,
            ),
            'ClearCache': grpc.unary_unary_rpc_method_handler(
                    servicer.ClearCache,
                    request_deserializer=generate__pb2.Empty.FromString,
                    response_serializer=generate__pb2.Empty.SerializeToString,
            ),
            'Generate': grpc.unary_unary_rpc_method_handler(
                    servicer.Generate,
                    request_deserializer=generate__pb2.Batch.FromString,
                    response_serializer=generate__pb2.Response.SerializeToString,
            ),
            'GenerateWithCache': grpc.unary_unary_rpc_method_handler(
                    servicer.GenerateWithCache,
                    request_deserializer=generate__pb2.BatchCached.FromString,
                    response_serializer=generate__pb2.Response.SerializeToString,
            ),
    }
    generic_handler = grpc.method_handlers_generic_handler(
            'generate.v1.TextGeneration', rpc_method_handlers)
    server.add_generic_rpc_handlers((generic_handler,))


# This class is part of an EXPERIMENTAL API.
class TextGeneration(object):
    """Missing associated documentation comment in .proto file."""

    @staticmethod
    def ServiceDiscovery(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/generate.v1.TextGeneration/ServiceDiscovery',
            generate__pb2.Empty.SerializeToString,
            generate__pb2.ServiceDiscoveryResponse.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def ClearCache(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/generate.v1.TextGeneration/ClearCache',
            generate__pb2.Empty.SerializeToString,
            generate__pb2.Empty.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def Generate(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/generate.v1.TextGeneration/Generate',
            generate__pb2.Batch.SerializeToString,
            generate__pb2.Response.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def GenerateWithCache(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/generate.v1.TextGeneration/GenerateWithCache',
            generate__pb2.BatchCached.SerializeToString,
            generate__pb2.Response.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@ -0,0 +1,169 @@
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc

import generate_pb2 as generate__pb2


class TextGenerationStub(object):
    """Missing associated documentation comment in .proto file."""

    def __init__(self, channel):
        """Constructor.

        Args:
            channel: A grpc.Channel.
        """
        self.ServiceDiscovery = channel.unary_unary(
                '/generate.v1.TextGeneration/ServiceDiscovery',
                request_serializer=generate__pb2.Empty.SerializeToString,
                response_deserializer=generate__pb2.ServiceDiscoveryResponse.FromString,
                )
        self.ClearCache = channel.unary_unary(
                '/generate.v1.TextGeneration/ClearCache',
                request_serializer=generate__pb2.Empty.SerializeToString,
                response_deserializer=generate__pb2.Empty.FromString,
                )
        self.Generate = channel.unary_unary(
                '/generate.v1.TextGeneration/Generate',
                request_serializer=generate__pb2.Batch.SerializeToString,
                response_deserializer=generate__pb2.Response.FromString,
                )
        self.GenerateWithCache = channel.unary_unary(
                '/generate.v1.TextGeneration/GenerateWithCache',
                request_serializer=generate__pb2.BatchCached.SerializeToString,
                response_deserializer=generate__pb2.Response.FromString,
                )


class TextGenerationServicer(object):
    """Missing associated documentation comment in .proto file."""

    def ServiceDiscovery(self, request, context):
        """/ Service discovery
        """
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def ClearCache(self, request, context):
        """/ Empties batch cache
        """
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def Generate(self, request, context):
        """/ Generate tokens for a batch without cache
        """
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def GenerateWithCache(self, request, context):
        """/ Generate tokens for a batch with cache
        """
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')


def add_TextGenerationServicer_to_server(servicer, server):
    rpc_method_handlers = {
            'ServiceDiscovery': grpc.unary_unary_rpc_method_handler(
                    servicer.ServiceDiscovery,
                    request_deserializer=generate__pb2.Empty.FromString,
                    response_serializer=generate__pb2.ServiceDiscoveryResponse.SerializeToString,
            ),
            'ClearCache': grpc.unary_unary_rpc_method_handler(
                    servicer.ClearCache,
                    request_deserializer=generate__pb2.Empty.FromString,
                    response_serializer=generate__pb2.Empty.SerializeToString,
            ),
            'Generate': grpc.unary_unary_rpc_method_handler(
                    servicer.Generate,
                    request_deserializer=generate__pb2.Batch.FromString,
                    response_serializer=generate__pb2.Response.SerializeToString,
            ),
            'GenerateWithCache': grpc.unary_unary_rpc_method_handler(
                    servicer.GenerateWithCache,
                    request_deserializer=generate__pb2.BatchCached.FromString,
                    response_serializer=generate__pb2.Response.SerializeToString,
            ),
    }
    generic_handler = grpc.method_handlers_generic_handler(
            'generate.v1.TextGeneration', rpc_method_handlers)
    server.add_generic_rpc_handlers((generic_handler,))
# This class is part of an EXPERIMENTAL API.
|
||||
class TextGeneration(object):
|
||||
"""Missing associated documentation comment in .proto file."""
|
||||
|
||||
@staticmethod
|
||||
def ServiceDiscovery(request,
|
||||
target,
|
||||
options=(),
|
||||
channel_credentials=None,
|
||||
call_credentials=None,
|
||||
insecure=False,
|
||||
compression=None,
|
||||
wait_for_ready=None,
|
||||
timeout=None,
|
||||
metadata=None):
|
||||
return grpc.experimental.unary_unary(request, target, '/generate.v1.TextGeneration/ServiceDiscovery',
|
||||
generate__pb2.Empty.SerializeToString,
|
||||
generate__pb2.ServiceDiscoveryResponse.FromString,
|
||||
options, channel_credentials,
|
||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||
|
||||
@staticmethod
|
||||
def ClearCache(request,
|
||||
target,
|
||||
options=(),
|
||||
channel_credentials=None,
|
||||
call_credentials=None,
|
||||
insecure=False,
|
||||
compression=None,
|
||||
wait_for_ready=None,
|
||||
timeout=None,
|
||||
metadata=None):
|
||||
return grpc.experimental.unary_unary(request, target, '/generate.v1.TextGeneration/ClearCache',
|
||||
generate__pb2.Empty.SerializeToString,
|
||||
generate__pb2.Empty.FromString,
|
||||
options, channel_credentials,
|
||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||
|
||||
@staticmethod
|
||||
def Generate(request,
|
||||
target,
|
||||
options=(),
|
||||
channel_credentials=None,
|
||||
call_credentials=None,
|
||||
insecure=False,
|
||||
compression=None,
|
||||
wait_for_ready=None,
|
||||
timeout=None,
|
||||
metadata=None):
|
||||
return grpc.experimental.unary_unary(request, target, '/generate.v1.TextGeneration/Generate',
|
||||
generate__pb2.Batch.SerializeToString,
|
||||
generate__pb2.Response.FromString,
|
||||
options, channel_credentials,
|
||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||
|
||||
@staticmethod
|
||||
def GenerateWithCache(request,
|
||||
target,
|
||||
options=(),
|
||||
channel_credentials=None,
|
||||
call_credentials=None,
|
||||
insecure=False,
|
||||
compression=None,
|
||||
wait_for_ready=None,
|
||||
timeout=None,
|
||||
metadata=None):
|
||||
return grpc.experimental.unary_unary(request, target, '/generate.v1.TextGeneration/GenerateWithCache',
|
||||
generate__pb2.BatchCached.SerializeToString,
|
||||
generate__pb2.Response.FromString,
|
||||
options, channel_credentials,
|
||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
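The generated stub above is normally driven by the Rust router, but for debugging it can be exercised directly. A minimal blocking-client sketch, assuming a server is already listening on the first unix socket; the prompt and sampling values are placeholders:

```python
import grpc

import generate_pb2
import generate_pb2_grpc

channel = grpc.insecure_channel("unix:///tmp/bloom-inference-0")
stub = generate_pb2_grpc.TextGenerationStub(channel)

batch = generate_pb2.Batch(
    id=0,
    requests=[
        generate_pb2.Request(
            id=0,
            inputs="Hello, I am",  # placeholder prompt
            parameters=generate_pb2.LogitsWarperParameters(
                temperature=1.0, top_k=0, top_p=1.0, do_sample=False
            ),
            max_new_tokens=20,
        )
    ],
)

# One call runs a single decoding step; a caller then keeps invoking
# GenerateWithCache with the returned cache entry until requests finish.
response = stub.Generate(batch)
print(response.cache_entry, response.finished)
```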
@@ -0,0 +1,124 @@
import torch

from pathlib import Path
from tqdm import tqdm

MODEL_NAME = "bigscience/bloom"


def match_suffix(text, suffix):
    return text[-len(suffix) :] == suffix


def prepare_weights(hub_path: Path, save_path: Path, tp_world_size: int):
    save_paths = [
        save_path / f"{MODEL_NAME}_tp-rank-{tp_rank}-of-{tp_world_size}.pty"
        for tp_rank in range(tp_world_size)
    ]

    if all(save_path.exists() for save_path in save_paths):
        print("Weights are already prepared")
        return save_paths

    shards_state_dicts = [{} for _ in range(tp_world_size)]

    for weight_path in tqdm(hub_path.glob("*.bin")):
        state_dict = torch.load(weight_path, map_location="cpu")

        keys = list(state_dict.keys())
        for state_name in keys:
            state = state_dict[state_name]
            if any(
                match_suffix(state_name, candidate)
                for candidate in [
                    "self_attention.query_key_value.weight",
                    "self_attention.query_key_value.bias",
                    "mlp.dense_h_to_4h.weight",
                    "mlp.dense_h_to_4h.bias",
                    "word_embeddings.weight",
                    "lm_head.weight",
                ]
            ):
                # Shard along the output dimension
                output_size = state.shape[0]
                assert output_size % tp_world_size == 0
                block_size = output_size // tp_world_size
                sharded_weights = torch.split(state, block_size, dim=0)
                assert len(sharded_weights) == tp_world_size
                for tp_rank, shard in enumerate(sharded_weights):
                    assert shard.shape[0] == block_size
                    if match_suffix(state_name, "lm_head.weight"):
                        shards_state_dicts[tp_rank][state_name] = shard.detach().clone()
                    else:
                        shards_state_dicts[tp_rank][
                            "transformer." + state_name
                        ] = shard.detach().clone()
            elif any(
                match_suffix(state_name, candidate)
                for candidate in [
                    "self_attention.dense.weight",
                    "mlp.dense_4h_to_h.weight",
                    "lm_head.weight",
                ]
            ):
                # Shard along the input dimension
                input_size = state.shape[1]
                assert input_size % tp_world_size == 0
                block_size = input_size // tp_world_size
                sharded_weights = torch.split(state, block_size, dim=1)
                assert len(sharded_weights) == tp_world_size
                for tp_rank, shard in enumerate(sharded_weights):
                    assert shard.shape[1] == block_size
                    if match_suffix(state_name, "lm_head.weight"):
                        shards_state_dicts[tp_rank][state_name] = shard.detach().clone()
                    else:
                        shards_state_dicts[tp_rank][
                            "transformer." + state_name
                        ] = shard.detach().clone()
            elif any(
                match_suffix(state_name, candidate)
                for candidate in [
                    "self_attention.dense.bias",
                    "mlp.dense_4h_to_h.bias",
                ]
            ):
                # Only rank 0 keeps these biases; the other ranks store zeros
                shards_state_dicts[0][
                    "transformer." + state_name
                ] = state.detach().clone()
                for tp_rank in range(1, tp_world_size):
                    shards_state_dicts[tp_rank][
                        "transformer." + state_name
                    ] = torch.zeros_like(state)
            else:
                # We duplicate parameters across tp ranks
                for tp_rank in range(tp_world_size):
                    shards_state_dicts[tp_rank][
                        "transformer." + state_name
                    ] = state.detach().clone()

            del state_dict[state_name]  # delete key from state_dict
            del state  # delete tensor

    # we save one shard state dict per rank
    for save_path, shard_state_dict in zip(save_paths, shards_state_dicts):
        save_path.parent.mkdir(parents=True, exist_ok=True)
        if save_path.exists():
            print(f"Skipping {save_path} as it already exists")
        else:
            torch.save(shard_state_dict, save_path)

    return save_paths


if __name__ == "__main__":
    from argparse import ArgumentParser

    parser = ArgumentParser()

    parser.add_argument("--hub-path", required=True, type=str)
    parser.add_argument("--save-path", required=True, type=str)
    parser.add_argument("--world-size", required=True, type=int)
    args = parser.parse_args()

    prepare_weights(Path(args.hub_path), Path(args.save_path), args.world_size)
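For reference, a hypothetical call of the helper above, equivalent to the CLI entry point at the bottom of the file; the import path and all filesystem paths are placeholders:

```python
from pathlib import Path

from prepare_weights import prepare_weights  # assumed import path

save_paths = prepare_weights(
    hub_path=Path("/data/hub/bloom"),   # directory holding the *.bin checkpoint files
    save_path=Path("/dev/shm/models"),  # matches --shard-directory in the README
    tp_world_size=8,
)
# -> [/dev/shm/models/bigscience/bloom_tp-rank-0-of-8.pty, ..., ..._tp-rank-7-of-8.pty]
```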
@@ -0,0 +1,91 @@
import asyncio
from grpc import aio

from grpc_reflection.v1alpha import reflection
from pathlib import Path
from typing import Optional, List

from bloom_inference.cache import Cache
from bloom_inference.model import BLOOM, Batch, BLOOMSharded
from bloom_inference.pb import generate_pb2_grpc, generate_pb2


class TextGeneration(generate_pb2_grpc.TextGenerationServicer):
    def __init__(self, model: BLOOM, cache: Cache, server_urls: List[str]):
        self.cache = cache
        self.model = model
        self.server_urls = server_urls

    async def ServiceDiscovery(self, request, context):
        return generate_pb2.ServiceDiscoveryResponse(urls=self.server_urls)

    async def ClearCache(self, request, context):
        self.cache.clear()
        return generate_pb2.Empty()

    async def Generate(self, request, context):
        batch = Batch.from_batch_pb(request, self.model.tokenizer, self.model.device)
        finished_generations, cache_entry = self.model.generate_token(batch)
        self.cache.set(cache_entry)

        return generate_pb2.Response(
            finished=[
                finished_generation.to_pb()
                for finished_generation in finished_generations
            ],
            cache_entry=cache_entry.to_pb() if cache_entry else None,
        )

    async def GenerateWithCache(self, request, context):
        batch = Batch.from_batch_cached_pb(request, self.cache)
        finished_generations, cache_entry = self.model.generate_token(batch)
        self.cache.set(cache_entry)

        return generate_pb2.Response(
            finished=[
                finished_generation.to_pb()
                for finished_generation in finished_generations
            ],
            cache_entry=cache_entry.to_pb() if cache_entry else None,
        )


def serve(model_name, sharded, shard_directory):
    async def serve_inner(
        model_name: str,
        sharded: bool = False,
        shard_directory: Optional[Path] = None,
    ):
        unix_socket_template = "unix:///tmp/bloom-inference-{}"
        if sharded:
            if shard_directory is None:
                raise ValueError("shard_directory must be set when sharded is True")
            model = BLOOMSharded(model_name, shard_directory)
            server_urls = [
                unix_socket_template.format(rank) for rank in range(model.world_size)
            ]
            local_url = unix_socket_template.format(model.rank)
        else:
            model = BLOOM(model_name)
            local_url = unix_socket_template.format(0)
            server_urls = [local_url]

        server = aio.server()
        generate_pb2_grpc.add_TextGenerationServicer_to_server(
            TextGeneration(model, Cache(), server_urls), server
        )
        SERVICE_NAMES = (
            generate_pb2.DESCRIPTOR.services_by_name["TextGeneration"].full_name,
            reflection.SERVICE_NAME,
        )
        reflection.enable_server_reflection(SERVICE_NAMES, server)
        server.add_insecure_port(local_url)
        await server.start()
        print("Server started at {}".format(local_url))
        await server.wait_for_termination()

    asyncio.run(serve_inner(model_name, sharded, shard_directory))


if __name__ == "__main__":
    serve("bigscience/bloom-560m", True, Path("/tmp/models"))
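Since the servicer is async, a quick smoke test uses the aio channel. A sketch, assuming the server above is already running on the first unix socket:

```python
import asyncio

from grpc import aio

from bloom_inference.pb import generate_pb2, generate_pb2_grpc


async def main():
    async with aio.insecure_channel("unix:///tmp/bloom-inference-0") as channel:
        stub = generate_pb2_grpc.TextGenerationStub(channel)
        # List the sockets of all shards, then drop any cached batches.
        discovery = await stub.ServiceDiscovery(generate_pb2.Empty())
        print(discovery.urls)
        await stub.ClearCache(generate_pb2.Empty())


asyncio.run(main())
```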
@@ -0,0 +1,102 @@
from pathlib import Path

import torch
from torch import nn
from transformers import AutoModelForCausalLM


def match_suffix(text, suffix):
    return text[-len(suffix) :] == suffix


def shard_model(model_name: str, path: Path, tp_world_size: int, dtype: torch.dtype):
    """BLOOM specific sharding mechanism"""
    save_paths = [
        path / f"{model_name}_tp-rank-{tp_rank}-of-{tp_world_size}.pty"
        for tp_rank in range(tp_world_size)
    ]
    if all(save_path.exists() for save_path in save_paths):
        print("Loading already cached values")
        return save_paths

    model: nn.Module = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype=dtype, local_files_only=True
    )

    shards_state_dicts = [{} for _ in range(tp_world_size)]
    state_dict = model.state_dict()
    keys = list(state_dict.keys())
    for state_name in keys:
        print(state_name)
        state = state_dict[state_name]
        if any(
            match_suffix(state_name, candidate)
            for candidate in [
                "self_attention.query_key_value.weight",
                "self_attention.query_key_value.bias",
                "mlp.dense_h_to_4h.weight",
                "mlp.dense_h_to_4h.bias",
                "transformer.word_embeddings.weight",
                "lm_head.weight",
            ]
        ):
            # Shard along the output dimension
            output_size = state.shape[0]
            assert output_size % tp_world_size == 0
            block_size = output_size // tp_world_size
            sharded_weights = torch.split(state, block_size, dim=0)
            assert len(sharded_weights) == tp_world_size
            for tp_rank, shard in enumerate(sharded_weights):
                assert shard.shape[0] == block_size
                shards_state_dicts[tp_rank][state_name] = shard.detach().clone()
        elif any(
            match_suffix(state_name, candidate)
            for candidate in [
                "self_attention.dense.weight",
                "mlp.dense_4h_to_h.weight",
                "lm_head.weight",
            ]
        ):
            # Shard along the input dimension
            input_size = state.shape[1]
            assert input_size % tp_world_size == 0
            block_size = input_size // tp_world_size
            sharded_weights = torch.split(state, block_size, dim=1)
            assert len(sharded_weights) == tp_world_size
            for tp_rank, shard in enumerate(sharded_weights):
                assert shard.shape[1] == block_size
                shards_state_dicts[tp_rank][state_name] = shard.detach().clone()
        elif any(
            match_suffix(state_name, candidate)
            for candidate in [
                "self_attention.dense.bias",
                "mlp.dense_4h_to_h.bias",
            ]
        ):
            # Only rank 0 keeps these biases; the other ranks store zeros
            shards_state_dicts[0][state_name] = state.detach().clone()
            for tp_rank in range(1, tp_world_size):
                shards_state_dicts[tp_rank][state_name] = torch.zeros_like(state)
        else:
            # We duplicate parameters across tp ranks
            for tp_rank in range(tp_world_size):
                shards_state_dicts[tp_rank][state_name] = state.detach().clone()

        del state_dict[state_name]  # delete key from state_dict
        del state  # delete tensor

    # we save one shard state dict per rank
    for save_path, shard_state_dict in zip(save_paths, shards_state_dicts):
        save_path.parent.mkdir(parents=True, exist_ok=True)
        torch.save(shard_state_dict, save_path)

    return save_paths


if __name__ == "__main__":
    model_name = "bigscience/bloom"
    save_path = Path("/data/shards")
    tp_world_size = 8
    dtype = torch.bfloat16

    shard_model(model_name, save_path, tp_world_size=tp_world_size, dtype=dtype)
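The two split schemes above mirror standard Megatron-style tensor parallelism: a dim=0 split partitions the output features (per-rank results are concatenated), while a dim=1 split partitions the input features (per-rank partial products are summed by an all-reduce, which is also why the matching biases live on rank 0 only and the other ranks hold zeros). A small single-process sanity sketch of that arithmetic, with illustrative shapes:

```python
import torch

torch.manual_seed(0)
tp_world_size = 2
x = torch.randn(3, 8)  # (batch, in_features)
w = torch.randn(4, 8)  # (out_features, in_features), nn.Linear layout

y_full = x @ w.t()

# dim=0 split: each rank holds a slice of the output features;
# results are concatenated along the feature dimension.
row_shards = torch.split(w, w.shape[0] // tp_world_size, dim=0)
y_cat = torch.cat([x @ s.t() for s in row_shards], dim=1)
assert torch.allclose(y_full, y_cat)

# dim=1 split: each rank holds a slice of the input features and computes
# a partial product; real TP sums these with an all-reduce, after which the
# bias is added exactly once.
col_shards = torch.split(w, w.shape[1] // tp_world_size, dim=1)
x_parts = torch.split(x, x.shape[1] // tp_world_size, dim=1)
y_sum = sum(xp @ s.t() for xp, s in zip(x_parts, col_shards))
assert torch.allclose(y_full, y_sum, atol=1e-6)
print("both sharding schemes reproduce the full matmul")
```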
@@ -0,0 +1,95 @@
import os
import contextlib
import torch
import torch.distributed
from transformers.generation_logits_process import (
    LogitsProcessorList,
    TemperatureLogitsWarper,
    TopPLogitsWarper,
    TopKLogitsWarper,
)


class Sampling:
    def __call__(self, logits):
        probs = torch.nn.functional.softmax(logits, dim=-1)
        next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
        return next_tokens


class Greedy:
    def __call__(self, logits):
        return logits.argmax(dim=-1)


class NextTokenChooser:
    def __init__(self, temperature=1.0, top_k=None, top_p=None, do_sample=False):
        warpers = LogitsProcessorList()
        # the following idea is largely copied from this PR: https://github.com/huggingface/transformers/pull/5420/files
        # all samplers can be found in `generation_utils_samplers.py`
        sampling = do_sample
        if temperature is not None and temperature != 1.0:
            temperature = float(temperature)
            warpers.append(TemperatureLogitsWarper(temperature))
            sampling = True
        if top_k is not None and top_k != 0:
            warpers.append(TopKLogitsWarper(top_k=top_k))
            sampling = True
        if top_p is not None and top_p < 1.0:
            warpers.append(TopPLogitsWarper(top_p=top_p))
            sampling = True

        self.warpers = warpers
        self.choice = Sampling() if sampling else Greedy()

    def __call__(self, input_ids, scores):
        scores = self.warpers(input_ids, scores)
        next_ids = self.choice(scores)
        return next_ids.unsqueeze(-1)


class StoppingCriteria:
    def __init__(self, max_new_tokens=20):
        self.max_new_tokens = max_new_tokens
        self.current_tokens = 0

    def __call__(self, all_ids):
        self.current_tokens += 1
        if self.current_tokens >= self.max_new_tokens:
            return True
        return False


def initialize_torch_distributed():
    rank = int(os.getenv("RANK", "0"))
    world_size = int(os.getenv("WORLD_SIZE", "1"))

    if torch.cuda.is_available():
        # Initialize `torch.distributed` with the NCCL backend and
        # set the device id: each process drives exactly one GPU.
        assert world_size <= torch.cuda.device_count(), "Each process is one gpu"
        device = rank % torch.cuda.device_count()
        torch.cuda.set_device(device)
        backend = "nccl"
    else:
        backend = "gloo"

    # Call the init process.
    torch.distributed.init_process_group(
        backend=backend,
        world_size=world_size,
        rank=rank,
        init_method="tcp://localhost:6000",
    )

    return torch.distributed.distributed_c10d._get_default_group(), rank, world_size


@contextlib.contextmanager
def set_default_dtype(dtype):
    saved_dtype = torch.get_default_dtype()
    torch.set_default_dtype(dtype)
    try:
        yield
    finally:
        torch.set_default_dtype(saved_dtype)
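For reference, a minimal sketch of how `NextTokenChooser` and `StoppingCriteria` compose in a decoding loop, assuming the classes above are in scope; the logits are random stand-ins for model outputs and the vocabulary size is illustrative:

```python
import torch

chooser = NextTokenChooser(temperature=0.7, top_k=50, top_p=0.95, do_sample=True)
stopping = StoppingCriteria(max_new_tokens=4)

input_ids = torch.tensor([[0, 1, 2]])  # (batch=1, seq_len)
while True:
    logits = torch.randn(1, 32)           # stand-in for the logits at the last position
    next_id = chooser(input_ids, logits)  # warp logits, then sample or argmax; shape (1, 1)
    input_ids = torch.cat([input_ids, next_id], dim=1)
    if stopping(input_ids):
        break
print(input_ids)
```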
@ -0,0 +1,480 @@
|
|||
[[package]]
|
||||
name = "accelerate"
|
||||
version = "0.12.0"
|
||||
description = "Accelerate"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.7.0"
|
||||
|
||||
[package.dependencies]
|
||||
numpy = ">=1.17"
|
||||
packaging = ">=20.0"
|
||||
psutil = "*"
|
||||
pyyaml = "*"
|
||||
torch = ">=1.4.0"
|
||||
|
||||
[package.extras]
|
||||
dev = ["black (>=22.0,<23.0)", "datasets", "deepspeed (<0.7.0)", "evaluate", "flake8 (>=3.8.3)", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "parameterized", "pytest", "pytest-subtests", "pytest-xdist", "scipy", "sklearn", "tqdm", "transformers"]
|
||||
quality = ["black (>=22.0,<23.0)", "flake8 (>=3.8.3)", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)"]
|
||||
sagemaker = ["sagemaker"]
|
||||
test_dev = ["datasets", "deepspeed (<0.7.0)", "evaluate", "scipy", "sklearn", "tqdm", "transformers"]
|
||||
test_prod = ["parameterized", "pytest", "pytest-subtests", "pytest-xdist"]
|
||||
test_trackers = ["comet-ml", "tensorboard", "wandb"]
|
||||
testing = ["datasets", "deepspeed (<0.7.0)", "evaluate", "parameterized", "pytest", "pytest-subtests", "pytest-xdist", "scipy", "sklearn", "tqdm", "transformers"]
|
||||
|
||||
[[package]]
|
||||
name = "click"
|
||||
version = "8.1.3"
|
||||
description = "Composable command line interface toolkit"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
|
||||
[package.dependencies]
|
||||
colorama = {version = "*", markers = "platform_system == \"Windows\""}
|
||||
|
||||
[[package]]
|
||||
name = "colorama"
|
||||
version = "0.4.5"
|
||||
description = "Cross-platform colored terminal text."
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||
|
||||
[[package]]
|
||||
name = "grpcio"
|
||||
version = "1.49.1"
|
||||
description = "HTTP/2-based RPC framework"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
|
||||
[package.dependencies]
|
||||
six = ">=1.5.2"
|
||||
|
||||
[package.extras]
|
||||
protobuf = ["grpcio-tools (>=1.49.1)"]
|
||||
|
||||
[[package]]
|
||||
name = "grpcio-reflection"
|
||||
version = "1.49.1"
|
||||
description = "Standard Protobuf Reflection Service for gRPC"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
|
||||
[package.dependencies]
|
||||
grpcio = ">=1.49.1"
|
||||
protobuf = ">=4.21.3"
|
||||
|
||||
[[package]]
|
||||
name = "grpcio-tools"
|
||||
version = "1.49.1"
|
||||
description = "Protobuf code generator for gRPC"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
|
||||
[package.dependencies]
|
||||
grpcio = ">=1.49.1"
|
||||
protobuf = ">=4.21.3,<5.0dev"
|
||||
setuptools = "*"
|
||||
|
||||
[[package]]
|
||||
name = "numpy"
|
||||
version = "1.23.3"
|
||||
description = "NumPy is the fundamental package for array computing with Python."
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
|
||||
[[package]]
|
||||
name = "packaging"
|
||||
version = "21.3"
|
||||
description = "Core utilities for Python packages"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
|
||||
[package.dependencies]
|
||||
pyparsing = ">=2.0.2,<3.0.5 || >3.0.5"
|
||||
|
||||
[[package]]
|
||||
name = "protobuf"
|
||||
version = "4.21.7"
|
||||
description = ""
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
|
||||
[[package]]
|
||||
name = "psutil"
|
||||
version = "5.9.2"
|
||||
description = "Cross-platform lib for process and system monitoring in Python."
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
||||
|
||||
[package.extras]
|
||||
test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"]
|
||||
|
||||
[[package]]
|
||||
name = "pyparsing"
|
||||
version = "3.0.9"
|
||||
description = "pyparsing module - Classes and methods to define and execute parsing grammars"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.6.8"
|
||||
|
||||
[package.extras]
|
||||
diagrams = ["jinja2", "railroad-diagrams"]
|
||||
|
||||
[[package]]
|
||||
name = "PyYAML"
|
||||
version = "6.0"
|
||||
description = "YAML parser and emitter for Python"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
|
||||
[[package]]
|
||||
name = "setuptools"
|
||||
version = "65.4.1"
|
||||
description = "Easily download, build, install, upgrade, and uninstall Python packages"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
|
||||
[package.extras]
|
||||
docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
|
||||
testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mock", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
|
||||
testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
|
||||
|
||||
[[package]]
|
||||
name = "six"
|
||||
version = "1.16.0"
|
||||
description = "Python 2 and 3 compatibility utilities"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
|
||||
|
||||
[[package]]
|
||||
name = "torch"
|
||||
version = "1.12.1"
|
||||
description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.7.0"
|
||||
|
||||
[package.dependencies]
|
||||
typing-extensions = "*"
|
||||
|
||||
[[package]]
|
||||
name = "typer"
|
||||
version = "0.6.1"
|
||||
description = "Typer, build great CLIs. Easy to code. Based on Python type hints."
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
|
||||
[package.dependencies]
|
||||
click = ">=7.1.1,<9.0.0"
|
||||
|
||||
[package.extras]
|
||||
all = ["colorama (>=0.4.3,<0.5.0)", "rich (>=10.11.0,<13.0.0)", "shellingham (>=1.3.0,<2.0.0)"]
|
||||
dev = ["autoflake (>=1.3.1,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)", "pre-commit (>=2.17.0,<3.0.0)"]
|
||||
doc = ["mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)"]
|
||||
test = ["black (>=22.3.0,<23.0.0)", "coverage (>=5.2,<6.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.910)", "pytest (>=4.4.0,<5.4.0)", "pytest-cov (>=2.10.0,<3.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "pytest-xdist (>=1.32.0,<2.0.0)", "rich (>=10.11.0,<13.0.0)", "shellingham (>=1.3.0,<2.0.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "typing-extensions"
|
||||
version = "4.3.0"
|
||||
description = "Backported and Experimental Type Hints for Python 3.7+"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
|
||||
[metadata]
|
||||
lock-version = "1.1"
|
||||
python-versions = "^3.9"
|
||||
content-hash = "cedd0aebeb3731e2bbddf017a2ee6074c285866354272f8dfe930e9606437a25"
|
||||
|
||||
[metadata.files]
|
||||
accelerate = [
|
||||
{file = "accelerate-0.12.0-py3-none-any.whl", hash = "sha256:7742ca5c9f15dd1e0a283305599c196e260af4717a561d1f544aeab27d828af6"},
|
||||
{file = "accelerate-0.12.0.tar.gz", hash = "sha256:e8b119c94fac31877620d5f9de311164ec81fa9dc9e175f0d0d4f50fc8d79473"},
|
||||
]
|
||||
click = [
|
||||
{file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"},
|
||||
{file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"},
|
||||
]
|
||||
colorama = [
|
||||
{file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"},
|
||||
{file = "colorama-0.4.5.tar.gz", hash = "sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"},
|
||||
]
|
||||
grpcio = [
|
||||
{file = "grpcio-1.49.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:fd86040232e805b8e6378b2348c928490ee595b058ce9aaa27ed8e4b0f172b20"},
|
||||
{file = "grpcio-1.49.1-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:6fd0c9cede9552bf00f8c5791d257d5bf3790d7057b26c59df08be5e7a1e021d"},
|
||||
{file = "grpcio-1.49.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:d0d402e158d4e84e49c158cb5204119d55e1baf363ee98d6cb5dce321c3a065d"},
|
||||
{file = "grpcio-1.49.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:822ceec743d42a627e64ea266059a62d214c5a3cdfcd0d7fe2b7a8e4e82527c7"},
|
||||
{file = "grpcio-1.49.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2106d9c16527f0a85e2eea6e6b91a74fc99579c60dd810d8690843ea02bc0f5f"},
|
||||
{file = "grpcio-1.49.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:52dd02b7e7868233c571b49bc38ebd347c3bb1ff8907bb0cb74cb5f00c790afc"},
|
||||
{file = "grpcio-1.49.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:120fecba2ec5d14b5a15d11063b39783fda8dc8d24addd83196acb6582cabd9b"},
|
||||
{file = "grpcio-1.49.1-cp310-cp310-win32.whl", hash = "sha256:f1a3b88e3c53c1a6e6bed635ec1bbb92201bb6a1f2db186179f7f3f244829788"},
|
||||
{file = "grpcio-1.49.1-cp310-cp310-win_amd64.whl", hash = "sha256:a7d0017b92d3850abea87c1bdec6ea41104e71c77bca44c3e17f175c6700af62"},
|
||||
{file = "grpcio-1.49.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:9fb17ff8c0d56099ac6ebfa84f670c5a62228d6b5c695cf21c02160c2ac1446b"},
|
||||
{file = "grpcio-1.49.1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:075f2d06e3db6b48a2157a1bcd52d6cbdca980dd18988fe6afdb41795d51625f"},
|
||||
{file = "grpcio-1.49.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:46d93a1b4572b461a227f1db6b8d35a88952db1c47e5fadcf8b8a2f0e1dd9201"},
|
||||
{file = "grpcio-1.49.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc79b2b37d779ac42341ddef40ad5bf0966a64af412c89fc2b062e3ddabb093f"},
|
||||
{file = "grpcio-1.49.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:5f8b3a971c7820ea9878f3fd70086240a36aeee15d1b7e9ecbc2743b0e785568"},
|
||||
{file = "grpcio-1.49.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49b301740cf5bc8fed4fee4c877570189ae3951432d79fa8e524b09353659811"},
|
||||
{file = "grpcio-1.49.1-cp311-cp311-win32.whl", hash = "sha256:1c66a25afc6c71d357867b341da594a5587db5849b48f4b7d5908d236bb62ede"},
|
||||
{file = "grpcio-1.49.1-cp311-cp311-win_amd64.whl", hash = "sha256:6b6c3a95d27846f4145d6967899b3ab25fffc6ae99544415e1adcacef84842d2"},
|
||||
{file = "grpcio-1.49.1-cp37-cp37m-linux_armv7l.whl", hash = "sha256:1cc400c8a2173d1c042997d98a9563e12d9bb3fb6ad36b7f355bc77c7663b8af"},
|
||||
{file = "grpcio-1.49.1-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:34f736bd4d0deae90015c0e383885b431444fe6b6c591dea288173df20603146"},
|
||||
{file = "grpcio-1.49.1-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:196082b9c89ebf0961dcd77cb114bed8171964c8e3063b9da2fb33536a6938ed"},
|
||||
{file = "grpcio-1.49.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8c9f89c42749890618cd3c2464e1fbf88446e3d2f67f1e334c8e5db2f3272bbd"},
|
||||
{file = "grpcio-1.49.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64419cb8a5b612cdb1550c2fd4acbb7d4fb263556cf4625f25522337e461509e"},
|
||||
{file = "grpcio-1.49.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:8a5272061826e6164f96e3255405ef6f73b88fd3e8bef464c7d061af8585ac62"},
|
||||
{file = "grpcio-1.49.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ea9d0172445241ad7cb49577314e39d0af2c5267395b3561d7ced5d70458a9f3"},
|
||||
{file = "grpcio-1.49.1-cp37-cp37m-win32.whl", hash = "sha256:2070e87d95991473244c72d96d13596c751cb35558e11f5df5414981e7ed2492"},
|
||||
{file = "grpcio-1.49.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fcedcab49baaa9db4a2d240ac81f2d57eb0052b1c6a9501b46b8ae912720fbf"},
|
||||
{file = "grpcio-1.49.1-cp38-cp38-linux_armv7l.whl", hash = "sha256:afbb3475cf7f4f7d380c2ca37ee826e51974f3e2665613996a91d6a58583a534"},
|
||||
{file = "grpcio-1.49.1-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:a4f9ba141380abde6c3adc1727f21529137a2552002243fa87c41a07e528245c"},
|
||||
{file = "grpcio-1.49.1-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:cf0a1fb18a7204b9c44623dfbd1465b363236ce70c7a4ed30402f9f60d8b743b"},
|
||||
{file = "grpcio-1.49.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:17bb6fe72784b630728c6cff9c9d10ccc3b6d04e85da6e0a7b27fb1d135fac62"},
|
||||
{file = "grpcio-1.49.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18305d5a082d1593b005a895c10041f833b16788e88b02bb81061f5ebcc465df"},
|
||||
{file = "grpcio-1.49.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b6a1b39e59ac5a3067794a0e498911cf2e37e4b19ee9e9977dc5e7051714f13f"},
|
||||
{file = "grpcio-1.49.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0e20d59aafc086b1cc68400463bddda6e41d3e5ed30851d1e2e0f6a2e7e342d3"},
|
||||
{file = "grpcio-1.49.1-cp38-cp38-win32.whl", hash = "sha256:e1e83233d4680863a421f3ee4a7a9b80d33cd27ee9ed7593bc93f6128302d3f2"},
|
||||
{file = "grpcio-1.49.1-cp38-cp38-win_amd64.whl", hash = "sha256:221d42c654d2a41fa31323216279c73ed17d92f533bc140a3390cc1bd78bf63c"},
|
||||
{file = "grpcio-1.49.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:fa9e6e61391e99708ac87fc3436f6b7b9c6b845dc4639b406e5e61901e1aacde"},
|
||||
{file = "grpcio-1.49.1-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:9b449e966ef518ce9c860d21f8afe0b0f055220d95bc710301752ac1db96dd6a"},
|
||||
{file = "grpcio-1.49.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:aa34d2ad9f24e47fa9a3172801c676e4037d862247e39030165fe83821a7aafd"},
|
||||
{file = "grpcio-1.49.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5207f4eed1b775d264fcfe379d8541e1c43b878f2b63c0698f8f5c56c40f3d68"},
|
||||
{file = "grpcio-1.49.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b24a74651438d45619ac67004638856f76cc13d78b7478f2457754cbcb1c8ad"},
|
||||
{file = "grpcio-1.49.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:fe763781669790dc8b9618e7e677c839c87eae6cf28b655ee1fa69ae04eea03f"},
|
||||
{file = "grpcio-1.49.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2f2ff7ba0f8f431f32d4b4bc3a3713426949d3533b08466c4ff1b2b475932ca8"},
|
||||
{file = "grpcio-1.49.1-cp39-cp39-win32.whl", hash = "sha256:08ff74aec8ff457a89b97152d36cb811dcc1d17cd5a92a65933524e363327394"},
|
||||
{file = "grpcio-1.49.1-cp39-cp39-win_amd64.whl", hash = "sha256:274ffbb39717918c514b35176510ae9be06e1d93121e84d50b350861dcb9a705"},
|
||||
{file = "grpcio-1.49.1.tar.gz", hash = "sha256:d4725fc9ec8e8822906ae26bb26f5546891aa7fbc3443de970cc556d43a5c99f"},
|
||||
]
|
||||
grpcio-reflection = [
|
||||
{file = "grpcio-reflection-1.49.1.tar.gz", hash = "sha256:b755dfe61d5255a02fb8d0d845bd0027847dee68bf0763a2b286d664ed07ec4d"},
|
||||
{file = "grpcio_reflection-1.49.1-py3-none-any.whl", hash = "sha256:70a325a83c1c1ab583d368711e5733cbef5e068ad2c17cbe77df6e47e0311d1f"},
|
||||
]
|
||||
grpcio-tools = [
|
||||
{file = "grpcio-tools-1.49.1.tar.gz", hash = "sha256:84cc64e5b46bad43d5d7bd2fd772b656eba0366961187a847e908e2cb735db91"},
|
||||
{file = "grpcio_tools-1.49.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:2dfb6c7ece84d46bd690b23d3e060d18115c8bc5047d2e8a33e6747ed323a348"},
|
||||
{file = "grpcio_tools-1.49.1-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:8f452a107c054a04db2570f7851a07f060313c6e841b0d394ce6030d598290e6"},
|
||||
{file = "grpcio_tools-1.49.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:6a198871b582287213c4d70792bf275e1d7cf34eed1d019f534ddf4cd15ab039"},
|
||||
{file = "grpcio_tools-1.49.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a0cca67a7d0287bdc855d81fdd38dc949c4273273a74f832f9e520abe4f20bc6"},
|
||||
{file = "grpcio_tools-1.49.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdaff4c89eecb37c247b93025410db68114d97fa093cbb028e9bd7cda5912473"},
|
||||
{file = "grpcio_tools-1.49.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bb8773118ad315db317d7b22b5ff75d649ca20931733281209e7cbd8c0fad53e"},
|
||||
{file = "grpcio_tools-1.49.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7cc5534023735b8a8f56760b7c533918f874ce5a9064d7c5456d2709ae2b31f9"},
|
||||
{file = "grpcio_tools-1.49.1-cp310-cp310-win32.whl", hash = "sha256:d277642acbe305f5586f9597b78fb9970d6633eb9f89c61e429c92c296c37129"},
|
||||
{file = "grpcio_tools-1.49.1-cp310-cp310-win_amd64.whl", hash = "sha256:eed599cf08fc1a06c72492d3c5750c32f58de3750eddd984af1f257c14326701"},
|
||||
{file = "grpcio_tools-1.49.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:9e5c13809ab2f245398e8446c4c3b399a62d591db651e46806cccf52a700452e"},
|
||||
{file = "grpcio_tools-1.49.1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:ab3d0ee9623720ee585fdf3753b3755d3144a4a8ae35bca8e3655fa2f41056be"},
|
||||
{file = "grpcio_tools-1.49.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ba87e3512bc91d78bf9febcfb522eadda171d2d4ddaf886066b0f01aa4929ad"},
|
||||
{file = "grpcio_tools-1.49.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13e13b3643e7577a3ec13b79689eb4d7548890b1e104c04b9ed6557a3c3dd452"},
|
||||
{file = "grpcio_tools-1.49.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:324f67d9cb4b7058b6ce45352fb64c20cc1fa04c34d97ad44772cfe6a4ae0cf5"},
|
||||
{file = "grpcio_tools-1.49.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a64bab81b220c50033f584f57978ebbea575f09c1ccee765cd5c462177988098"},
|
||||
{file = "grpcio_tools-1.49.1-cp311-cp311-win32.whl", hash = "sha256:f632d376f92f23e5931697a3acf1b38df7eb719774213d93c52e02acd2d529ac"},
|
||||
{file = "grpcio_tools-1.49.1-cp311-cp311-win_amd64.whl", hash = "sha256:28ff2b978d9509474928b9c096a0cce4eaa9c8f7046136aee1545f6211ed8126"},
|
||||
{file = "grpcio_tools-1.49.1-cp37-cp37m-linux_armv7l.whl", hash = "sha256:46afd3cb7e555187451a5d283f108cdef397952a662cb48680afc615b158864a"},
|
||||
{file = "grpcio_tools-1.49.1-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:9284568b728e41fa8f7e9c2e7399545d605f75d8072ef0e9aa2a05655cb679eb"},
|
||||
{file = "grpcio_tools-1.49.1-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:aa34442cf787732cb41f2aa6172007e24f480b8b9d3dc5166de80d63e9072ea4"},
|
||||
{file = "grpcio_tools-1.49.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3b8c9eb5a4250905414cd53a68caea3eb8f0c515aadb689e6e81b71ebe9ab5c6"},
|
||||
{file = "grpcio_tools-1.49.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab15db024051bf21feb21c29cb2c3ea0a2e4f5cf341d46ef76e17fcf6aaef164"},
|
||||
{file = "grpcio_tools-1.49.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:502084b622f758bef620a9107c2db9fcdf66d26c7e0e481d6bb87db4dc917d70"},
|
||||
{file = "grpcio_tools-1.49.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:4085890b77c640085f82bf1e90a0ea166ce48000bc2f5180914b974783c9c0a8"},
|
||||
{file = "grpcio_tools-1.49.1-cp37-cp37m-win32.whl", hash = "sha256:da0edb984699769ce02e18e3392d54b59a7a3f93acd285a68043f5bde4fc028e"},
|
||||
{file = "grpcio_tools-1.49.1-cp37-cp37m-win_amd64.whl", hash = "sha256:9887cd622770271101a7dd1832845d64744c3f88fd11ccb2620394079197a42e"},
|
||||
{file = "grpcio_tools-1.49.1-cp38-cp38-linux_armv7l.whl", hash = "sha256:8440fe7dae6a40c279e3a24b82793735babd38ecbb0d07bb712ff9c8963185d9"},
|
||||
{file = "grpcio_tools-1.49.1-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:b5de2bb7dd6b6231da9b1556ade981513330b740e767f1d902c71ceee0a7d196"},
|
||||
{file = "grpcio_tools-1.49.1-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:1e6f06a763aea7836b63d9c117347f2bf7038008ceef72758815c9e09c5fb1fc"},
|
||||
{file = "grpcio_tools-1.49.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e31562f90120318c5395aabec0f2f69ad8c14b6676996b7730d9d2eaf9415d57"},
|
||||
{file = "grpcio_tools-1.49.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49ef9a4e389a618157a9daa9fafdfeeaef1ece9adda7f50f85db928f24d4b3e8"},
|
||||
{file = "grpcio_tools-1.49.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b384cb8e8d9bcb55ee8f9b064374561c7a1a05d848249581403d36fc7060032f"},
|
||||
{file = "grpcio_tools-1.49.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:73732f77943ac3e898879cbb29c27253aa3c47566b8a59780fd24c6a54de1b66"},
|
||||
{file = "grpcio_tools-1.49.1-cp38-cp38-win32.whl", hash = "sha256:b594b2745a5ba9e7a76ce561bc5ab40bc65bb44743c505529b1e4f12af29104d"},
|
||||
{file = "grpcio_tools-1.49.1-cp38-cp38-win_amd64.whl", hash = "sha256:680fbc88f8709ddcabb88f86749f2d8e429160890cff2c70680880a6970d4eef"},
|
||||
{file = "grpcio_tools-1.49.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:e8c3869121860f6767eedb7d24fc54dfd71e737fdfbb26e1334684606f3274fd"},
|
||||
{file = "grpcio_tools-1.49.1-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:73e9d7c886ba10e20c97d1dab0ff961ba5800757ae5e31be21b1cda8130c52f8"},
|
||||
{file = "grpcio_tools-1.49.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:1760de2dd2c4f08de87b039043a4797f3c17193656e7e3eb84e92f0517083c0c"},
|
||||
{file = "grpcio_tools-1.49.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd4b1e216dd04d9245ee8f4e601a1f98c25e6e417ea5cf8d825c50589a8b447e"},
|
||||
{file = "grpcio_tools-1.49.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1c28751ab5955cae563d07677e799233f0fe1c0fc49d9cbd61ff1957e83617f"},
|
||||
{file = "grpcio_tools-1.49.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:c24239c3ee9ed16314c14b4e24437b5079ebc344f343f33629a582f8699f583b"},
|
||||
{file = "grpcio_tools-1.49.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:892d3dacf1942820f0b7a868a30e6fbcdf5bec08543b682c7274b0101cee632d"},
|
||||
{file = "grpcio_tools-1.49.1-cp39-cp39-win32.whl", hash = "sha256:704d21509ec06efc9d034dbe70e7152715aac004941f4f0f553cf3a0aff15bd5"},
|
||||
{file = "grpcio_tools-1.49.1-cp39-cp39-win_amd64.whl", hash = "sha256:1efa0c221c719433f441ac0e026fc3c4dbc9a1a08a552ecdc707775e2f2fbbae"},
|
||||
]
|
||||
numpy = [
|
||||
{file = "numpy-1.23.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c9f707b5bb73bf277d812ded9896f9512a43edff72712f31667d0a8c2f8e71ee"},
|
||||
{file = "numpy-1.23.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ffcf105ecdd9396e05a8e58e81faaaf34d3f9875f137c7372450baa5d77c9a54"},
|
||||
{file = "numpy-1.23.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ea3f98a0ffce3f8f57675eb9119f3f4edb81888b6874bc1953f91e0b1d4f440"},
|
||||
{file = "numpy-1.23.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:004f0efcb2fe1c0bd6ae1fcfc69cc8b6bf2407e0f18be308612007a0762b4089"},
|
||||
{file = "numpy-1.23.3-cp310-cp310-win32.whl", hash = "sha256:98dcbc02e39b1658dc4b4508442a560fe3ca5ca0d989f0df062534e5ca3a5c1a"},
|
||||
{file = "numpy-1.23.3-cp310-cp310-win_amd64.whl", hash = "sha256:39a664e3d26ea854211867d20ebcc8023257c1800ae89773cbba9f9e97bae036"},
|
||||
{file = "numpy-1.23.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1f27b5322ac4067e67c8f9378b41c746d8feac8bdd0e0ffede5324667b8a075c"},
|
||||
{file = "numpy-1.23.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2ad3ec9a748a8943e6eb4358201f7e1c12ede35f510b1a2221b70af4bb64295c"},
|
||||
{file = "numpy-1.23.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdc9febce3e68b697d931941b263c59e0c74e8f18861f4064c1f712562903411"},
|
||||
{file = "numpy-1.23.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:301c00cf5e60e08e04d842fc47df641d4a181e651c7135c50dc2762ffe293dbd"},
|
||||
{file = "numpy-1.23.3-cp311-cp311-win32.whl", hash = "sha256:7cd1328e5bdf0dee621912f5833648e2daca72e3839ec1d6695e91089625f0b4"},
|
||||
{file = "numpy-1.23.3-cp311-cp311-win_amd64.whl", hash = "sha256:8355fc10fd33a5a70981a5b8a0de51d10af3688d7a9e4a34fcc8fa0d7467bb7f"},
|
||||
{file = "numpy-1.23.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bc6e8da415f359b578b00bcfb1d08411c96e9a97f9e6c7adada554a0812a6cc6"},
|
||||
{file = "numpy-1.23.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:22d43376ee0acd547f3149b9ec12eec2f0ca4a6ab2f61753c5b29bb3e795ac4d"},
|
||||
{file = "numpy-1.23.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a64403f634e5ffdcd85e0b12c08f04b3080d3e840aef118721021f9b48fc1460"},
|
||||
{file = "numpy-1.23.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efd9d3abe5774404becdb0748178b48a218f1d8c44e0375475732211ea47c67e"},
|
||||
{file = "numpy-1.23.3-cp38-cp38-win32.whl", hash = "sha256:f8c02ec3c4c4fcb718fdf89a6c6f709b14949408e8cf2a2be5bfa9c49548fd85"},
|
||||
{file = "numpy-1.23.3-cp38-cp38-win_amd64.whl", hash = "sha256:e868b0389c5ccfc092031a861d4e158ea164d8b7fdbb10e3b5689b4fc6498df6"},
|
||||
{file = "numpy-1.23.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:09f6b7bdffe57fc61d869a22f506049825d707b288039d30f26a0d0d8ea05164"},
|
||||
{file = "numpy-1.23.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8c79d7cf86d049d0c5089231a5bcd31edb03555bd93d81a16870aa98c6cfb79d"},
|
||||
{file = "numpy-1.23.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5d5420053bbb3dd64c30e58f9363d7a9c27444c3648e61460c1237f9ec3fa14"},
|
||||
{file = "numpy-1.23.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5422d6a1ea9b15577a9432e26608c73a78faf0b9039437b075cf322c92e98e7"},
|
||||
{file = "numpy-1.23.3-cp39-cp39-win32.whl", hash = "sha256:c1ba66c48b19cc9c2975c0d354f24058888cdc674bebadceb3cdc9ec403fb5d1"},
|
||||
{file = "numpy-1.23.3-cp39-cp39-win_amd64.whl", hash = "sha256:78a63d2df1d947bd9d1b11d35564c2f9e4b57898aae4626638056ec1a231c40c"},
|
||||
{file = "numpy-1.23.3-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:17c0e467ade9bda685d5ac7f5fa729d8d3e76b23195471adae2d6a6941bd2c18"},
|
||||
{file = "numpy-1.23.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:91b8d6768a75247026e951dce3b2aac79dc7e78622fc148329135ba189813584"},
|
||||
{file = "numpy-1.23.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:94c15ca4e52671a59219146ff584488907b1f9b3fc232622b47e2cf832e94fb8"},
|
||||
{file = "numpy-1.23.3.tar.gz", hash = "sha256:51bf49c0cd1d52be0a240aa66f3458afc4b95d8993d2d04f0d91fa60c10af6cd"},
|
||||
]
|
||||
packaging = [
|
||||
{file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"},
|
||||
{file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
|
||||
]
|
||||
protobuf = [
|
||||
{file = "protobuf-4.21.7-cp310-abi3-win32.whl", hash = "sha256:c7cb105d69a87416bd9023e64324e1c089593e6dae64d2536f06bcbe49cd97d8"},
|
||||
{file = "protobuf-4.21.7-cp310-abi3-win_amd64.whl", hash = "sha256:3ec85328a35a16463c6f419dbce3c0fc42b3e904d966f17f48bae39597c7a543"},
|
||||
{file = "protobuf-4.21.7-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:db9056b6a11cb5131036d734bcbf91ef3ef9235d6b681b2fc431cbfe5a7f2e56"},
|
||||
{file = "protobuf-4.21.7-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:ca200645d6235ce0df3ccfdff1567acbab35c4db222a97357806e015f85b5744"},
|
||||
{file = "protobuf-4.21.7-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:b019c79e23a80735cc8a71b95f76a49a262f579d6b84fd20a0b82279f40e2cc1"},
|
||||
{file = "protobuf-4.21.7-cp37-cp37m-win32.whl", hash = "sha256:d3f89ccf7182293feba2de2739c8bf34fed1ed7c65a5cf987be00311acac57c1"},
|
||||
{file = "protobuf-4.21.7-cp37-cp37m-win_amd64.whl", hash = "sha256:a74d96cd960b87b4b712797c741bb3ea3a913f5c2dc4b6cbe9c0f8360b75297d"},
|
||||
{file = "protobuf-4.21.7-cp38-cp38-win32.whl", hash = "sha256:8e09d1916386eca1ef1353767b6efcebc0a6859ed7f73cb7fb974feba3184830"},
|
||||
{file = "protobuf-4.21.7-cp38-cp38-win_amd64.whl", hash = "sha256:9e355f2a839d9930d83971b9f562395e13493f0e9211520f8913bd11efa53c02"},
|
||||
{file = "protobuf-4.21.7-cp39-cp39-win32.whl", hash = "sha256:f370c0a71712f8965023dd5b13277444d3cdfecc96b2c778b0e19acbfd60df6e"},
|
||||
{file = "protobuf-4.21.7-cp39-cp39-win_amd64.whl", hash = "sha256:9643684232b6b340b5e63bb69c9b4904cdd39e4303d498d1a92abddc7e895b7f"},
|
||||
{file = "protobuf-4.21.7-py2.py3-none-any.whl", hash = "sha256:8066322588d4b499869bf9f665ebe448e793036b552f68c585a9b28f1e393f66"},
|
||||
{file = "protobuf-4.21.7-py3-none-any.whl", hash = "sha256:58b81358ec6c0b5d50df761460ae2db58405c063fd415e1101209221a0a810e1"},
|
||||
{file = "protobuf-4.21.7.tar.gz", hash = "sha256:71d9dba03ed3432c878a801e2ea51e034b0ea01cf3a4344fb60166cb5f6c8757"},
|
||||
]
|
||||
psutil = [
|
||||
{file = "psutil-5.9.2-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:8f024fbb26c8daf5d70287bb3edfafa22283c255287cf523c5d81721e8e5d82c"},
|
||||
{file = "psutil-5.9.2-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:b2f248ffc346f4f4f0d747ee1947963613216b06688be0be2e393986fe20dbbb"},
|
||||
{file = "psutil-5.9.2-cp27-cp27m-win32.whl", hash = "sha256:b1928b9bf478d31fdffdb57101d18f9b70ed4e9b0e41af751851813547b2a9ab"},
|
||||
{file = "psutil-5.9.2-cp27-cp27m-win_amd64.whl", hash = "sha256:404f4816c16a2fcc4eaa36d7eb49a66df2d083e829d3e39ee8759a411dbc9ecf"},
|
||||
{file = "psutil-5.9.2-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:94e621c6a4ddb2573d4d30cba074f6d1aa0186645917df42c811c473dd22b339"},
|
||||
{file = "psutil-5.9.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:256098b4f6ffea6441eb54ab3eb64db9ecef18f6a80d7ba91549195d55420f84"},
|
||||
{file = "psutil-5.9.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:614337922702e9be37a39954d67fdb9e855981624d8011a9927b8f2d3c9625d9"},
|
||||
{file = "psutil-5.9.2-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:39ec06dc6c934fb53df10c1672e299145ce609ff0611b569e75a88f313634969"},
|
||||
{file = "psutil-5.9.2-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3ac2c0375ef498e74b9b4ec56df3c88be43fe56cac465627572dbfb21c4be34"},
|
||||
{file = "psutil-5.9.2-cp310-cp310-win32.whl", hash = "sha256:e4c4a7636ffc47b7141864f1c5e7d649f42c54e49da2dd3cceb1c5f5d29bfc85"},
|
||||
{file = "psutil-5.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:f4cb67215c10d4657e320037109939b1c1d2fd70ca3d76301992f89fe2edb1f1"},
|
||||
{file = "psutil-5.9.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:dc9bda7d5ced744622f157cc8d8bdd51735dafcecff807e928ff26bdb0ff097d"},
|
||||
{file = "psutil-5.9.2-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d75291912b945a7351d45df682f9644540d564d62115d4a20d45fa17dc2d48f8"},
|
||||
{file = "psutil-5.9.2-cp36-cp36m-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4018d5f9b6651f9896c7a7c2c9f4652e4eea53f10751c4e7d08a9093ab587ec"},
|
||||
{file = "psutil-5.9.2-cp36-cp36m-win32.whl", hash = "sha256:f40ba362fefc11d6bea4403f070078d60053ed422255bd838cd86a40674364c9"},
|
||||
{file = "psutil-5.9.2-cp36-cp36m-win_amd64.whl", hash = "sha256:9770c1d25aee91417eba7869139d629d6328a9422ce1cdd112bd56377ca98444"},
|
||||
{file = "psutil-5.9.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:42638876b7f5ef43cef8dcf640d3401b27a51ee3fa137cb2aa2e72e188414c32"},
|
||||
{file = "psutil-5.9.2-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91aa0dac0c64688667b4285fa29354acfb3e834e1fd98b535b9986c883c2ce1d"},
|
||||
{file = "psutil-5.9.2-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4fb54941aac044a61db9d8eb56fc5bee207db3bc58645d657249030e15ba3727"},
|
||||
{file = "psutil-5.9.2-cp37-cp37m-win32.whl", hash = "sha256:7cbb795dcd8ed8fd238bc9e9f64ab188f3f4096d2e811b5a82da53d164b84c3f"},
|
||||
{file = "psutil-5.9.2-cp37-cp37m-win_amd64.whl", hash = "sha256:5d39e3a2d5c40efa977c9a8dd4f679763c43c6c255b1340a56489955dbca767c"},
|
||||
{file = "psutil-5.9.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fd331866628d18223a4265371fd255774affd86244fc307ef66eaf00de0633d5"},
|
||||
{file = "psutil-5.9.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b315febaebae813326296872fdb4be92ad3ce10d1d742a6b0c49fb619481ed0b"},
|
||||
{file = "psutil-5.9.2-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7929a516125f62399d6e8e026129c8835f6c5a3aab88c3fff1a05ee8feb840d"},
|
||||
{file = "psutil-5.9.2-cp38-cp38-win32.whl", hash = "sha256:561dec454853846d1dd0247b44c2e66a0a0c490f937086930ec4b8f83bf44f06"},
|
||||
{file = "psutil-5.9.2-cp38-cp38-win_amd64.whl", hash = "sha256:67b33f27fc0427483b61563a16c90d9f3b547eeb7af0ef1b9fe024cdc9b3a6ea"},
|
||||
{file = "psutil-5.9.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b3591616fa07b15050b2f87e1cdefd06a554382e72866fcc0ab2be9d116486c8"},
|
||||
{file = "psutil-5.9.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:14b29f581b5edab1f133563272a6011925401804d52d603c5c606936b49c8b97"},
|
||||
{file = "psutil-5.9.2-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4642fd93785a29353d6917a23e2ac6177308ef5e8be5cc17008d885cb9f70f12"},
|
||||
{file = "psutil-5.9.2-cp39-cp39-win32.whl", hash = "sha256:ed29ea0b9a372c5188cdb2ad39f937900a10fb5478dc077283bf86eeac678ef1"},
|
||||
{file = "psutil-5.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:68b35cbff92d1f7103d8f1db77c977e72f49fcefae3d3d2b91c76b0e7aef48b8"},
|
||||
{file = "psutil-5.9.2.tar.gz", hash = "sha256:feb861a10b6c3bb00701063b37e4afc754f8217f0f09c42280586bd6ac712b5c"},
|
||||
]
|
||||
pyparsing = [
|
||||
{file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"},
|
||||
{file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"},
|
||||
]
|
||||
PyYAML = [
|
||||
{file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"},
|
||||
{file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"},
|
||||
{file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"},
|
||||
{file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"},
|
||||
{file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"},
|
||||
{file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"},
|
||||
{file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"},
|
||||
{file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"},
|
||||
{file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"},
|
||||
{file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"},
{file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"},
{file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"},
{file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"},
{file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"},
{file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"},
{file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"},
{file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"},
{file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"},
{file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"},
{file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"},
{file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"},
{file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"},
{file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"},
{file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"},
{file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"},
{file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"},
{file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"},
{file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"},
{file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"},
{file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"},
{file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"},
{file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"},
{file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"},
{file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"},
{file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"},
{file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"},
{file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"},
{file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"},
{file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"},
{file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"},
]
setuptools = [
{file = "setuptools-65.4.1-py3-none-any.whl", hash = "sha256:1b6bdc6161661409c5f21508763dc63ab20a9ac2f8ba20029aaaa7fdb9118012"},
{file = "setuptools-65.4.1.tar.gz", hash = "sha256:3050e338e5871e70c72983072fe34f6032ae1cdeeeb67338199c2f74e083a80e"},
]
six = [
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
]
torch = [
{file = "torch-1.12.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:9c038662db894a23e49e385df13d47b2a777ffd56d9bcd5b832593fab0a7e286"},
{file = "torch-1.12.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:4e1b9c14cf13fd2ab8d769529050629a0e68a6fc5cb8e84b4a3cc1dd8c4fe541"},
{file = "torch-1.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:e9c8f4a311ac29fc7e8e955cfb7733deb5dbe1bdaabf5d4af2765695824b7e0d"},
{file = "torch-1.12.1-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:976c3f997cea38ee91a0dd3c3a42322785414748d1761ef926b789dfa97c6134"},
{file = "torch-1.12.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:68104e4715a55c4bb29a85c6a8d57d820e0757da363be1ba680fa8cc5be17b52"},
{file = "torch-1.12.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:743784ccea0dc8f2a3fe6a536bec8c4763bd82c1352f314937cb4008d4805de1"},
{file = "torch-1.12.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:b5dbcca369800ce99ba7ae6dee3466607a66958afca3b740690d88168752abcf"},
{file = "torch-1.12.1-cp37-cp37m-win_amd64.whl", hash = "sha256:f3b52a634e62821e747e872084ab32fbcb01b7fa7dbb7471b6218279f02a178a"},
{file = "torch-1.12.1-cp37-none-macosx_10_9_x86_64.whl", hash = "sha256:8a34a2fbbaa07c921e1b203f59d3d6e00ed379f2b384445773bd14e328a5b6c8"},
{file = "torch-1.12.1-cp37-none-macosx_11_0_arm64.whl", hash = "sha256:42f639501928caabb9d1d55ddd17f07cd694de146686c24489ab8c615c2871f2"},
{file = "torch-1.12.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:0b44601ec56f7dd44ad8afc00846051162ef9c26a8579dda0a02194327f2d55e"},
{file = "torch-1.12.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:cd26d8c5640c3a28c526d41ccdca14cf1cbca0d0f2e14e8263a7ac17194ab1d2"},
{file = "torch-1.12.1-cp38-cp38-win_amd64.whl", hash = "sha256:42e115dab26f60c29e298559dbec88444175528b729ae994ec4c65d56fe267dd"},
{file = "torch-1.12.1-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:a8320ba9ad87e80ca5a6a016e46ada4d1ba0c54626e135d99b2129a4541c509d"},
{file = "torch-1.12.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:03e31c37711db2cd201e02de5826de875529e45a55631d317aadce2f1ed45aa8"},
{file = "torch-1.12.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:9b356aea223772cd754edb4d9ecf2a025909b8615a7668ac7d5130f86e7ec421"},
{file = "torch-1.12.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:6cf6f54b43c0c30335428195589bd00e764a6d27f3b9ba637aaa8c11aaf93073"},
{file = "torch-1.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:f00c721f489089dc6364a01fd84906348fe02243d0af737f944fddb36003400d"},
{file = "torch-1.12.1-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:bfec2843daa654f04fda23ba823af03e7b6f7650a873cdb726752d0e3718dada"},
{file = "torch-1.12.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:69fe2cae7c39ccadd65a123793d30e0db881f1c1927945519c5c17323131437e"},
]
typer = [
{file = "typer-0.6.1-py3-none-any.whl", hash = "sha256:54b19e5df18654070a82f8c2aa1da456a4ac16a2a83e6dcd9f170e291c56338e"},
{file = "typer-0.6.1.tar.gz", hash = "sha256:2d5720a5e63f73eaf31edaa15f6ab87f35f0690f8ca233017d7d23d743a91d73"},
]
typing-extensions = [
{file = "typing_extensions-4.3.0-py3-none-any.whl", hash = "sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02"},
{file = "typing_extensions-4.3.0.tar.gz", hash = "sha256:e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6"},
]
@ -0,0 +1,21 @@
[tool.poetry]
name = "bloom-inference"
version = "0.1.0"
description = "BLOOM Inference Python gRPC Server"
authors = ["Olivier Dehaene <olivier@huggingface.co>"]
[tool.poetry.dependencies]
python = "^3.9"
protobuf = "^4.21.7"
grpcio = "^1.49.1"
torch = "^1.12.1"
typer = "^0.6.1"
grpcio-reflection = "^1.49.1"
accelerate = "^0.12.0"
[tool.poetry.group.dev.dependencies]
grpcio-tools = "^1.49.1"
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
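The `grpcio-tools` dev dependency implies that the Python gRPC stubs are generated from the service's `.proto` definition rather than written by hand. A minimal sketch of such a codegen step, assuming a `../proto/generate.proto` path and a `bloom_inference/pb` output package (neither is pinned down by this commit):

```shell
# Hypothetical invocation; adjust -I and the output package to the actual repo layout.
python -m grpc_tools.protoc \
    -I ../proto \
    --python_out=bloom_inference/pb \
    --grpc_python_out=bloom_inference/pb \
    ../proto/generate.proto
```

After `poetry install`, the same toolchain is available inside the project's virtualenv, since Poetry installs the dev group by default.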