Init
This commit is contained in:
commit
295831a481
|
@ -0,0 +1,37 @@
|
||||||
|
# BLOOM Inference
|
||||||
|
|
||||||
|
A Rust gRPC server for BLOOM Inference.
|
||||||
|
|
||||||
|
## Install
|
||||||
|
|
||||||
|
```shell
|
||||||
|
cd server
|
||||||
|
pip install .
|
||||||
|
```
|
||||||
|
|
||||||
|
```shell
|
||||||
|
cd router
|
||||||
|
cargo build --release
|
||||||
|
```
|
||||||
|
|
||||||
|
## Run
|
||||||
|
|
||||||
|
```shell
|
||||||
|
python server/bloom_inference/main.py bigscience/bloom --num-gpus 8 --shard-directory /dev/shm/models
|
||||||
|
```
|
||||||
|
|
||||||
|
```shell
|
||||||
|
./router/target/release/router
|
||||||
|
```
|
||||||
|
|
||||||
|
## TODO:
|
||||||
|
|
||||||
|
- [ ] Improve model download
|
||||||
|
- Store "shardable" layers separately and layer by layer
|
||||||
|
- [ ] Add batching args to router CLI
|
||||||
|
- [ ] Add docstrings + comments everywhere as the codebase is fairly complicated
|
||||||
|
- [ ] Add tests
|
||||||
|
- [ ] Add shutdown logic in router and server
|
||||||
|
- [ ] Improve multi-processing logic in server
|
||||||
|
- [ ] Improve error handling everywhere
|
||||||
|
- [ ] Improve past key layer indexing?
|
|
@ -0,0 +1,83 @@
|
||||||
|
syntax = "proto3";

package generate.v1;

/// Text-generation service exposed by each model shard and consumed by the router.
service TextGeneration {
    /// Service discovery: lists the urls of all shards
    rpc ServiceDiscovery(Empty) returns (ServiceDiscoveryResponse) {}
    /// Empties batch cache
    rpc ClearCache(Empty) returns (Empty);
    /// Generate tokens for a batch without cache
    rpc Generate(Batch) returns (Response);
    /// Generate tokens for a batch with cache
    rpc GenerateWithCache(BatchCached) returns (Response);
}

message ServiceDiscoveryResponse {
    /// Urls of the shards (may carry a unix:// scheme)
    repeated string urls = 1;
}

/// Sampling parameters applied to the logits of one request.
message LogitsWarperParameters {
    float temperature = 1;
    uint32 top_k = 2;
    float top_p = 3;
    bool do_sample = 4;
}

message Request {
    /// Request ID
    uint64 id = 1;
    /// The generation context
    string inputs = 2;
    /// Logits Warper Parameters
    LogitsWarperParameters parameters = 3;
    /// Stopping criteria
    uint32 max_new_tokens = 4;
}

message Batch {
    /// Batch ID
    uint64 id = 1;
    /// Individual requests
    repeated Request requests = 2;
}

/// A batch whose state is already cached on the shards.
message BatchCached {
    /// Batch ID
    uint64 id = 1;
    /// Request ids within cache
    repeated uint64 request_ids = 2;
    /// Cache IDs
    repeated uint64 batch_cached_ids = 3;
    /// Batch size (sum of all batch sizes)
    uint32 total_batch_size = 4;
    /// Max sequence length
    uint32 max_sequence_length = 5;
}

message FinishedGeneration {
    /// ID of the original request
    uint64 id = 1;
    /// Output
    string output = 2;
}

message CacheEntry {
    /// Cache ID; same as batch ID
    uint64 id = 1;
    /// Requests present in cache entry
    repeated uint64 request_ids = 2;
    /// Sequence length
    uint32 sequence_length = 3;
}

message Response {
    /// Finished requests (optional)
    repeated FinishedGeneration finished = 1;
    /// Cache entry (optional)
    optional CacheEntry cache_entry = 2;
}


// Represent an empty message.
message Empty {}
|
|
@ -0,0 +1 @@
|
||||||
|
/target
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,28 @@
|
||||||
|
[package]
|
||||||
|
name = "bloom-inference"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
bloom-inference-client = { path = "client" }
|
||||||
|
futures = "0.3.24"
|
||||||
|
parking_lot = "0.12.1"
|
||||||
|
poem = "1.3.45"
|
||||||
|
serde = "1.0.145"
|
||||||
|
serde_json = "1.0.85"
|
||||||
|
tokenizers = "0.13.0"
|
||||||
|
tokio = { version = "1.21.1", features = ["rt-multi-thread", "parking_lot", "sync"] }
|
||||||
|
tracing = "0.1.36"
|
||||||
|
tracing-subscriber = "0.3.15"
|
||||||
|
|
||||||
|
[workspace]
|
||||||
|
members = [
|
||||||
|
"client",
|
||||||
|
]
|
||||||
|
|
||||||
|
[profile.release]
|
||||||
|
debug = 1
|
||||||
|
incremental = true
|
||||||
|
lto = "off"
|
|
@ -0,0 +1,19 @@
|
||||||
|
[package]
|
||||||
|
name = "bloom-inference-client"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
futures = "0.3.24"
|
||||||
|
#grpc-error-details = { path = "../../grpc-error-details" }
|
||||||
|
#grpc-metadata = { path = "../../grpc-metadata" }
|
||||||
|
prost = "^0.9"
|
||||||
|
thiserror = "1.0.37"
|
||||||
|
tokio = { version = "1.21.2", features = ["sync"] }
|
||||||
|
tonic = "^0.6"
|
||||||
|
tower = "^0.4"
|
||||||
|
tracing = "^0.1"
|
||||||
|
tracing-error = "^0.2"
|
||||||
|
|
||||||
|
[build-dependencies]
|
||||||
|
tonic-build = "0.6.2"
|
|
@ -0,0 +1,14 @@
|
||||||
|
use std::fs;
|
||||||
|
|
||||||
|
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
fs::create_dir("src/pb").unwrap_or(());
|
||||||
|
tonic_build::configure()
|
||||||
|
.build_client(true)
|
||||||
|
.build_server(false)
|
||||||
|
.out_dir("src/pb")
|
||||||
|
.include_file("mod.rs")
|
||||||
|
.compile(&["../../proto/generate.proto"], &["../../proto"])
|
||||||
|
.unwrap_or_else(|e| panic!("protobuf compilation failed: {}", e));
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
|
@ -0,0 +1,104 @@
|
||||||
|
use crate::pb::generate::v1::text_generation_client::TextGenerationClient;
|
||||||
|
use crate::pb::generate::v1::*;
|
||||||
|
use crate::Result;
|
||||||
|
use std::time::Duration;
|
||||||
|
use tonic::transport::{Channel, Uri};
|
||||||
|
use tower::timeout::Timeout;
|
||||||
|
use tracing::*;
|
||||||
|
|
||||||
|
/// BLOOM Inference gRPC client for a single shard.
#[derive(Clone)]
pub struct Client {
    // Generated tonic stub, wrapped so every request carries a timeout.
    stub: TextGenerationClient<Timeout<Channel>>,
}
|
||||||
|
|
||||||
|
impl Client {
|
||||||
|
/// Returns a client connected to the given url. Requests exceeding timeout will fail.
|
||||||
|
pub async fn connect(uri: Uri, timeout: Duration) -> Self {
|
||||||
|
let channel = Channel::builder(uri)
|
||||||
|
.connect()
|
||||||
|
.await
|
||||||
|
.expect("Transport error");
|
||||||
|
let timeout_channel = Timeout::new(channel, timeout);
|
||||||
|
|
||||||
|
Self {
|
||||||
|
stub: TextGenerationClient::new(timeout_channel),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a client connected to the given unix socket. Requests exceeding timeout will fail.
|
||||||
|
pub async fn connect_uds(path: String, timeout: Duration) -> Self {
|
||||||
|
let channel = Channel::from_shared(format!("http://[::]:50051"))
|
||||||
|
.unwrap()
|
||||||
|
.connect_with_connector(tower::service_fn(move |_: Uri| {
|
||||||
|
tokio::net::UnixStream::connect(path.clone())
|
||||||
|
}))
|
||||||
|
.await
|
||||||
|
.expect("Transport error");
|
||||||
|
let timeout_channel = Timeout::new(channel, timeout);
|
||||||
|
|
||||||
|
Self {
|
||||||
|
stub: TextGenerationClient::new(timeout_channel),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[instrument(skip(self))]
|
||||||
|
pub async fn service_discovery(&mut self) -> Result<Vec<String>> {
|
||||||
|
let request = tonic::Request::new(Empty {});
|
||||||
|
let response = self
|
||||||
|
.stub
|
||||||
|
.service_discovery(request)
|
||||||
|
.instrument(info_span!("service_discovery"))
|
||||||
|
.await?;
|
||||||
|
let urls = response
|
||||||
|
.into_inner()
|
||||||
|
.urls
|
||||||
|
.into_iter()
|
||||||
|
.map(|url| match url.strip_prefix("unix://") {
|
||||||
|
None => url,
|
||||||
|
Some(stripped_url) => stripped_url.to_string(),
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
Ok(urls)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[instrument(skip(self))]
|
||||||
|
pub async fn clear_cache(&mut self) -> Result<()> {
|
||||||
|
let request = tonic::Request::new(Empty {});
|
||||||
|
self.stub
|
||||||
|
.clear_cache(request)
|
||||||
|
.instrument(info_span!("clear_cache"))
|
||||||
|
.await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[instrument(skip(self))]
|
||||||
|
pub async fn generate(
|
||||||
|
&mut self,
|
||||||
|
request: Batch,
|
||||||
|
) -> Result<(Vec<FinishedGeneration>, Option<CacheEntry>)> {
|
||||||
|
let request = tonic::Request::new(request);
|
||||||
|
let response = self
|
||||||
|
.stub
|
||||||
|
.generate(request)
|
||||||
|
.instrument(info_span!("generate"))
|
||||||
|
.await?
|
||||||
|
.into_inner();
|
||||||
|
Ok((response.finished, response.cache_entry))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[instrument(skip(self))]
|
||||||
|
pub async fn generate_with_cache(
|
||||||
|
&mut self,
|
||||||
|
request: BatchCached,
|
||||||
|
) -> Result<(Vec<FinishedGeneration>, Option<CacheEntry>)> {
|
||||||
|
let request = tonic::Request::new(request);
|
||||||
|
let response = self
|
||||||
|
.stub
|
||||||
|
.generate_with_cache(request)
|
||||||
|
.instrument(info_span!("generate_with_cache"))
|
||||||
|
.await?
|
||||||
|
.into_inner();
|
||||||
|
Ok((response.finished, response.cache_entry))
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,32 @@
|
||||||
|
//! BLOOM Inference gRPC client library

mod client;
mod pb;
mod sharded_client;

pub use client::Client;
pub use pb::generate::v1::{
    Batch, BatchCached, CacheEntry, FinishedGeneration, LogitsWarperParameters, Request,
};
pub use sharded_client::ShardedClient;
use thiserror::Error;
pub use tonic::transport::Uri;
use tonic::Status;

/// Error returned by every gRPC call in this crate.
#[derive(Error, Debug, Clone)]
#[error("Text generation client error: {msg:?}")]
pub struct ClientError {
    // Only the message is kept (not the tonic `Status`) so the error stays `Clone`.
    msg: String,
    // source: Status,
}

impl From<Status> for ClientError {
    fn from(err: Status) -> Self {
        Self {
            msg: err.to_string(),
            // source: err,
        }
    }
}

/// Convenience alias: every fallible API in this crate fails with [`ClientError`].
pub type Result<T> = std::result::Result<T, ClientError>;
|
|
@ -0,0 +1 @@
|
||||||
|
*.rs
|
|
@ -0,0 +1,106 @@
|
||||||
|
use crate::Result;
|
||||||
|
use crate::{Batch, BatchCached, CacheEntry, Client, FinishedGeneration};
|
||||||
|
use futures::future::join_all;
|
||||||
|
use std::time::Duration;
|
||||||
|
use tokio::sync::{broadcast, mpsc};
|
||||||
|
use tonic::transport::Uri;
|
||||||
|
|
||||||
|
/// Commands broadcast by `ShardedClient` to every per-shard task; each carries
/// the channel on which the shard's result must be sent back.
#[derive(Clone, Debug)]
enum Command {
    // Run generation on a new batch.
    Generate(
        Batch,
        mpsc::Sender<Result<(Vec<FinishedGeneration>, Option<CacheEntry>)>>,
    ),
    // Continue generation for a batch whose state is cached on the shards.
    GenerateWithCache(
        BatchCached,
        mpsc::Sender<Result<(Vec<FinishedGeneration>, Option<CacheEntry>)>>,
    ),
    // Drop every cache entry held by the shards.
    ClearCache(mpsc::Sender<Result<()>>),
}
|
||||||
|
|
||||||
|
/// Per-shard worker: forwards every broadcast command to its `Client` and
/// sends the result back on the command's reply channel.
async fn client_task(mut client: Client, mut request_subscriber: broadcast::Receiver<Command>) {
    // Exits when `recv` errors (sender dropped, or this receiver lagged).
    while let Ok(message) = request_subscriber.recv().await {
        match message {
            Command::Generate(batch, response_tx) => {
                let result = client.generate(batch).await;
                // The reply channel is bounded; once it is full (another shard
                // already answered) `try_send` fails and the reply is dropped.
                response_tx.try_send(result).unwrap_or(());
            }
            Command::GenerateWithCache(batch_cached, response_tx) => {
                let result = client.generate_with_cache(batch_cached).await;
                response_tx.try_send(result).unwrap_or(());
            }
            Command::ClearCache(response_tx) => {
                let result = client.clear_cache().await;
                response_tx.try_send(result).unwrap_or(());
            }
        };
    }
}
|
||||||
|
|
||||||
|
/// Client that fans every request out to all shards at once.
pub struct ShardedClient {
    // Broadcast side of the channel consumed by the per-shard `client_task`s.
    request_tx: broadcast::Sender<Command>,
}
|
||||||
|
|
||||||
|
impl ShardedClient {
|
||||||
|
fn new(mut clients: Vec<Client>) -> Self {
|
||||||
|
let (request_tx, _) = broadcast::channel(1);
|
||||||
|
|
||||||
|
for client in clients.drain(..) {
|
||||||
|
let request_subscriber = request_tx.subscribe();
|
||||||
|
tokio::spawn(client_task(client, request_subscriber));
|
||||||
|
}
|
||||||
|
|
||||||
|
Self { request_tx }
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn from_master_client(mut master_client: Client) -> Self {
|
||||||
|
let uris = master_client.service_discovery().await.unwrap();
|
||||||
|
let futures = uris
|
||||||
|
.into_iter()
|
||||||
|
.map(|path| Client::connect_uds(path, Duration::from_secs(5)));
|
||||||
|
let clients = join_all(futures).await;
|
||||||
|
Self::new(clients)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a client connected to the given url. Requests exceeding timeout will fail.
|
||||||
|
pub async fn connect(uri: Uri, timeout: Duration) -> Self {
|
||||||
|
let master_client = Client::connect(uri, timeout).await;
|
||||||
|
Self::from_master_client(master_client).await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a client connected to the given unix socket. Requests exceeding timeout will fail.
|
||||||
|
pub async fn connect_uds(path: String, timeout: Duration) -> Self {
|
||||||
|
let master_client = Client::connect_uds(path, timeout).await;
|
||||||
|
Self::from_master_client(master_client).await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn generate(
|
||||||
|
&self,
|
||||||
|
batch: Batch,
|
||||||
|
) -> Result<(Vec<FinishedGeneration>, Option<CacheEntry>)> {
|
||||||
|
let (response_tx, mut response_rx) = mpsc::channel(1);
|
||||||
|
self.request_tx
|
||||||
|
.send(Command::Generate(batch, response_tx))
|
||||||
|
.unwrap();
|
||||||
|
response_rx.recv().await.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn generate_with_cache(
|
||||||
|
&self,
|
||||||
|
batch_cached: BatchCached,
|
||||||
|
) -> Result<(Vec<FinishedGeneration>, Option<CacheEntry>)> {
|
||||||
|
let (response_tx, mut response_rx) = mpsc::channel(1);
|
||||||
|
self.request_tx
|
||||||
|
.send(Command::GenerateWithCache(batch_cached, response_tx))
|
||||||
|
.unwrap();
|
||||||
|
response_rx.recv().await.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn clear_cache(&self) -> Result<()> {
|
||||||
|
let (response_tx, mut response_rx) = mpsc::channel(1);
|
||||||
|
self.request_tx
|
||||||
|
.send(Command::ClearCache(response_tx))
|
||||||
|
.unwrap();
|
||||||
|
response_rx.recv().await.unwrap()
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,129 @@
|
||||||
|
/// This code is massively inspired by Tokio mini-redis
|
||||||
|
use crate::GenerateRequest;
|
||||||
|
use bloom_inference_client::{Batch, ClientError, LogitsWarperParameters, Request};
|
||||||
|
use parking_lot::RwLock;
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use tokio::sync::oneshot::Sender;
|
||||||
|
|
||||||
|
/// Request database shared between the HTTP handlers and the batching task.
///
/// Cloning is cheap: clones share the same `Arc`ed state.
#[derive(Debug, Clone)]
pub(crate) struct Db {
    pub shared: Arc<Shared>,
}
|
||||||
|
|
||||||
|
/// State shared by all `Db` clones, behind a read/write lock.
#[derive(Debug)]
pub struct Shared {
    state: RwLock<State>,
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
struct State {
    // Pending requests keyed by monotonically increasing request id, together
    // with the channel on which each result must be delivered.
    entries: BTreeMap<u64, (Request, Sender<Result<String, ClientError>>)>,

    /// Identifier to use for the next request. Ids only ever grow, so range
    /// scans over `entries` visit requests in arrival order.
    next_id: u64,

    // Identifier to use for the next batch.
    next_batch_id: u64,

    /// Id of the first request that has not been put in a batch yet;
    /// `next_batch` scans from here onward.
    next_batch_start_id: u64,
}
|
||||||
|
|
||||||
|
impl Db {
|
||||||
|
pub(crate) fn new() -> Self {
|
||||||
|
let shared = Arc::new(Shared {
|
||||||
|
state: RwLock::new(State {
|
||||||
|
entries: BTreeMap::new(),
|
||||||
|
next_id: 0,
|
||||||
|
next_batch_id: 0,
|
||||||
|
next_batch_start_id: 0,
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
Self { shared }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn append(&self, request: GenerateRequest, sender: Sender<Result<String, ClientError>>) {
|
||||||
|
let mut state = self.shared.state.write();
|
||||||
|
|
||||||
|
let id = state.next_id;
|
||||||
|
state.next_id += 1;
|
||||||
|
|
||||||
|
let parameters = Some(LogitsWarperParameters {
|
||||||
|
temperature: request.parameters.temperature,
|
||||||
|
top_k: request.parameters.top_k,
|
||||||
|
top_p: request.parameters.top_p,
|
||||||
|
do_sample: request.parameters.do_sample,
|
||||||
|
});
|
||||||
|
let request = Request {
|
||||||
|
id,
|
||||||
|
inputs: request.inputs,
|
||||||
|
parameters,
|
||||||
|
max_new_tokens: request.parameters.max_new_tokens,
|
||||||
|
};
|
||||||
|
state.entries.insert(id, (request, sender));
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn remove(&self, id: &u64) -> Option<(Request, Sender<Result<String, ClientError>>)> {
|
||||||
|
let mut state = self.shared.state.write();
|
||||||
|
state.entries.remove(id)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn len(&self) -> usize {
|
||||||
|
let state = self.shared.state.read();
|
||||||
|
state.entries.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn next_requests(&self, max_size: usize) -> Option<(u64, Vec<Request>)> {
|
||||||
|
let state = self.shared.state.read();
|
||||||
|
|
||||||
|
let requests: Vec<Request> = state
|
||||||
|
.entries
|
||||||
|
.range(state.next_batch_start_id..)
|
||||||
|
.take(max_size)
|
||||||
|
.map(|(_, (request, _))| request.clone())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if requests.is_empty() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
let last_id = requests.last().unwrap().id;
|
||||||
|
Some((last_id, requests))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn next_batch(&self, max_size: usize) -> Option<Batch> {
|
||||||
|
if let Some((last_id, requests)) = self.next_requests(max_size) {
|
||||||
|
let mut state = self.shared.state.write();
|
||||||
|
let batch = Batch {
|
||||||
|
id: state.next_batch_id,
|
||||||
|
requests,
|
||||||
|
};
|
||||||
|
state.next_batch_start_id = last_id + 1;
|
||||||
|
state.next_batch_id += 1;
|
||||||
|
return Some(batch);
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn next_batch_minimum_size(
|
||||||
|
&self,
|
||||||
|
min_size: usize,
|
||||||
|
max_size: usize,
|
||||||
|
) -> Option<Batch> {
|
||||||
|
if let Some((last_id, requests)) = self.next_requests(max_size) {
|
||||||
|
if requests.len() >= min_size {
|
||||||
|
let mut state = self.shared.state.write();
|
||||||
|
let batch = Batch {
|
||||||
|
id: state.next_batch_id,
|
||||||
|
requests,
|
||||||
|
};
|
||||||
|
state.next_batch_start_id = last_id + 1;
|
||||||
|
state.next_batch_id += 1;
|
||||||
|
return Some(batch);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,130 @@
|
||||||
|
use crate::{Db, GenerateRequest};
|
||||||
|
use bloom_inference_client::{Batch, BatchCached, CacheEntry, ClientError, FinishedGeneration, ShardedClient};
|
||||||
|
use std::sync::Arc;
|
||||||
|
use tokio::sync::{oneshot, Notify};
|
||||||
|
|
||||||
|
// Maximum number of queued requests before new ones are rejected.
const MAX_LENGTH: usize = 128;

// Opaque inference failure surfaced to the HTTP layer (mapped to a 500 there).
pub struct InferError {}
|
||||||
|
|
||||||
|
/// Inference front-end handed to the HTTP handlers.
///
/// Cloning is cheap: clones share the request db and the batching-task handle.
#[derive(Clone)]
pub(crate) struct Infer {
    // Queue of pending requests, shared with the batching task.
    db: Db,
    shared: Arc<Shared>,
}
|
||||||
|
|
||||||
|
/// State shared between `Infer` handles and the background batching task.
struct Shared {
    // Notified every time a new request is appended to the db.
    batching_task: Notify,
}
|
||||||
|
|
||||||
|
impl Infer {
|
||||||
|
pub(crate) fn new(client: ShardedClient) -> Self {
|
||||||
|
let db = Db::new();
|
||||||
|
let shared = Arc::new(Shared {
|
||||||
|
batching_task: Notify::new(),
|
||||||
|
});
|
||||||
|
|
||||||
|
tokio::spawn(batching_task(client, db.clone(), shared.clone()));
|
||||||
|
|
||||||
|
Self { db, shared }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) async fn infer(&self, request: GenerateRequest) -> Result<String, InferError> {
|
||||||
|
if self.db.len() > MAX_LENGTH {
|
||||||
|
return Err(InferError {});
|
||||||
|
}
|
||||||
|
let (request_tx, request_rx) = oneshot::channel();
|
||||||
|
self.db.append(request, request_tx);
|
||||||
|
self.shared.batching_task.notify_waiters();
|
||||||
|
match request_rx.await.unwrap() {
|
||||||
|
Ok(output) => Ok(output),
|
||||||
|
Err(_) => Err(InferError {})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Background task in charge of dynamic batching.
///
/// Waits to be notified by `Infer::infer`, drains the db into a first batch,
/// then keeps generating with the cached state, opportunistically merging in
/// newly arrived requests between rounds.
async fn batching_task(client: ShardedClient, db: Db, shared: Arc<Shared>) {
    loop {
        // Sleep until at least one request has been appended to the db.
        shared.batching_task.notified().await;

        if let Some(batch) = db.next_batch(32) {
            let mut cache_entry = infer_batch(batch, &client, &db).await;

            // Generation loop: runs until every request of the running batch
            // finished (i.e. the shards no longer return a cache entry).
            loop {
                if let Some(entry) = cache_entry {
                    let mut batch_cached_ids = vec![entry.id];
                    let mut total_batch_size = entry.request_ids.len();
                    let mut max_sequence_length = entry.sequence_length;
                    let mut request_ids = entry.request_ids;

                    // If the running batch got small, try to top it up with
                    // waiting requests — but only when at least 16 are queued,
                    // so the extra full-generation pass is worth it.
                    if total_batch_size <= 16 {
                        if let Some(batch) = db.next_batch_minimum_size(16, 48) {
                            let other_cache_entry = infer_batch(batch, &client, &db).await;

                            if let Some(entry) = other_cache_entry {
                                // Merge the new entry into the running batch.
                                batch_cached_ids.push(entry.id);
                                total_batch_size += entry.request_ids.len();
                                max_sequence_length =
                                    max_sequence_length.max(entry.sequence_length);
                                request_ids.extend(entry.request_ids.into_iter());
                            }
                        }
                    }

                    // `entry.request_ids` was moved out above; `entry.id` is
                    // still readable (partial move).
                    let batch_cached = BatchCached {
                        id: entry.id,
                        batch_cached_ids,
                        total_batch_size: total_batch_size as u32,
                        max_sequence_length,
                        request_ids,
                    };
                    cache_entry = infer_batch_cached(batch_cached, &client, &db).await;
                } else {
                    break;
                }
            }
        }
    }
}
|
||||||
|
|
||||||
|
async fn infer_batch_cached(batch: BatchCached, client: &ShardedClient, db: &Db) -> Option<CacheEntry> {
|
||||||
|
match client.generate_with_cache(batch.clone()).await {
|
||||||
|
Ok((finished, cache_entry)) => {
|
||||||
|
send_finished(finished, db);
|
||||||
|
cache_entry
|
||||||
|
}
|
||||||
|
Err(err) => {
|
||||||
|
println!("{:?}", err);
|
||||||
|
send_error(err, batch.request_ids, &db);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn infer_batch(batch: Batch, client: &ShardedClient, db: &Db) -> Option<CacheEntry> {
|
||||||
|
match client.generate(batch.clone()).await {
|
||||||
|
Ok((finished, cache_entry)) => {
|
||||||
|
send_finished(finished, db);
|
||||||
|
cache_entry
|
||||||
|
}
|
||||||
|
Err(err) => {
|
||||||
|
println!("{:?}", err);
|
||||||
|
send_error(err, batch.requests.into_iter().map(|req| req.id).collect(), &db);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fails every listed request with a clone of `error`.
fn send_error(error: ClientError, request_ids: Vec<u64>, db: &Db) {
    request_ids.into_iter().for_each(|id| {
        // The entry may already have been removed; a missing entry must not
        // panic and bring the whole batching task down.
        if let Some((_, response_tx)) = db.remove(&id) {
            // The receiver may have been dropped (caller went away): ignore.
            response_tx.send(Err(error.clone())).unwrap_or(());
        }
    });
}
|
||||||
|
|
||||||
|
/// Delivers each finished generation to the caller that queued it.
fn send_finished(finished: Vec<FinishedGeneration>, db: &Db) {
    finished.into_iter().for_each(|output| {
        // The entry may already have been removed; a missing entry must not
        // panic and bring the whole batching task down.
        if let Some((_, response_tx)) = db.remove(&output.id) {
            // The receiver may have been dropped (caller went away): ignore.
            response_tx.send(Ok(output.output)).unwrap_or(());
        }
    });
}
|
|
@ -0,0 +1,125 @@
|
||||||
|
use tokio::time::Instant;
|
||||||
|
|
||||||
|
use poem;
|
||||||
|
use poem::middleware::AddData;
|
||||||
|
use poem::web::Data;
|
||||||
|
use poem::{handler, listener::TcpListener, post, web::Json, EndpointExt, Result, Route, Server};
|
||||||
|
|
||||||
|
use bloom_inference_client::ShardedClient;
|
||||||
|
use serde::Deserialize;
|
||||||
|
use std::time::Duration;
|
||||||
|
use poem::http::StatusCode;
|
||||||
|
use tracing::instrument;
|
||||||
|
|
||||||
|
mod db;
|
||||||
|
|
||||||
|
use db::Db;
|
||||||
|
|
||||||
|
mod infer;
|
||||||
|
|
||||||
|
use infer::Infer;
|
||||||
|
|
||||||
|
/// User-tunable generation parameters; every field falls back to a serde
/// default when absent from the request body.
#[derive(Clone, Debug, Deserialize)]
struct GenerateParameters {
    #[serde(default = "default_temperature")]
    temperature: f32,
    #[serde(default = "default_top_k")]
    top_k: u32,
    #[serde(default = "default_top_p")]
    top_p: f32,
    #[serde(default = "default_do_sample")]
    do_sample: bool,
    #[serde(default = "default_max_new_tokens")]
    max_new_tokens: u32,
}

// Neutral temperature (no scaling of the logits).
fn default_temperature() -> f32 {
    1.0
}

// 0 presumably disables top-k filtering on the server — confirm shard semantics.
fn default_top_k() -> u32 {
    0
}

// 1.0 presumably disables nucleus (top-p) filtering — confirm shard semantics.
fn default_top_p() -> f32 {
    1.0
}

// Greedy decoding by default.
fn default_do_sample() -> bool {
    false
}

// Default number of tokens to generate per request.
fn default_max_new_tokens() -> u32 {
    20
}

/// Body of a `/generate` request.
#[derive(Clone, Debug, Deserialize)]
struct GenerateRequest {
    inputs: String,
    #[serde(default = "default_parameters")]
    parameters: GenerateParameters,
}

// Used when the whole `parameters` object is absent from the request body.
fn default_parameters() -> GenerateParameters {
    GenerateParameters {
        temperature: default_temperature(),
        top_k: default_top_k(),
        top_p: default_top_p(),
        do_sample: default_do_sample(),
        max_new_tokens: default_max_new_tokens(),
    }
}
|
||||||
|
|
||||||
|
#[handler]
|
||||||
|
#[instrument(skip(infer), fields(time, time_per_token))]
|
||||||
|
async fn generate(
|
||||||
|
infer: Data<&Infer>,
|
||||||
|
req: Json<GenerateRequest>,
|
||||||
|
) -> Result<Json<serde_json::Value>> {
|
||||||
|
let start = Instant::now();
|
||||||
|
|
||||||
|
let output = infer
|
||||||
|
.infer(GenerateRequest {
|
||||||
|
inputs: req.inputs.clone(),
|
||||||
|
parameters: req.parameters.clone(),
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
match output {
|
||||||
|
Ok(generated_text) => {
|
||||||
|
tracing::Span::current().record("time", format!("{:?}", start.elapsed()));
|
||||||
|
tracing::Span::current().record("time_per_token", format!("{:?}", start.elapsed() / req.parameters.max_new_tokens));
|
||||||
|
tracing::info!("response: {}", generated_text);
|
||||||
|
|
||||||
|
Ok(Json(serde_json::json!({
|
||||||
|
"generated_text": generated_text,
|
||||||
|
})))
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
Err(poem::Error::from_status(StatusCode::INTERNAL_SERVER_ERROR))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Router entry point: connects to the shards, then serves HTTP on port 3000.
#[tokio::main]
async fn main() -> Result<(), std::io::Error> {
    tracing_subscriber::fmt::init();

    // Connect to the master shard through its unix socket; the sharded client
    // then discovers and connects to the remaining shards itself.
    let sharded_client =
        ShardedClient::connect_uds("/tmp/bloom-inference-0".to_string(), Duration::from_secs(5))
            .await;
    // Start from a clean slate in case a previous router left cached state behind.
    sharded_client
        .clear_cache()
        .await
        .expect("Unable to clear cache");
    tracing::info!("Connected");

    let infer = Infer::new(sharded_client);

    let app = Route::new()
        .at("/generate", post(generate))
        .with(AddData::new(infer));
    Server::new(TcpListener::bind("127.0.0.1:3000"))
        .run(app)
        .await
}
|
Binary file not shown.
|
@ -0,0 +1,20 @@
|
||||||
|
# Generate the Python gRPC stubs from the shared proto definition, rewriting
# the absolute stub imports into relative ones so `bloom_inference.pb` works
# as a package.
gen-server:
	mkdir bloom_inference/pb || true
	python -m grpc_tools.protoc -I../proto --python_out=bloom_inference/pb --grpc_python_out=bloom_inference/pb ../proto/generate.proto
	find bloom_inference/pb/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \;
	touch bloom_inference/pb/__init__.py

# Run the unit tests with coverage.
unit-tests:
	python -m pytest --cov=bloom_inference tests

# Same, but also emit a JUnit XML report (for CI).
unit-tests-reporting:
	python -m pytest --junitxml=report.xml --cov=bloom_inference tests

# Install with plain pip (generates the stubs first).
pip-install:
	pip install grpcio-tools
	make gen-server
	pip install .

# Install with poetry, then generate the stubs.
install:
	poetry install
	make gen-server
|
|
@ -0,0 +1,15 @@
|
||||||
|
# BLOOM Inference Python gRPC Server
|
||||||
|
|
||||||
|
A Python gRPC server for BLOOM Inference
|
||||||
|
|
||||||
|
## Local Install (with poetry)
|
||||||
|
|
||||||
|
```shell
|
||||||
|
make install
|
||||||
|
```
|
||||||
|
|
||||||
|
## Local Install (with pip)
|
||||||
|
|
||||||
|
```shell
|
||||||
|
make pip-install
|
||||||
|
```
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,48 @@
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Dict, Optional, List
|
||||||
|
|
||||||
|
from bloom_inference.pb import generate_pb2
|
||||||
|
from bloom_inference.utils import NextTokenChooser, StoppingCriteria
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class CacheEntry:
    """Per-batch generation state kept between rounds on the server side.

    `to_pb` mirrors only the id/request/length fields into the protobuf
    message; the tensors never leave the server.
    """

    # Id of the batch this state belongs to.
    batch_id: int
    # Ids of the requests still being generated.
    request_ids: List[int]
    # Model inputs for the next forward pass, keyed by input name.
    input_ids: Dict[str, torch.Tensor]
    # Accumulated token ids per request (used below for sequence length).
    all_input_ids: List[torch.Tensor]
    # One sampling strategy per request.
    next_token_choosers: List[NextTokenChooser]
    # One stopping criterion per request.
    stopping_criterias: List[StoppingCriteria]

    def __len__(self):
        # A cache entry is "as long as" the number of live requests.
        return len(self.request_ids)

    def to_pb(self):
        """Serialize to a `generate_pb2.CacheEntry` protobuf message."""
        return generate_pb2.CacheEntry(
            id=self.batch_id,
            request_ids=self.request_ids,
            # Longest accumulated sequence across the live requests.
            sequence_length=max(len(entry) for entry in self.all_input_ids),
        )
|
||||||
|
|
||||||
|
|
||||||
|
class Cache:
    """In-memory store mapping batch ids to their `CacheEntry`."""

    def __init__(self):
        # Keyed by batch id. The annotation is quoted so the class does not
        # depend on `CacheEntry` being defined at evaluation time.
        self.cache: Dict[str, "CacheEntry"] = {}

    def pop(self, batch_id: str) -> Optional["CacheEntry"]:
        """Remove and return the entry for `batch_id`, or None if absent."""
        return self.cache.pop(batch_id, None)

    def set(self, entry: "CacheEntry"):
        """Store `entry` under its batch id; a None entry is ignored."""
        if entry is not None:
            self.cache[entry.batch_id] = entry

    def delete(self, batch_id: str):
        """Delete the entry for `batch_id`; raises KeyError if absent."""
        del self.cache[batch_id]

    def clear(self):
        """Drop every cached entry."""
        self.cache.clear()

    def __len__(self):
        # `len(dict)` directly; materializing the keys view was redundant.
        return len(self.cache)
|
|
@ -0,0 +1,30 @@
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from torch.distributed.launcher import launch_agent, LaunchConfig
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from bloom_inference.server import serve
|
||||||
|
|
||||||
|
|
||||||
|
def main(
    model_name: str,
    num_gpus: int = 1,
    shard_directory: Optional[Path] = None,
):
    """Launch the gRPC server: in-process for one GPU, elastic-launched otherwise."""
    if num_gpus == 1:
        # No sharding needed: serve directly in this process.
        serve(model_name, False, shard_directory)

    else:
        # Spawn one worker process per GPU on this single node; no restarts so
        # a crashed shard fails the whole launch instead of resyncing.
        config = LaunchConfig(
            min_nodes=1,
            max_nodes=1,
            nproc_per_node=num_gpus,
            rdzv_backend="c10d",
            max_restarts=0,
        )
        launch_agent(config, serve, [model_name, True, shard_directory])


if __name__ == "__main__":
    typer.run(main)
|
|
@ -0,0 +1,428 @@
|
||||||
|
import torch
|
||||||
|
import torch.distributed
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Tuple, Optional, Dict
|
||||||
|
|
||||||
|
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
|
||||||
|
from transformers.modeling_utils import no_init_weights
|
||||||
|
|
||||||
|
from bloom_inference.cache import CacheEntry
|
||||||
|
from bloom_inference.pb import generate_pb2
|
||||||
|
from bloom_inference.shard_model import shard_model, match_suffix
|
||||||
|
from bloom_inference.utils import (
|
||||||
|
StoppingCriteria,
|
||||||
|
NextTokenChooser,
|
||||||
|
initialize_torch_distributed,
|
||||||
|
set_default_dtype,
|
||||||
|
)
|
||||||
|
|
||||||
|
torch.manual_seed(0)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Batch:
    """A batch of concurrent generation requests and their decoding state.

    Attributes:
        batch_id: Unique id of this batch (mirrors protobuf `Batch.id`).
        request_ids: Ids of the individual requests in the batch.
        input_ids: Keyword arguments for the next model forward pass:
            `input_ids`, `attention_mask` and, after the first step,
            `past_key_values`.
        all_input_ids: Per-request tensor of all tokens seen so far.
        next_token_choosers: Per-request sampling/greedy token selection.
        stopping_criterias: Per-request stop conditions.
    """

    batch_id: int
    request_ids: List[int]
    input_ids: Dict[str, torch.Tensor]
    all_input_ids: List[torch.Tensor]
    next_token_choosers: List[NextTokenChooser]
    stopping_criterias: List[StoppingCriteria]

    @classmethod
    def from_batch_pb(
        cls, pb: generate_pb2.Batch, tokenizer: AutoTokenizer, device: torch.device
    ) -> "Batch":
        """Build a fresh batch (no past key values) from a protobuf `Batch`."""
        request_ids = []
        inputs = []
        next_token_choosers = []
        stopping_criterias = []

        # Parse batch
        for r in pb.requests:
            request_ids.append(r.id)
            inputs.append(r.inputs)
            next_token_choosers.append(
                NextTokenChooser(
                    temperature=r.parameters.temperature,
                    top_k=r.parameters.top_k,
                    top_p=r.parameters.top_p,
                    do_sample=r.parameters.do_sample,
                )
            )
            stopping_criterias.append(StoppingCriteria(max_new_tokens=r.max_new_tokens))

        # Tokenize all prompts together; left padding (set on the tokenizer)
        # aligns the latest tokens at the end of each row.
        input_ids = tokenizer(inputs, return_tensors="pt", padding=True).to(device)
        all_input_ids = input_ids["input_ids"].unsqueeze(-1)

        return cls(
            pb.id,
            request_ids,
            input_ids,
            all_input_ids,
            next_token_choosers,
            stopping_criterias,
        )

    @classmethod
    def from_cache_entry(cls, cache_entry: CacheEntry) -> "Batch":
        """Rebuild a batch directly from a single cache entry (no merging)."""
        return cls(
            cache_entry.batch_id,
            cache_entry.request_ids,
            cache_entry.input_ids,
            cache_entry.all_input_ids,
            cache_entry.next_token_choosers,
            cache_entry.stopping_criterias,
        )

    @classmethod
    def from_batch_cached_pb(cls, pb: generate_pb2.BatchCached, cache) -> "Batch":
        """Concatenate one or more cached batches into a single batch.

        Pops every referenced entry from `cache`, stacks their input ids,
        attention masks and past key values into tensors sized for
        `pb.total_batch_size` / `pb.max_sequence_length`, right-aligning each
        entry's tokens (left padding).

        Raises:
            ValueError: if any referenced batch id is missing from the cache.
        """
        if len(pb.batch_cached_ids) == 1:
            batch_id = pb.batch_cached_ids[0]
            cache_entry = cache.pop(batch_id)
            if cache_entry is None:
                # Fixed: the original raised `f"... {pb.batch_id} ..."`, but
                # `BatchCached` has no `batch_id` field, so the error path
                # itself crashed with AttributeError instead of ValueError.
                raise ValueError(f"Batch ID {batch_id} not found in cache")
            return cls.from_cache_entry(cache_entry)

        total_batch_size = pb.total_batch_size
        max_sequence_length = pb.max_sequence_length
        input_ids = {"input_ids": None, "attention_mask": None, "past_key_values": []}
        request_ids = []
        all_input_ids = []
        next_token_choosers = []
        stopping_criterias = []
        start_index = 0
        for i, batch_id in enumerate(pb.batch_cached_ids):
            cache_entry = cache.pop(batch_id)
            if cache_entry is None:
                raise ValueError(f"Batch ID {batch_id} not found in cache")
            request_ids.extend(cache_entry.request_ids)
            all_input_ids.extend(cache_entry.all_input_ids)
            next_token_choosers.extend(cache_entry.next_token_choosers)
            stopping_criterias.extend(cache_entry.stopping_criterias)

            batch_size = len(cache_entry.request_ids)
            end_index = start_index + batch_size
            sequence_length = max(len(entry) for entry in cache_entry.all_input_ids)

            # Allocate the merged input_ids lazily, using the first entry's
            # dtype/device.
            if input_ids["input_ids"] is None:
                input_ids["input_ids"] = torch.empty(
                    (total_batch_size, 1),
                    dtype=cache_entry.input_ids["input_ids"].dtype,
                    device=cache_entry.input_ids["input_ids"].device,
                )

            input_ids["input_ids"][start_index:end_index] = cache_entry.input_ids[
                "input_ids"
            ]

            if input_ids["attention_mask"] is None:
                input_ids["attention_mask"] = torch.zeros(
                    (total_batch_size, max_sequence_length),
                    dtype=cache_entry.input_ids["attention_mask"].dtype,
                    device=cache_entry.input_ids["attention_mask"].device,
                )

            # Right-align this entry's mask inside the (zero) padded row.
            input_ids["attention_mask"][
                start_index:end_index, -sequence_length:
            ] = cache_entry.input_ids["attention_mask"][:, -sequence_length:]

            for j, past in enumerate(cache_entry.input_ids["past_key_values"]):
                # TODO: this could be done without the views by using indices
                past_keys = past[0]
                past_values = past[1]

                # Cached tensors are flattened to (batch * heads, ...); recover
                # the batch dimension so rows can be copied per request.
                _, head_dim, padded_sequence_length = past_keys.shape

                past_keys = past_keys.view(
                    batch_size, -1, head_dim, padded_sequence_length
                )
                past_values = past_values.view(
                    batch_size, -1, padded_sequence_length, head_dim
                )
                num_heads = past_keys.shape[1]

                # First time we see layer j: allocate the padded target buffers
                # (past length is max_sequence_length - 1: everything but the
                # token about to be fed).
                if j == len(input_ids["past_key_values"]):
                    padded_past_keys = torch.zeros(
                        (
                            total_batch_size,
                            num_heads,
                            head_dim,
                            max_sequence_length - 1,
                        ),
                        dtype=past_keys.dtype,
                        device=past_keys.device,
                    )
                    padded_past_values = torch.zeros(
                        (
                            total_batch_size,
                            num_heads,
                            max_sequence_length - 1,
                            head_dim,
                        ),
                        dtype=past_values.dtype,
                        device=past_values.device,
                    )
                    input_ids["past_key_values"].append(
                        [padded_past_keys, padded_past_values]
                    )

                input_ids["past_key_values"][j][0][
                    start_index:end_index, :, :, -(sequence_length - 1):
                ] = past_keys[:, :, :, -(sequence_length - 1):]

                input_ids["past_key_values"][j][1][
                    start_index:end_index, :, -(sequence_length - 1):, :
                ] = past_values[:, :, -(sequence_length - 1):, :]

                # After the last entry is merged, flatten back to the
                # (batch * heads, ...) layout the model expects.
                if (i + 1) == len(pb.batch_cached_ids):
                    input_ids["past_key_values"][j][0] = input_ids["past_key_values"][
                        j
                    ][0].view(total_batch_size * num_heads, head_dim, -1)
                    input_ids["past_key_values"][j][1] = input_ids["past_key_values"][
                        j
                    ][1].view(total_batch_size * num_heads, -1, head_dim)

            start_index += batch_size

        # Sanity check: merged order must match the router's expectation.
        assert pb.request_ids == request_ids

        return cls(
            pb.id,
            request_ids,
            input_ids,
            all_input_ids,
            next_token_choosers,
            stopping_criterias,
        )
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class FinishedGeneration:
    """A completed generation: the originating request id and its decoded text."""

    # Id of the request this generation belongs to.
    request_id: str
    # Fully decoded generated text.
    output: str

    def to_pb(self) -> generate_pb2.FinishedGeneration:
        """Serialize this finished generation to its protobuf message."""
        message = generate_pb2.FinishedGeneration(
            id=self.request_id,
            output=self.output,
        )
        return message
|
||||||
|
|
||||||
|
|
||||||
|
class BLOOM:
    """Single-process wrapper around a BLOOM-style causal language model.

    Owns the tokenizer and model and implements the incremental decoding
    step (`generate_token`), which advances every request in a `Batch` by
    exactly one token.
    """

    def __init__(self, model_name: str):
        # Prefer GPU when available; model and batch tensors live on this device.
        if torch.cuda.is_available():
            self.device = torch.device("cuda")
        else:
            self.device = torch.device("cpu")

        # Left padding keeps the newest tokens aligned at the end of each row,
        # which is what the incremental-decoding slices below rely on.
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
        self.model = (
            AutoModelForCausalLM.from_pretrained(model_name).eval().to(self.device)
        )
        # Used in `generate_token` to expand batch indices into
        # past-key-value row indices (past tensors are (batch * heads, ...)).
        self.num_heads = self.model.base_model.num_heads

    def forward(self, input_ids, attention_mask, past_key_values: Optional = None):
        # Model Forward; use_cache=True so past_key_values come back for the
        # next incremental step.
        return self.model.forward(
            input_ids=input_ids,
            attention_mask=attention_mask,
            past_key_values=past_key_values,
            use_cache=True,
        )

    def generate_token(
        self, batch: Batch
    ) -> Tuple[List[FinishedGeneration], Optional[CacheEntry]]:
        """Run one forward pass and sample one token per request.

        Returns the requests that finished on this step and a `CacheEntry`
        holding the state of the still-running requests (None when every
        request finished).
        """
        with torch.no_grad():
            outputs = self.forward(**batch.input_ids)

        # List of indices to cache
        cache_indices = []
        # Matching row indices into the flattened (batch * heads, ...) past
        # key/value tensors.
        cache_past_indices = []

        # New input_ids for next forward; keep in cache
        cache_next_input_ids = []
        cache_all_input_ids = []

        # Finished requests
        finished_generations: List[FinishedGeneration] = []

        # Zipped iterator
        iterator = zip(
            batch.request_ids,
            outputs.logits,
            batch.next_token_choosers,
            batch.stopping_criterias,
            batch.all_input_ids,
        )

        # For each member of the batch
        for i, (
            request_id,
            logits,
            next_token_chooser,
            stopping_criteria,
            all_tokens,
        ) in enumerate(iterator):
            # Select next token
            next_token = next_token_chooser(all_tokens, logits.unsqueeze(0)[:, -1])

            # Append next token to all tokens
            all_tokens = torch.cat([all_tokens, next_token])

            # Evaluate stopping criteria
            if stopping_criteria(all_tokens):
                # Decode all tokens
                output = self.tokenizer.decode(
                    all_tokens.squeeze(-1), skip_special_tokens=True
                )
                # Add to the list of finished generations with the original request id
                finished_generations.append(FinishedGeneration(request_id, output))
            # must be added to the cache
            else:
                cache_indices.append(i)
                cache_past_indices.extend([j for j in range(i * self.num_heads, (i + 1) * self.num_heads)])
                cache_next_input_ids.append(next_token)
                cache_all_input_ids.append(all_tokens)

        # No cache is needed, we finished all generations in the batch
        if not cache_indices:
            return finished_generations, None

        # If we finished at least one generation
        cache_input_ids = {"input_ids": torch.cat(cache_next_input_ids, dim=0)}
        if finished_generations:
            # Apply indices to attention mask, past key values and other items that need to be cached
            cache_input_ids["attention_mask"] = batch.input_ids["attention_mask"][
                cache_indices
            ]
            cache_input_ids["past_key_values"] = [
                (keys[cache_past_indices], values[cache_past_indices])
                for keys, values in outputs["past_key_values"]
            ]
            cache_request_ids = [batch.request_ids[i] for i in cache_indices]
            cache_next_token_choosers = [
                batch.next_token_choosers[i] for i in cache_indices
            ]
            cache_stopping_criterias = [
                batch.stopping_criterias[i] for i in cache_indices
            ]
        else:
            # Nothing finished: carry the whole batch state over unchanged.
            cache_input_ids["attention_mask"] = batch.input_ids["attention_mask"]
            cache_input_ids["past_key_values"] = outputs["past_key_values"]
            cache_request_ids = batch.request_ids
            cache_next_token_choosers = batch.next_token_choosers
            cache_stopping_criterias = batch.stopping_criterias

        # Update attention_mask with padding as we added a new token to input_ids
        # NOTE(review): torch.ones defaults to float32; presumably torch.cat
        # promotes/matches the mask's dtype here — confirm dtypes line up.
        cache_input_ids["attention_mask"] = torch.cat(
            [
                cache_input_ids["attention_mask"],
                torch.ones((cache_input_ids["attention_mask"].shape[0], 1)).to(
                    cache_input_ids["attention_mask"].device
                ),
            ],
            dim=1,
        )

        cache_entry = CacheEntry(
            batch.batch_id,
            cache_request_ids,
            cache_input_ids,
            cache_all_input_ids,
            cache_next_token_choosers,
            cache_stopping_criterias,
        )
        return finished_generations, cache_entry
|
||||||
|
|
||||||
|
|
||||||
|
class BLOOMSharded(BLOOM):
    """Tensor-parallel BLOOM: one process per GPU, each holding one shard.

    Reuses `BLOOM.generate_token`; overrides `__init__` (distributed setup
    and shard loading) and `forward` (all-gather of the vocab-sharded logits).
    """

    def __init__(self, model_name: str, shard_directory: Path):
        # Deliberately skip BLOOM.__init__ (it would load the full, unsharded
        # model); this only runs object.__init__.
        super(BLOOM, self).__init__()
        self.process_group, self.rank, self.world_size = initialize_torch_distributed()
        self.master = self.rank == 0
        if torch.cuda.is_available():
            # One GPU per rank.
            self.device = torch.device(f"cuda:{self.rank}")
            dtype = torch.bfloat16
        else:
            self.device = torch.device("cpu")
            dtype = torch.float32

        self.tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")

        # shard state_dict
        if self.master:
            # TODO @thomasw21 do some caching
            shard_state_dict_paths = shard_model(
                model_name, shard_directory, tp_world_size=self.world_size, dtype=dtype
            )
            shard_state_dict_paths = [
                str(path.absolute()) for path in shard_state_dict_paths
            ]
        else:
            shard_state_dict_paths = [None] * self.world_size

        # Only rank 0 sharded the model; broadcast the shard paths so every
        # rank can load its own file.
        torch.distributed.broadcast_object_list(
            shard_state_dict_paths, src=0, group=self.process_group
        )
        shard_state_dict_path = shard_state_dict_paths[self.rank]

        config = AutoConfig.from_pretrained(
            model_name, slow_but_exact=False, tp_parallel=True
        )
        # NOTE(review): hard-coded pad token id (3) — presumably BLOOM's <pad>;
        # confirm against the tokenizer's pad_token_id.
        config.pad_token_id = 3

        # The flag below controls whether to allow TF32 on matmul. This flag defaults to False
        # in PyTorch 1.12 and later.
        torch.backends.cuda.matmul.allow_tf32 = True

        # The flag below controls whether to allow TF32 on cuDNN. This flag defaults to True.
        torch.backends.cudnn.allow_tf32 = True

        # Build the (tensor-parallel) model skeleton without materializing
        # pretrained weights; real weights come from the shard file below.
        with set_default_dtype(dtype):
            with no_init_weights():
                # we can probably set the device to `meta` here?
                model = AutoModelForCausalLM.from_config(config).to(dtype)

        torch.distributed.barrier(group=self.process_group)
        # print_rank_0(f"Initialized model")
        state_dict = torch.load(shard_state_dict_path)
        # TODO @thomasw21: HACK in order to transpose all weight prior
        for key in state_dict.keys():
            do_transpose = False
            if not match_suffix(key, "weight"):
                continue

            for potential_suffix in [
                "self_attention.query_key_value.weight",
                "self_attention.dense.weight",
                "dense_h_to_4h.weight",
                "dense_4h_to_h.weight",
            ]:
                if match_suffix(key, potential_suffix):
                    do_transpose = True

            if do_transpose:
                state_dict[key] = state_dict[key].transpose(1, 0).contiguous()

        model.load_state_dict(state_dict)
        self.model = model.to(self.device).eval()
        # Heads held by this shard: total heads split across the TP group.
        self.num_heads = config.n_head // self.process_group.size()
        torch.distributed.barrier(group=self.process_group)

    def forward(self, input_ids, attention_mask, past_key_values: Optional = None):
        """Forward pass that all-gathers the vocabulary-sharded logits.

        Each rank computes logits for its own vocab shard; gather and
        concatenate along the vocab dimension so sampling sees the full
        distribution. Only the last position's logits are kept.
        """
        outputs = self.model.forward(
            input_ids=input_ids,
            attention_mask=attention_mask,
            past_key_values=past_key_values,
            use_cache=True,
        )

        # Keep only the final-position logits for this shard.
        logits_shard = outputs.logits[:, -1, :].contiguous()

        batch_size, vocab_shard_size = logits_shard.shape
        vocab_size = self.world_size * vocab_shard_size
        logits = [torch.empty_like(logits_shard) for _ in range(self.world_size)]
        torch.distributed.all_gather(logits, logits_shard, group=self.process_group)
        # Restore a (batch, 1, vocab) shape so callers can index [:, -1].
        logits = torch.cat(logits, dim=1).view(batch_size, 1, vocab_size)

        outputs.logits = logits
        return outputs
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,43 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||||
|
# source: generate.proto
|
||||||
|
"""Generated protocol buffer code."""
|
||||||
|
from google.protobuf.internal import builder as _builder
|
||||||
|
from google.protobuf import descriptor as _descriptor
|
||||||
|
from google.protobuf import descriptor_pool as _descriptor_pool
|
||||||
|
from google.protobuf import symbol_database as _symbol_database
|
||||||
|
# @@protoc_insertion_point(imports)
|
||||||
|
|
||||||
|
_sym_db = _symbol_database.Default()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0egenerate.proto\x12\x0bgenerate.v1\"(\n\x18ServiceDiscoveryResponse\x12\x0c\n\x04urls\x18\x01 \x03(\t\"^\n\x16LogitsWarperParameters\x12\x13\n\x0btemperature\x18\x01 \x01(\x02\x12\r\n\x05top_k\x18\x02 \x01(\r\x12\r\n\x05top_p\x18\x03 \x01(\x02\x12\x11\n\tdo_sample\x18\x04 \x01(\x08\"v\n\x07Request\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0e\n\x06inputs\x18\x02 \x01(\t\x12\x37\n\nparameters\x18\x03 \x01(\x0b\x32#.generate.v1.LogitsWarperParameters\x12\x16\n\x0emax_new_tokens\x18\x04 \x01(\r\";\n\x05\x42\x61tch\x12\n\n\x02id\x18\x01 \x01(\x04\x12&\n\x08requests\x18\x02 \x03(\x0b\x32\x14.generate.v1.Request\"\x7f\n\x0b\x42\x61tchCached\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x13\n\x0brequest_ids\x18\x02 \x03(\x04\x12\x18\n\x10\x62\x61tch_cached_ids\x18\x03 \x03(\x04\x12\x18\n\x10total_batch_size\x18\x04 \x01(\r\x12\x1b\n\x13max_sequence_length\x18\x05 \x01(\r\"0\n\x12\x46inishedGeneration\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0e\n\x06output\x18\x02 \x01(\t\"F\n\nCacheEntry\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x13\n\x0brequest_ids\x18\x02 \x03(\x04\x12\x17\n\x0fsequence_length\x18\x03 \x01(\r\"\x80\x01\n\x08Response\x12\x31\n\x08\x66inished\x18\x01 \x03(\x0b\x32\x1f.generate.v1.FinishedGeneration\x12\x31\n\x0b\x63\x61\x63he_entry\x18\x02 \x01(\x0b\x32\x17.generate.v1.CacheEntryH\x00\x88\x01\x01\x42\x0e\n\x0c_cache_entry\"\x07\n\x05\x45mpty2\x94\x02\n\x0eTextGeneration\x12O\n\x10ServiceDiscovery\x12\x12.generate.v1.Empty\x1a%.generate.v1.ServiceDiscoveryResponse\"\x00\x12\x34\n\nClearCache\x12\x12.generate.v1.Empty\x1a\x12.generate.v1.Empty\x12\x35\n\x08Generate\x12\x12.generate.v1.Batch\x1a\x15.generate.v1.Response\x12\x44\n\x11GenerateWithCache\x12\x18.generate.v1.BatchCached\x1a\x15.generate.v1.Responseb\x06proto3')
|
||||||
|
|
||||||
|
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
|
||||||
|
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'generate_pb2', globals())
|
||||||
|
if _descriptor._USE_C_DESCRIPTORS == False:
|
||||||
|
|
||||||
|
DESCRIPTOR._options = None
|
||||||
|
_SERVICEDISCOVERYRESPONSE._serialized_start=31
|
||||||
|
_SERVICEDISCOVERYRESPONSE._serialized_end=71
|
||||||
|
_LOGITSWARPERPARAMETERS._serialized_start=73
|
||||||
|
_LOGITSWARPERPARAMETERS._serialized_end=167
|
||||||
|
_REQUEST._serialized_start=169
|
||||||
|
_REQUEST._serialized_end=287
|
||||||
|
_BATCH._serialized_start=289
|
||||||
|
_BATCH._serialized_end=348
|
||||||
|
_BATCHCACHED._serialized_start=350
|
||||||
|
_BATCHCACHED._serialized_end=477
|
||||||
|
_FINISHEDGENERATION._serialized_start=479
|
||||||
|
_FINISHEDGENERATION._serialized_end=527
|
||||||
|
_CACHEENTRY._serialized_start=529
|
||||||
|
_CACHEENTRY._serialized_end=599
|
||||||
|
_RESPONSE._serialized_start=602
|
||||||
|
_RESPONSE._serialized_end=730
|
||||||
|
_EMPTY._serialized_start=732
|
||||||
|
_EMPTY._serialized_end=739
|
||||||
|
_TEXTGENERATION._serialized_start=742
|
||||||
|
_TEXTGENERATION._serialized_end=1018
|
||||||
|
# @@protoc_insertion_point(module_scope)
|
|
@ -0,0 +1,43 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||||
|
# source: generate.proto
|
||||||
|
"""Generated protocol buffer code."""
|
||||||
|
from google.protobuf.internal import builder as _builder
|
||||||
|
from google.protobuf import descriptor as _descriptor
|
||||||
|
from google.protobuf import descriptor_pool as _descriptor_pool
|
||||||
|
from google.protobuf import symbol_database as _symbol_database
|
||||||
|
# @@protoc_insertion_point(imports)
|
||||||
|
|
||||||
|
_sym_db = _symbol_database.Default()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0egenerate.proto\x12\x0bgenerate.v1\"(\n\x18ServiceDiscoveryResponse\x12\x0c\n\x04urls\x18\x01 \x03(\t\"^\n\x16LogitsWarperParameters\x12\x13\n\x0btemperature\x18\x01 \x01(\x02\x12\r\n\x05top_k\x18\x02 \x01(\r\x12\r\n\x05top_p\x18\x03 \x01(\x02\x12\x11\n\tdo_sample\x18\x04 \x01(\x08\"v\n\x07Request\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0e\n\x06inputs\x18\x02 \x01(\t\x12\x37\n\nparameters\x18\x03 \x01(\x0b\x32#.generate.v1.LogitsWarperParameters\x12\x16\n\x0emax_new_tokens\x18\x04 \x01(\r\";\n\x05\x42\x61tch\x12\n\n\x02id\x18\x01 \x01(\x04\x12&\n\x08requests\x18\x02 \x03(\x0b\x32\x14.generate.v1.Request\"\x7f\n\x0b\x42\x61tchCached\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x13\n\x0brequest_ids\x18\x02 \x03(\x04\x12\x18\n\x10\x62\x61tch_cached_ids\x18\x03 \x03(\x04\x12\x18\n\x10total_batch_size\x18\x04 \x01(\r\x12\x1b\n\x13max_sequence_length\x18\x05 \x01(\r\"0\n\x12\x46inishedGeneration\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0e\n\x06output\x18\x02 \x01(\t\"F\n\nCacheEntry\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x13\n\x0brequest_ids\x18\x02 \x03(\x04\x12\x17\n\x0fsequence_length\x18\x03 \x01(\r\"\x80\x01\n\x08Response\x12\x31\n\x08\x66inished\x18\x01 \x03(\x0b\x32\x1f.generate.v1.FinishedGeneration\x12\x31\n\x0b\x63\x61\x63he_entry\x18\x02 \x01(\x0b\x32\x17.generate.v1.CacheEntryH\x00\x88\x01\x01\x42\x0e\n\x0c_cache_entry\"\x07\n\x05\x45mpty2\x94\x02\n\x0eTextGeneration\x12O\n\x10ServiceDiscovery\x12\x12.generate.v1.Empty\x1a%.generate.v1.ServiceDiscoveryResponse\"\x00\x12\x34\n\nClearCache\x12\x12.generate.v1.Empty\x1a\x12.generate.v1.Empty\x12\x35\n\x08Generate\x12\x12.generate.v1.Batch\x1a\x15.generate.v1.Response\x12\x44\n\x11GenerateWithCache\x12\x18.generate.v1.BatchCached\x1a\x15.generate.v1.Responseb\x06proto3')
|
||||||
|
|
||||||
|
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
|
||||||
|
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'generate_pb2', globals())
|
||||||
|
if _descriptor._USE_C_DESCRIPTORS == False:
|
||||||
|
|
||||||
|
DESCRIPTOR._options = None
|
||||||
|
_SERVICEDISCOVERYRESPONSE._serialized_start=31
|
||||||
|
_SERVICEDISCOVERYRESPONSE._serialized_end=71
|
||||||
|
_LOGITSWARPERPARAMETERS._serialized_start=73
|
||||||
|
_LOGITSWARPERPARAMETERS._serialized_end=167
|
||||||
|
_REQUEST._serialized_start=169
|
||||||
|
_REQUEST._serialized_end=287
|
||||||
|
_BATCH._serialized_start=289
|
||||||
|
_BATCH._serialized_end=348
|
||||||
|
_BATCHCACHED._serialized_start=350
|
||||||
|
_BATCHCACHED._serialized_end=477
|
||||||
|
_FINISHEDGENERATION._serialized_start=479
|
||||||
|
_FINISHEDGENERATION._serialized_end=527
|
||||||
|
_CACHEENTRY._serialized_start=529
|
||||||
|
_CACHEENTRY._serialized_end=599
|
||||||
|
_RESPONSE._serialized_start=602
|
||||||
|
_RESPONSE._serialized_end=730
|
||||||
|
_EMPTY._serialized_start=732
|
||||||
|
_EMPTY._serialized_end=739
|
||||||
|
_TEXTGENERATION._serialized_start=742
|
||||||
|
_TEXTGENERATION._serialized_end=1018
|
||||||
|
# @@protoc_insertion_point(module_scope)
|
|
@ -0,0 +1,169 @@
|
||||||
|
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
|
||||||
|
"""Client and server classes corresponding to protobuf-defined services."""
|
||||||
|
import grpc
|
||||||
|
|
||||||
|
from . import generate_pb2 as generate__pb2
|
||||||
|
|
||||||
|
|
||||||
|
class TextGenerationStub(object):
|
||||||
|
"""Missing associated documentation comment in .proto file."""
|
||||||
|
|
||||||
|
def __init__(self, channel):
|
||||||
|
"""Constructor.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
channel: A grpc.Channel.
|
||||||
|
"""
|
||||||
|
self.ServiceDiscovery = channel.unary_unary(
|
||||||
|
'/generate.v1.TextGeneration/ServiceDiscovery',
|
||||||
|
request_serializer=generate__pb2.Empty.SerializeToString,
|
||||||
|
response_deserializer=generate__pb2.ServiceDiscoveryResponse.FromString,
|
||||||
|
)
|
||||||
|
self.ClearCache = channel.unary_unary(
|
||||||
|
'/generate.v1.TextGeneration/ClearCache',
|
||||||
|
request_serializer=generate__pb2.Empty.SerializeToString,
|
||||||
|
response_deserializer=generate__pb2.Empty.FromString,
|
||||||
|
)
|
||||||
|
self.Generate = channel.unary_unary(
|
||||||
|
'/generate.v1.TextGeneration/Generate',
|
||||||
|
request_serializer=generate__pb2.Batch.SerializeToString,
|
||||||
|
response_deserializer=generate__pb2.Response.FromString,
|
||||||
|
)
|
||||||
|
self.GenerateWithCache = channel.unary_unary(
|
||||||
|
'/generate.v1.TextGeneration/GenerateWithCache',
|
||||||
|
request_serializer=generate__pb2.BatchCached.SerializeToString,
|
||||||
|
response_deserializer=generate__pb2.Response.FromString,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TextGenerationServicer(object):
|
||||||
|
"""Missing associated documentation comment in .proto file."""
|
||||||
|
|
||||||
|
def ServiceDiscovery(self, request, context):
|
||||||
|
"""/ Service discovery
|
||||||
|
"""
|
||||||
|
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||||
|
context.set_details('Method not implemented!')
|
||||||
|
raise NotImplementedError('Method not implemented!')
|
||||||
|
|
||||||
|
def ClearCache(self, request, context):
|
||||||
|
"""/ Empties batch cache
|
||||||
|
"""
|
||||||
|
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||||
|
context.set_details('Method not implemented!')
|
||||||
|
raise NotImplementedError('Method not implemented!')
|
||||||
|
|
||||||
|
def Generate(self, request, context):
|
||||||
|
"""/ Generate tokens for a batch without cache
|
||||||
|
"""
|
||||||
|
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||||
|
context.set_details('Method not implemented!')
|
||||||
|
raise NotImplementedError('Method not implemented!')
|
||||||
|
|
||||||
|
def GenerateWithCache(self, request, context):
|
||||||
|
"""/ Generate tokens for a batch with cache
|
||||||
|
"""
|
||||||
|
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||||
|
context.set_details('Method not implemented!')
|
||||||
|
raise NotImplementedError('Method not implemented!')
|
||||||
|
|
||||||
|
|
||||||
|
def add_TextGenerationServicer_to_server(servicer, server):
|
||||||
|
rpc_method_handlers = {
|
||||||
|
'ServiceDiscovery': grpc.unary_unary_rpc_method_handler(
|
||||||
|
servicer.ServiceDiscovery,
|
||||||
|
request_deserializer=generate__pb2.Empty.FromString,
|
||||||
|
response_serializer=generate__pb2.ServiceDiscoveryResponse.SerializeToString,
|
||||||
|
),
|
||||||
|
'ClearCache': grpc.unary_unary_rpc_method_handler(
|
||||||
|
servicer.ClearCache,
|
||||||
|
request_deserializer=generate__pb2.Empty.FromString,
|
||||||
|
response_serializer=generate__pb2.Empty.SerializeToString,
|
||||||
|
),
|
||||||
|
'Generate': grpc.unary_unary_rpc_method_handler(
|
||||||
|
servicer.Generate,
|
||||||
|
request_deserializer=generate__pb2.Batch.FromString,
|
||||||
|
response_serializer=generate__pb2.Response.SerializeToString,
|
||||||
|
),
|
||||||
|
'GenerateWithCache': grpc.unary_unary_rpc_method_handler(
|
||||||
|
servicer.GenerateWithCache,
|
||||||
|
request_deserializer=generate__pb2.BatchCached.FromString,
|
||||||
|
response_serializer=generate__pb2.Response.SerializeToString,
|
||||||
|
),
|
||||||
|
}
|
||||||
|
generic_handler = grpc.method_handlers_generic_handler(
|
||||||
|
'generate.v1.TextGeneration', rpc_method_handlers)
|
||||||
|
server.add_generic_rpc_handlers((generic_handler,))
|
||||||
|
|
||||||
|
|
||||||
|
# This class is part of an EXPERIMENTAL API.
|
||||||
|
class TextGeneration(object):
|
||||||
|
"""Missing associated documentation comment in .proto file."""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def ServiceDiscovery(request,
|
||||||
|
target,
|
||||||
|
options=(),
|
||||||
|
channel_credentials=None,
|
||||||
|
call_credentials=None,
|
||||||
|
insecure=False,
|
||||||
|
compression=None,
|
||||||
|
wait_for_ready=None,
|
||||||
|
timeout=None,
|
||||||
|
metadata=None):
|
||||||
|
return grpc.experimental.unary_unary(request, target, '/generate.v1.TextGeneration/ServiceDiscovery',
|
||||||
|
generate__pb2.Empty.SerializeToString,
|
||||||
|
generate__pb2.ServiceDiscoveryResponse.FromString,
|
||||||
|
options, channel_credentials,
|
||||||
|
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def ClearCache(request,
|
||||||
|
target,
|
||||||
|
options=(),
|
||||||
|
channel_credentials=None,
|
||||||
|
call_credentials=None,
|
||||||
|
insecure=False,
|
||||||
|
compression=None,
|
||||||
|
wait_for_ready=None,
|
||||||
|
timeout=None,
|
||||||
|
metadata=None):
|
||||||
|
return grpc.experimental.unary_unary(request, target, '/generate.v1.TextGeneration/ClearCache',
|
||||||
|
generate__pb2.Empty.SerializeToString,
|
||||||
|
generate__pb2.Empty.FromString,
|
||||||
|
options, channel_credentials,
|
||||||
|
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def Generate(request,
|
||||||
|
target,
|
||||||
|
options=(),
|
||||||
|
channel_credentials=None,
|
||||||
|
call_credentials=None,
|
||||||
|
insecure=False,
|
||||||
|
compression=None,
|
||||||
|
wait_for_ready=None,
|
||||||
|
timeout=None,
|
||||||
|
metadata=None):
|
||||||
|
return grpc.experimental.unary_unary(request, target, '/generate.v1.TextGeneration/Generate',
|
||||||
|
generate__pb2.Batch.SerializeToString,
|
||||||
|
generate__pb2.Response.FromString,
|
||||||
|
options, channel_credentials,
|
||||||
|
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def GenerateWithCache(request,
|
||||||
|
target,
|
||||||
|
options=(),
|
||||||
|
channel_credentials=None,
|
||||||
|
call_credentials=None,
|
||||||
|
insecure=False,
|
||||||
|
compression=None,
|
||||||
|
wait_for_ready=None,
|
||||||
|
timeout=None,
|
||||||
|
metadata=None):
|
||||||
|
return grpc.experimental.unary_unary(request, target, '/generate.v1.TextGeneration/GenerateWithCache',
|
||||||
|
generate__pb2.BatchCached.SerializeToString,
|
||||||
|
generate__pb2.Response.FromString,
|
||||||
|
options, channel_credentials,
|
||||||
|
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
@ -0,0 +1,169 @@
|
||||||
|
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
|
||||||
|
"""Client and server classes corresponding to protobuf-defined services."""
|
||||||
|
import grpc
|
||||||
|
|
||||||
|
import generate_pb2 as generate__pb2
|
||||||
|
|
||||||
|
|
||||||
|
class TextGenerationStub(object):
|
||||||
|
"""Missing associated documentation comment in .proto file."""
|
||||||
|
|
||||||
|
def __init__(self, channel):
|
||||||
|
"""Constructor.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
channel: A grpc.Channel.
|
||||||
|
"""
|
||||||
|
self.ServiceDiscovery = channel.unary_unary(
|
||||||
|
'/generate.v1.TextGeneration/ServiceDiscovery',
|
||||||
|
request_serializer=generate__pb2.Empty.SerializeToString,
|
||||||
|
response_deserializer=generate__pb2.ServiceDiscoveryResponse.FromString,
|
||||||
|
)
|
||||||
|
self.ClearCache = channel.unary_unary(
|
||||||
|
'/generate.v1.TextGeneration/ClearCache',
|
||||||
|
request_serializer=generate__pb2.Empty.SerializeToString,
|
||||||
|
response_deserializer=generate__pb2.Empty.FromString,
|
||||||
|
)
|
||||||
|
self.Generate = channel.unary_unary(
|
||||||
|
'/generate.v1.TextGeneration/Generate',
|
||||||
|
request_serializer=generate__pb2.Batch.SerializeToString,
|
||||||
|
response_deserializer=generate__pb2.Response.FromString,
|
||||||
|
)
|
||||||
|
self.GenerateWithCache = channel.unary_unary(
|
||||||
|
'/generate.v1.TextGeneration/GenerateWithCache',
|
||||||
|
request_serializer=generate__pb2.BatchCached.SerializeToString,
|
||||||
|
response_deserializer=generate__pb2.Response.FromString,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TextGenerationServicer(object):
|
||||||
|
"""Missing associated documentation comment in .proto file."""
|
||||||
|
|
||||||
|
def ServiceDiscovery(self, request, context):
|
||||||
|
"""/ Service discovery
|
||||||
|
"""
|
||||||
|
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||||
|
context.set_details('Method not implemented!')
|
||||||
|
raise NotImplementedError('Method not implemented!')
|
||||||
|
|
||||||
|
def ClearCache(self, request, context):
|
||||||
|
"""/ Empties batch cache
|
||||||
|
"""
|
||||||
|
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||||
|
context.set_details('Method not implemented!')
|
||||||
|
raise NotImplementedError('Method not implemented!')
|
||||||
|
|
||||||
|
def Generate(self, request, context):
|
||||||
|
"""/ Generate tokens for a batch without cache
|
||||||
|
"""
|
||||||
|
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||||
|
context.set_details('Method not implemented!')
|
||||||
|
raise NotImplementedError('Method not implemented!')
|
||||||
|
|
||||||
|
def GenerateWithCache(self, request, context):
|
||||||
|
"""/ Generate tokens for a batch with cache
|
||||||
|
"""
|
||||||
|
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||||
|
context.set_details('Method not implemented!')
|
||||||
|
raise NotImplementedError('Method not implemented!')
|
||||||
|
|
||||||
|
|
||||||
|
def add_TextGenerationServicer_to_server(servicer, server):
|
||||||
|
rpc_method_handlers = {
|
||||||
|
'ServiceDiscovery': grpc.unary_unary_rpc_method_handler(
|
||||||
|
servicer.ServiceDiscovery,
|
||||||
|
request_deserializer=generate__pb2.Empty.FromString,
|
||||||
|
response_serializer=generate__pb2.ServiceDiscoveryResponse.SerializeToString,
|
||||||
|
),
|
||||||
|
'ClearCache': grpc.unary_unary_rpc_method_handler(
|
||||||
|
servicer.ClearCache,
|
||||||
|
request_deserializer=generate__pb2.Empty.FromString,
|
||||||
|
response_serializer=generate__pb2.Empty.SerializeToString,
|
||||||
|
),
|
||||||
|
'Generate': grpc.unary_unary_rpc_method_handler(
|
||||||
|
servicer.Generate,
|
||||||
|
request_deserializer=generate__pb2.Batch.FromString,
|
||||||
|
response_serializer=generate__pb2.Response.SerializeToString,
|
||||||
|
),
|
||||||
|
'GenerateWithCache': grpc.unary_unary_rpc_method_handler(
|
||||||
|
servicer.GenerateWithCache,
|
||||||
|
request_deserializer=generate__pb2.BatchCached.FromString,
|
||||||
|
response_serializer=generate__pb2.Response.SerializeToString,
|
||||||
|
),
|
||||||
|
}
|
||||||
|
generic_handler = grpc.method_handlers_generic_handler(
|
||||||
|
'generate.v1.TextGeneration', rpc_method_handlers)
|
||||||
|
server.add_generic_rpc_handlers((generic_handler,))
|
||||||
|
|
||||||
|
|
||||||
|
# This class is part of an EXPERIMENTAL API.
|
||||||
|
class TextGeneration(object):
|
||||||
|
"""Missing associated documentation comment in .proto file."""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def ServiceDiscovery(request,
|
||||||
|
target,
|
||||||
|
options=(),
|
||||||
|
channel_credentials=None,
|
||||||
|
call_credentials=None,
|
||||||
|
insecure=False,
|
||||||
|
compression=None,
|
||||||
|
wait_for_ready=None,
|
||||||
|
timeout=None,
|
||||||
|
metadata=None):
|
||||||
|
return grpc.experimental.unary_unary(request, target, '/generate.v1.TextGeneration/ServiceDiscovery',
|
||||||
|
generate__pb2.Empty.SerializeToString,
|
||||||
|
generate__pb2.ServiceDiscoveryResponse.FromString,
|
||||||
|
options, channel_credentials,
|
||||||
|
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def ClearCache(request,
|
||||||
|
target,
|
||||||
|
options=(),
|
||||||
|
channel_credentials=None,
|
||||||
|
call_credentials=None,
|
||||||
|
insecure=False,
|
||||||
|
compression=None,
|
||||||
|
wait_for_ready=None,
|
||||||
|
timeout=None,
|
||||||
|
metadata=None):
|
||||||
|
return grpc.experimental.unary_unary(request, target, '/generate.v1.TextGeneration/ClearCache',
|
||||||
|
generate__pb2.Empty.SerializeToString,
|
||||||
|
generate__pb2.Empty.FromString,
|
||||||
|
options, channel_credentials,
|
||||||
|
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def Generate(request,
|
||||||
|
target,
|
||||||
|
options=(),
|
||||||
|
channel_credentials=None,
|
||||||
|
call_credentials=None,
|
||||||
|
insecure=False,
|
||||||
|
compression=None,
|
||||||
|
wait_for_ready=None,
|
||||||
|
timeout=None,
|
||||||
|
metadata=None):
|
||||||
|
return grpc.experimental.unary_unary(request, target, '/generate.v1.TextGeneration/Generate',
|
||||||
|
generate__pb2.Batch.SerializeToString,
|
||||||
|
generate__pb2.Response.FromString,
|
||||||
|
options, channel_credentials,
|
||||||
|
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def GenerateWithCache(request,
|
||||||
|
target,
|
||||||
|
options=(),
|
||||||
|
channel_credentials=None,
|
||||||
|
call_credentials=None,
|
||||||
|
insecure=False,
|
||||||
|
compression=None,
|
||||||
|
wait_for_ready=None,
|
||||||
|
timeout=None,
|
||||||
|
metadata=None):
|
||||||
|
return grpc.experimental.unary_unary(request, target, '/generate.v1.TextGeneration/GenerateWithCache',
|
||||||
|
generate__pb2.BatchCached.SerializeToString,
|
||||||
|
generate__pb2.Response.FromString,
|
||||||
|
options, channel_credentials,
|
||||||
|
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
@ -0,0 +1,124 @@
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
MODEL_NAME = "bigscience/bloom"
|
||||||
|
|
||||||
|
|
||||||
|
def match_suffix(text, suffix):
    """Return True if `text` ends with `suffix`.

    BUGFIX: the previous slicing implementation (`text[-len(suffix):] == suffix`)
    returned False for an empty suffix on non-empty text, because `text[-0:]`
    is the whole string. `str.endswith` handles that edge case correctly.
    """
    return text.endswith(suffix)
|
||||||
|
|
||||||
|
|
||||||
|
def prepare_weights(hub_path: Path, save_path: Path, tp_world_size: int):
    """Shard a downloaded BLOOM checkpoint into one file per tensor-parallel rank.

    Reads every `*.bin` state-dict file under `hub_path` and splits each tensor
    according to BLOOM's tensor-parallel layout:
      - column-parallel params (QKV, dense_h_to_4h, embeddings, lm_head): split on dim 0
      - row-parallel weights (attention dense, dense_4h_to_h): split on dim 1
      - row-parallel biases: kept on rank 0 only, zeroed on the other ranks
      - everything else: duplicated on every rank

    Args:
        hub_path: directory containing the HuggingFace `*.bin` checkpoint files.
        save_path: directory where the per-rank `.pty` files are written.
        tp_world_size: number of tensor-parallel ranks.

    Returns:
        The list of per-rank save paths (also when everything already existed).
    """
    save_paths = [
        save_path / f"{MODEL_NAME}_tp-rank-{tp_rank}-of-{tp_world_size}.pty"
        for tp_rank in range(tp_world_size)
    ]

    # Nothing to do if every shard file is already on disk.
    if all(p.exists() for p in save_paths):
        print("Weights are already prepared")
        # BUGFIX: this path used to return None while the normal path returned
        # the list of paths; callers now always get the same type back.
        return save_paths

    shards_state_dicts = [{} for _ in range(tp_world_size)]

    for weight_path in tqdm(hub_path.glob("*.bin")):
        state_dict = torch.load(weight_path, map_location="cpu")

        keys = list(state_dict.keys())
        for state_name in keys:
            state = state_dict[state_name]
            if any(
                match_suffix(state_name, candidate)
                for candidate in [
                    "self_attention.query_key_value.weight",
                    "self_attention.query_key_value.bias",
                    "mlp.dense_h_to_4h.weight",
                    "mlp.dense_h_to_4h.bias",
                    "word_embeddings.weight",
                    "lm_head.weight",
                ]
            ):
                # Column-parallel: split the output dimension across ranks.
                output_size = state.shape[0]
                assert output_size % tp_world_size == 0
                block_size = output_size // tp_world_size
                sharded_weights = torch.split(state, block_size, dim=0)
                assert len(sharded_weights) == tp_world_size
                for tp_rank, shard in enumerate(sharded_weights):
                    assert shard.shape[0] == block_size
                    # lm_head lives outside the `transformer` submodule, so it
                    # keeps its original key; everything else gets prefixed.
                    if match_suffix(state_name, "lm_head.weight"):
                        shards_state_dicts[tp_rank][state_name] = shard.detach().clone()
                    else:
                        shards_state_dicts[tp_rank][
                            "transformer." + state_name
                        ] = shard.detach().clone()
            elif any(
                match_suffix(state_name, candidate)
                for candidate in [
                    "self_attention.dense.weight",
                    "mlp.dense_4h_to_h.weight",
                    "lm_head.weight",
                ]
            ):
                # Row-parallel: split the input dimension across ranks.
                input_size = state.shape[1]
                assert input_size % tp_world_size == 0
                block_size = input_size // tp_world_size
                sharded_weights = torch.split(state, block_size, dim=1)
                assert len(sharded_weights) == tp_world_size
                for tp_rank, shard in enumerate(sharded_weights):
                    assert shard.shape[1] == block_size
                    if match_suffix(state_name, "lm_head.weight"):
                        shards_state_dicts[tp_rank][state_name] = shard.detach().clone()
                    else:
                        shards_state_dicts[tp_rank][
                            "transformer." + state_name
                        ] = shard.detach().clone()
            elif any(
                match_suffix(state_name, candidate)
                for candidate in [
                    "self_attention.dense.bias",
                    "mlp.dense_4h_to_h.bias",
                ]
            ):
                # Row-parallel bias: only rank 0 keeps the real values so the
                # bias is applied exactly once after reduction; other ranks
                # hold zeros of the same shape.
                shards_state_dicts[0][
                    "transformer." + state_name
                ] = state.detach().clone()
                for tp_rank in range(1, tp_world_size):
                    shards_state_dicts[tp_rank][
                        "transformer." + state_name
                    ] = torch.zeros_like(state)
            else:
                # We duplicate parameters across tp ranks.
                for tp_rank in range(tp_world_size):
                    shards_state_dicts[tp_rank][
                        "transformer." + state_name
                    ] = state.detach().clone()

            del state_dict[state_name]  # free memory as we go
            del state  # delete tensor

    # Persist one state dict per rank.
    # BUGFIX: the old loop appended each `save_path` back onto `save_paths`
    # while zipping over it, which doubled every entry in the returned list.
    for shard_save_path, shard_state_dict in zip(save_paths, shards_state_dicts):
        shard_save_path.parent.mkdir(parents=True, exist_ok=True)
        if shard_save_path.exists():
            print(f"Skipping {shard_save_path} as it already exists")
        else:
            torch.save(shard_state_dict, shard_save_path)

    return save_paths
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    from argparse import ArgumentParser

    # CLI entry point: shard a downloaded checkpoint into per-rank files.
    parser = ArgumentParser()

    # Directory holding the HuggingFace `*.bin` checkpoint files.
    parser.add_argument("--hub-path", required=True, type=str)
    # Output directory for the per-rank shard files.
    parser.add_argument("--save-path", required=True, type=str)
    # Number of tensor-parallel ranks to shard for.
    parser.add_argument("--world-size", required=True, type=int)
    args = parser.parse_args()

    prepare_weights(Path(args.hub_path), Path(args.save_path), args.world_size)
|
|
@ -0,0 +1,91 @@
|
||||||
|
import asyncio
|
||||||
|
from grpc import aio
|
||||||
|
|
||||||
|
from grpc_reflection.v1alpha import reflection
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional, List
|
||||||
|
|
||||||
|
from bloom_inference.cache import Cache
|
||||||
|
from bloom_inference.model import BLOOM, Batch, BLOOMSharded
|
||||||
|
from bloom_inference.pb import generate_pb2_grpc, generate_pb2
|
||||||
|
|
||||||
|
|
||||||
|
class TextGeneration(generate_pb2_grpc.TextGenerationServicer):
    """Async gRPC servicer for the generate.v1.TextGeneration service.

    Wraps a BLOOM model and a Cache so that generation state (past key/values)
    can be carried between an initial Generate call and follow-up
    GenerateWithCache calls.
    """

    def __init__(self, model: BLOOM, cache: Cache, server_urls: List[str]):
        # Cache holding batch state between successive generation steps.
        self.cache = cache
        self.model = model
        # URLs of every shard server; advertised via ServiceDiscovery.
        self.server_urls = server_urls

    async def ServiceDiscovery(self, request, context):
        # Returns the full list of shard URLs so a router can reach every rank.
        return generate_pb2.ServiceDiscoveryResponse(urls=self.server_urls)

    async def ClearCache(self, request, context):
        # Drops all cached batch state.
        self.cache.clear()
        return generate_pb2.Empty()

    async def Generate(self, request, context):
        # First generation step for a fresh batch (no cached past key/values).
        batch = Batch.from_batch_pb(request, self.model.tokenizer, self.model.device)
        finished_generations, cache_entry = self.model.generate_token(batch)
        # Store the (possibly None) cache entry for follow-up GenerateWithCache calls.
        self.cache.set(cache_entry)

        return generate_pb2.Response(
            finished=[
                finished_generation.to_pb()
                for finished_generation in finished_generations
            ],
            cache_entry=cache_entry.to_pb() if cache_entry else None,
        )

    async def GenerateWithCache(self, request, context):
        # Follow-up generation step reusing state stored in the cache.
        batch = Batch.from_batch_cached_pb(request, self.cache)
        finished_generations, cache_entry = self.model.generate_token(batch)
        self.cache.set(cache_entry)

        return generate_pb2.Response(
            finished=[
                finished_generation.to_pb()
                for finished_generation in finished_generations
            ],
            cache_entry=cache_entry.to_pb() if cache_entry else None,
        )
|
||||||
|
|
||||||
|
|
||||||
|
def serve(model_name, sharded, shard_directory):
    """Start a (blocking) gRPC text-generation server for `model_name`.

    When `sharded` is True, this process loads its tensor-parallel shard
    (BLOOMSharded) and advertises one unix socket per rank; otherwise the full
    model is loaded and a single socket is served.
    """
    async def serve_inner(
        model_name: str,
        sharded: bool = False,
        shard_directory: Optional[Path] = None,
    ):
        unix_socket_template = "unix:///tmp/bloom-inference-{}"
        if sharded:
            if shard_directory is None:
                raise ValueError("shard_directory must be set when sharded is True")
            model = BLOOMSharded(model_name, shard_directory)
            # One unix socket per tensor-parallel rank; this process binds only
            # its own rank's socket but advertises all of them.
            server_urls = [
                unix_socket_template.format(rank) for rank in range(model.world_size)
            ]
            local_url = unix_socket_template.format(model.rank)
        else:
            model = BLOOM(model_name)
            local_url = unix_socket_template.format(0)
            server_urls = [local_url]

        server = aio.server()
        generate_pb2_grpc.add_TextGenerationServicer_to_server(
            TextGeneration(model, Cache(), server_urls), server
        )
        # Enable server reflection so generic clients (e.g. grpcurl) can
        # introspect the service.
        SERVICE_NAMES = (
            generate_pb2.DESCRIPTOR.services_by_name["TextGeneration"].full_name,
            reflection.SERVICE_NAME,
        )
        reflection.enable_server_reflection(SERVICE_NAMES, server)
        server.add_insecure_port(local_url)
        await server.start()
        print("Server started at {}".format(local_url))
        # Block forever (until the server is shut down externally).
        await server.wait_for_termination()

    asyncio.run(serve_inner(model_name, sharded, shard_directory))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # NOTE(review): hard-coded debug invocation (560m model, sharded, fixed
    # shard directory) — consider wiring this to a proper CLI.
    serve("bigscience/bloom-560m", True, Path("/tmp/models"))
|
|
@ -0,0 +1,102 @@
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from torch import nn
|
||||||
|
from transformers import AutoModelForCausalLM
|
||||||
|
|
||||||
|
|
||||||
|
def match_suffix(text, suffix):
    """Return True if `text` ends with `suffix`.

    BUGFIX: the previous slicing implementation (`text[-len(suffix):] == suffix`)
    returned False for an empty suffix on non-empty text, because `text[-0:]`
    is the whole string. `str.endswith` handles that edge case correctly.
    """
    return text.endswith(suffix)
|
||||||
|
|
||||||
|
|
||||||
|
def shard_model(model_name: str, path: Path, tp_world_size: int, dtype: torch.dtype):
    """BLOOM specific sharding mechanism.

    Loads the full model with `AutoModelForCausalLM`, splits every tensor
    according to BLOOM's tensor-parallel layout and writes one `.pty` state
    dict per rank under `path`:
      - column-parallel params (QKV, dense_h_to_4h, embeddings, lm_head): split on dim 0
      - row-parallel weights (attention dense, dense_4h_to_h): split on dim 1
      - row-parallel biases: kept on rank 0 only, zeroed on the other ranks
      - everything else: duplicated on every rank

    Returns:
        The list of per-rank save paths (also when they already existed).
    """
    save_paths = [
        path / f"{model_name}_tp-rank-{tp_rank}-of-{tp_world_size}.pty"
        for tp_rank in range(tp_world_size)
    ]
    # Fast path: all shard files already written by a previous run.
    if all(p.exists() for p in save_paths):
        print("Loading already cached values")
        return save_paths

    model: nn.Module = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype=dtype, local_files_only=True
    )

    shards_state_dicts = [{} for _ in range(tp_world_size)]
    state_dict = model.state_dict()
    keys = list(state_dict.keys())
    for state_name in keys:
        state = state_dict[state_name]
        if any(
            match_suffix(state_name, candidate)
            for candidate in [
                "self_attention.query_key_value.weight",
                "self_attention.query_key_value.bias",
                "mlp.dense_h_to_4h.weight",
                "mlp.dense_h_to_4h.bias",
                "transformer.word_embeddings.weight",
                "lm_head.weight",
            ]
        ):
            # Column-parallel: split the output dimension across ranks.
            output_size = state.shape[0]
            assert output_size % tp_world_size == 0
            block_size = output_size // tp_world_size
            sharded_weights = torch.split(state, block_size, dim=0)
            assert len(sharded_weights) == tp_world_size
            for tp_rank, shard in enumerate(sharded_weights):
                assert shard.shape[0] == block_size
                shards_state_dicts[tp_rank][state_name] = shard.detach().clone()
        elif any(
            match_suffix(state_name, candidate)
            for candidate in [
                "self_attention.dense.weight",
                "mlp.dense_4h_to_h.weight",
                "lm_head.weight",
            ]
        ):
            # Row-parallel: split the input dimension across ranks.
            input_size = state.shape[1]
            assert input_size % tp_world_size == 0
            block_size = input_size // tp_world_size
            sharded_weights = torch.split(state, block_size, dim=1)
            assert len(sharded_weights) == tp_world_size
            for tp_rank, shard in enumerate(sharded_weights):
                assert shard.shape[1] == block_size
                shards_state_dicts[tp_rank][state_name] = shard.detach().clone()
        elif any(
            match_suffix(state_name, candidate)
            for candidate in [
                "self_attention.dense.bias",
                "mlp.dense_4h_to_h.bias",
            ]
        ):
            # Row-parallel bias: only rank 0 keeps the real values so the bias
            # is applied exactly once after reduction; other ranks hold zeros.
            shards_state_dicts[0][state_name] = state.detach().clone()
            for tp_rank in range(1, tp_world_size):
                shards_state_dicts[tp_rank][state_name] = torch.zeros_like(state)
        else:
            # We duplicate parameters across tp ranks.
            for tp_rank in range(tp_world_size):
                shards_state_dicts[tp_rank][state_name] = state.detach().clone()

        del state_dict[state_name]  # free memory as we go
        del state  # delete tensor

    # Persist one state dict per rank.
    # BUGFIX: the old loop appended each `save_path` back onto `save_paths`
    # while zipping over it, which doubled every entry in the returned list.
    # Also removed a stray per-parameter debug print in the loop above.
    for shard_save_path, shard_state_dict in zip(save_paths, shards_state_dicts):
        shard_save_path.parent.mkdir(parents=True, exist_ok=True)
        torch.save(shard_state_dict, shard_save_path)

    return save_paths
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
model_name = "bigscience/bloom"
|
||||||
|
save_path = Path("/data/shards")
|
||||||
|
tp_world_size = 8
|
||||||
|
dtype = torch.bfloat16
|
||||||
|
|
||||||
|
shard_model(model_name, save_path, tp_world_size=tp_world_size, dtype=dtype)
|
|
@ -0,0 +1,95 @@
|
||||||
|
import os
|
||||||
|
import contextlib
|
||||||
|
import torch
|
||||||
|
import torch.distributed
|
||||||
|
from transformers.generation_logits_process import (
|
||||||
|
LogitsProcessorList,
|
||||||
|
TemperatureLogitsWarper,
|
||||||
|
TopPLogitsWarper,
|
||||||
|
TopKLogitsWarper,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class Sampling:
    """Stochastic next-token selection: draws one id per row from softmax(logits)."""

    def __call__(self, logits):
        # Turn the raw scores into a categorical distribution, then draw a
        # single sample for every row of the batch.
        distribution = torch.nn.functional.softmax(logits, dim=-1)
        drawn = torch.multinomial(distribution, num_samples=1)
        return drawn.squeeze(1)
|
||||||
|
|
||||||
|
|
||||||
|
class Greedy:
    """Deterministic next-token selection: picks the highest-scoring id per row."""

    def __call__(self, logits):
        return torch.argmax(logits, dim=-1)
|
||||||
|
|
||||||
|
|
||||||
|
class NextTokenChooser:
    """Warps logits (temperature / top-k / top-p) and picks the next token ids.

    The warper-pipeline idea is largely copied from
    https://github.com/huggingface/transformers/pull/5420/files; all samplers
    can be found in `generation_utils_samplers.py`. Sampling is used when
    `do_sample` is set or any warper is active; otherwise decoding is greedy.
    """

    def __init__(self, temperature=1.0, top_k=None, top_p=None, do_sample=False):
        warpers = LogitsProcessorList()
        use_sampling = do_sample

        # Each active warper implies stochastic decoding.
        if temperature is not None and temperature != 1.0:
            warpers.append(TemperatureLogitsWarper(float(temperature)))
            use_sampling = True
        if top_k is not None and top_k != 0:
            warpers.append(TopKLogitsWarper(top_k=top_k))
            use_sampling = True
        if top_p is not None and top_p < 1.0:
            warpers.append(TopPLogitsWarper(top_p=top_p))
            use_sampling = True

        self.warpers = warpers
        self.choice = Sampling() if use_sampling else Greedy()

    def __call__(self, input_ids, scores):
        warped_scores = self.warpers(input_ids, scores)
        chosen = self.choice(warped_scores)
        return chosen.unsqueeze(-1)
|
||||||
|
|
||||||
|
|
||||||
|
class StoppingCriteria:
    """Stateful stop condition: signals True once `max_new_tokens` calls occurred."""

    def __init__(self, max_new_tokens=20):
        self.max_new_tokens = max_new_tokens
        # Number of tokens generated so far for this sequence.
        self.current_tokens = 0

    def __call__(self, all_ids):
        # One call per newly generated token; `all_ids` is currently unused.
        self.current_tokens += 1
        return self.current_tokens >= self.max_new_tokens
|
||||||
|
|
||||||
|
|
||||||
|
def initialize_torch_distributed():
    """Initialize torch.distributed for this process.

    Rank and world size are read from the RANK / WORLD_SIZE environment
    variables, defaulting to a single-process setup. Uses the NCCL backend
    when CUDA is available, gloo otherwise.

    Returns:
        A tuple (process_group, rank, world_size).
    """
    rank = int(os.getenv("RANK", "0"))
    world_size = int(os.getenv("WORLD_SIZE", "1"))

    if torch.cuda.is_available():
        # Pin this process to a single GPU before initializing NCCL.
        assert world_size <= torch.cuda.device_count(), "Each process is one gpu"
        device = rank % torch.cuda.device_count()
        torch.cuda.set_device(device)
        backend = "nccl"
    else:
        backend = "gloo"

    # NOTE(review): the rendezvous address/port is hard-coded — two instances
    # on the same host would collide; consider making it configurable.
    torch.distributed.init_process_group(
        backend=backend,
        world_size=world_size,
        rank=rank,
        init_method="tcp://localhost:6000",
    )

    # NOTE(review): `_get_default_group` is a private torch API and may break
    # across torch versions; `torch.distributed.group.WORLD` is the public way.
    return torch.distributed.distributed_c10d._get_default_group(), rank, world_size
|
||||||
|
|
||||||
|
|
||||||
|
@contextlib.contextmanager
def set_default_dtype(dtype):
    """Context manager that temporarily sets torch's default dtype.

    The previous default dtype is restored on exit, even if the body raises.
    """
    previous = torch.get_default_dtype()
    torch.set_default_dtype(dtype)
    try:
        yield
    finally:
        torch.set_default_dtype(previous)
|
|
@ -0,0 +1,480 @@
|
||||||
|
[[package]]
|
||||||
|
name = "accelerate"
|
||||||
|
version = "0.12.0"
|
||||||
|
description = "Accelerate"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7.0"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
numpy = ">=1.17"
|
||||||
|
packaging = ">=20.0"
|
||||||
|
psutil = "*"
|
||||||
|
pyyaml = "*"
|
||||||
|
torch = ">=1.4.0"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
dev = ["black (>=22.0,<23.0)", "datasets", "deepspeed (<0.7.0)", "evaluate", "flake8 (>=3.8.3)", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "parameterized", "pytest", "pytest-subtests", "pytest-xdist", "scipy", "sklearn", "tqdm", "transformers"]
|
||||||
|
quality = ["black (>=22.0,<23.0)", "flake8 (>=3.8.3)", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)"]
|
||||||
|
sagemaker = ["sagemaker"]
|
||||||
|
test_dev = ["datasets", "deepspeed (<0.7.0)", "evaluate", "scipy", "sklearn", "tqdm", "transformers"]
|
||||||
|
test_prod = ["parameterized", "pytest", "pytest-subtests", "pytest-xdist"]
|
||||||
|
test_trackers = ["comet-ml", "tensorboard", "wandb"]
|
||||||
|
testing = ["datasets", "deepspeed (<0.7.0)", "evaluate", "parameterized", "pytest", "pytest-subtests", "pytest-xdist", "scipy", "sklearn", "tqdm", "transformers"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "click"
|
||||||
|
version = "8.1.3"
|
||||||
|
description = "Composable command line interface toolkit"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
colorama = {version = "*", markers = "platform_system == \"Windows\""}
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "colorama"
|
||||||
|
version = "0.4.5"
|
||||||
|
description = "Cross-platform colored terminal text."
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "grpcio"
|
||||||
|
version = "1.49.1"
|
||||||
|
description = "HTTP/2-based RPC framework"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
six = ">=1.5.2"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
protobuf = ["grpcio-tools (>=1.49.1)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "grpcio-reflection"
|
||||||
|
version = "1.49.1"
|
||||||
|
description = "Standard Protobuf Reflection Service for gRPC"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
grpcio = ">=1.49.1"
|
||||||
|
protobuf = ">=4.21.3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "grpcio-tools"
|
||||||
|
version = "1.49.1"
|
||||||
|
description = "Protobuf code generator for gRPC"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
grpcio = ">=1.49.1"
|
||||||
|
protobuf = ">=4.21.3,<5.0dev"
|
||||||
|
setuptools = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "numpy"
|
||||||
|
version = "1.23.3"
|
||||||
|
description = "NumPy is the fundamental package for array computing with Python."
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "packaging"
|
||||||
|
version = "21.3"
|
||||||
|
description = "Core utilities for Python packages"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
pyparsing = ">=2.0.2,<3.0.5 || >3.0.5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "protobuf"
|
||||||
|
version = "4.21.7"
|
||||||
|
description = ""
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "psutil"
|
||||||
|
version = "5.9.2"
|
||||||
|
description = "Cross-platform lib for process and system monitoring in Python."
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pyparsing"
|
||||||
|
version = "3.0.9"
|
||||||
|
description = "pyparsing module - Classes and methods to define and execute parsing grammars"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6.8"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
diagrams = ["jinja2", "railroad-diagrams"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "PyYAML"
|
||||||
|
version = "6.0"
|
||||||
|
description = "YAML parser and emitter for Python"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "setuptools"
|
||||||
|
version = "65.4.1"
|
||||||
|
description = "Easily download, build, install, upgrade, and uninstall Python packages"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
|
||||||
|
testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mock", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
|
||||||
|
testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "six"
|
||||||
|
version = "1.16.0"
|
||||||
|
description = "Python 2 and 3 compatibility utilities"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "torch"
|
||||||
|
version = "1.12.1"
|
||||||
|
description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7.0"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
typing-extensions = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "typer"
|
||||||
|
version = "0.6.1"
|
||||||
|
description = "Typer, build great CLIs. Easy to code. Based on Python type hints."
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
click = ">=7.1.1,<9.0.0"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
all = ["colorama (>=0.4.3,<0.5.0)", "rich (>=10.11.0,<13.0.0)", "shellingham (>=1.3.0,<2.0.0)"]
|
||||||
|
dev = ["autoflake (>=1.3.1,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)", "pre-commit (>=2.17.0,<3.0.0)"]
|
||||||
|
doc = ["mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)"]
|
||||||
|
test = ["black (>=22.3.0,<23.0.0)", "coverage (>=5.2,<6.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.910)", "pytest (>=4.4.0,<5.4.0)", "pytest-cov (>=2.10.0,<3.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "pytest-xdist (>=1.32.0,<2.0.0)", "rich (>=10.11.0,<13.0.0)", "shellingham (>=1.3.0,<2.0.0)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "typing-extensions"
|
||||||
|
version = "4.3.0"
|
||||||
|
description = "Backported and Experimental Type Hints for Python 3.7+"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
|
||||||
|
[metadata]
|
||||||
|
lock-version = "1.1"
|
||||||
|
python-versions = "^3.9"
|
||||||
|
content-hash = "cedd0aebeb3731e2bbddf017a2ee6074c285866354272f8dfe930e9606437a25"
|
||||||
|
|
||||||
|
[metadata.files]
|
||||||
|
accelerate = [
|
||||||
|
{file = "accelerate-0.12.0-py3-none-any.whl", hash = "sha256:7742ca5c9f15dd1e0a283305599c196e260af4717a561d1f544aeab27d828af6"},
|
||||||
|
{file = "accelerate-0.12.0.tar.gz", hash = "sha256:e8b119c94fac31877620d5f9de311164ec81fa9dc9e175f0d0d4f50fc8d79473"},
|
||||||
|
]
|
||||||
|
click = [
|
||||||
|
{file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"},
|
||||||
|
{file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"},
|
||||||
|
]
|
||||||
|
colorama = [
|
||||||
|
{file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"},
|
||||||
|
{file = "colorama-0.4.5.tar.gz", hash = "sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"},
|
||||||
|
]
|
||||||
|
grpcio = [
|
||||||
|
{file = "grpcio-1.49.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:fd86040232e805b8e6378b2348c928490ee595b058ce9aaa27ed8e4b0f172b20"},
|
||||||
|
{file = "grpcio-1.49.1-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:6fd0c9cede9552bf00f8c5791d257d5bf3790d7057b26c59df08be5e7a1e021d"},
|
||||||
|
{file = "grpcio-1.49.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:d0d402e158d4e84e49c158cb5204119d55e1baf363ee98d6cb5dce321c3a065d"},
|
||||||
|
{file = "grpcio-1.49.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:822ceec743d42a627e64ea266059a62d214c5a3cdfcd0d7fe2b7a8e4e82527c7"},
|
||||||
|
{file = "grpcio-1.49.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2106d9c16527f0a85e2eea6e6b91a74fc99579c60dd810d8690843ea02bc0f5f"},
|
||||||
|
{file = "grpcio-1.49.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:52dd02b7e7868233c571b49bc38ebd347c3bb1ff8907bb0cb74cb5f00c790afc"},
|
||||||
|
{file = "grpcio-1.49.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:120fecba2ec5d14b5a15d11063b39783fda8dc8d24addd83196acb6582cabd9b"},
|
||||||
|
{file = "grpcio-1.49.1-cp310-cp310-win32.whl", hash = "sha256:f1a3b88e3c53c1a6e6bed635ec1bbb92201bb6a1f2db186179f7f3f244829788"},
|
||||||
|
{file = "grpcio-1.49.1-cp310-cp310-win_amd64.whl", hash = "sha256:a7d0017b92d3850abea87c1bdec6ea41104e71c77bca44c3e17f175c6700af62"},
|
||||||
|
{file = "grpcio-1.49.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:9fb17ff8c0d56099ac6ebfa84f670c5a62228d6b5c695cf21c02160c2ac1446b"},
|
||||||
|
{file = "grpcio-1.49.1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:075f2d06e3db6b48a2157a1bcd52d6cbdca980dd18988fe6afdb41795d51625f"},
|
||||||
|
{file = "grpcio-1.49.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:46d93a1b4572b461a227f1db6b8d35a88952db1c47e5fadcf8b8a2f0e1dd9201"},
|
||||||
|
{file = "grpcio-1.49.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc79b2b37d779ac42341ddef40ad5bf0966a64af412c89fc2b062e3ddabb093f"},
|
||||||
|
{file = "grpcio-1.49.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:5f8b3a971c7820ea9878f3fd70086240a36aeee15d1b7e9ecbc2743b0e785568"},
|
||||||
|
{file = "grpcio-1.49.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49b301740cf5bc8fed4fee4c877570189ae3951432d79fa8e524b09353659811"},
|
||||||
|
{file = "grpcio-1.49.1-cp311-cp311-win32.whl", hash = "sha256:1c66a25afc6c71d357867b341da594a5587db5849b48f4b7d5908d236bb62ede"},
|
||||||
|
{file = "grpcio-1.49.1-cp311-cp311-win_amd64.whl", hash = "sha256:6b6c3a95d27846f4145d6967899b3ab25fffc6ae99544415e1adcacef84842d2"},
|
||||||
|
{file = "grpcio-1.49.1-cp37-cp37m-linux_armv7l.whl", hash = "sha256:1cc400c8a2173d1c042997d98a9563e12d9bb3fb6ad36b7f355bc77c7663b8af"},
|
||||||
|
{file = "grpcio-1.49.1-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:34f736bd4d0deae90015c0e383885b431444fe6b6c591dea288173df20603146"},
|
||||||
|
{file = "grpcio-1.49.1-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:196082b9c89ebf0961dcd77cb114bed8171964c8e3063b9da2fb33536a6938ed"},
|
||||||
|
{file = "grpcio-1.49.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8c9f89c42749890618cd3c2464e1fbf88446e3d2f67f1e334c8e5db2f3272bbd"},
|
||||||
|
{file = "grpcio-1.49.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64419cb8a5b612cdb1550c2fd4acbb7d4fb263556cf4625f25522337e461509e"},
|
||||||
|
{file = "grpcio-1.49.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:8a5272061826e6164f96e3255405ef6f73b88fd3e8bef464c7d061af8585ac62"},
|
||||||
|
{file = "grpcio-1.49.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ea9d0172445241ad7cb49577314e39d0af2c5267395b3561d7ced5d70458a9f3"},
|
||||||
|
{file = "grpcio-1.49.1-cp37-cp37m-win32.whl", hash = "sha256:2070e87d95991473244c72d96d13596c751cb35558e11f5df5414981e7ed2492"},
|
||||||
|
{file = "grpcio-1.49.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fcedcab49baaa9db4a2d240ac81f2d57eb0052b1c6a9501b46b8ae912720fbf"},
|
||||||
|
{file = "grpcio-1.49.1-cp38-cp38-linux_armv7l.whl", hash = "sha256:afbb3475cf7f4f7d380c2ca37ee826e51974f3e2665613996a91d6a58583a534"},
|
||||||
|
{file = "grpcio-1.49.1-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:a4f9ba141380abde6c3adc1727f21529137a2552002243fa87c41a07e528245c"},
|
||||||
|
{file = "grpcio-1.49.1-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:cf0a1fb18a7204b9c44623dfbd1465b363236ce70c7a4ed30402f9f60d8b743b"},
|
||||||
|
{file = "grpcio-1.49.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:17bb6fe72784b630728c6cff9c9d10ccc3b6d04e85da6e0a7b27fb1d135fac62"},
|
||||||
|
{file = "grpcio-1.49.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18305d5a082d1593b005a895c10041f833b16788e88b02bb81061f5ebcc465df"},
|
||||||
|
{file = "grpcio-1.49.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b6a1b39e59ac5a3067794a0e498911cf2e37e4b19ee9e9977dc5e7051714f13f"},
|
||||||
|
{file = "grpcio-1.49.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0e20d59aafc086b1cc68400463bddda6e41d3e5ed30851d1e2e0f6a2e7e342d3"},
|
||||||
|
{file = "grpcio-1.49.1-cp38-cp38-win32.whl", hash = "sha256:e1e83233d4680863a421f3ee4a7a9b80d33cd27ee9ed7593bc93f6128302d3f2"},
|
||||||
|
{file = "grpcio-1.49.1-cp38-cp38-win_amd64.whl", hash = "sha256:221d42c654d2a41fa31323216279c73ed17d92f533bc140a3390cc1bd78bf63c"},
|
||||||
|
{file = "grpcio-1.49.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:fa9e6e61391e99708ac87fc3436f6b7b9c6b845dc4639b406e5e61901e1aacde"},
|
||||||
|
{file = "grpcio-1.49.1-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:9b449e966ef518ce9c860d21f8afe0b0f055220d95bc710301752ac1db96dd6a"},
|
||||||
|
{file = "grpcio-1.49.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:aa34d2ad9f24e47fa9a3172801c676e4037d862247e39030165fe83821a7aafd"},
|
||||||
|
{file = "grpcio-1.49.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5207f4eed1b775d264fcfe379d8541e1c43b878f2b63c0698f8f5c56c40f3d68"},
|
||||||
|
{file = "grpcio-1.49.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b24a74651438d45619ac67004638856f76cc13d78b7478f2457754cbcb1c8ad"},
|
||||||
|
{file = "grpcio-1.49.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:fe763781669790dc8b9618e7e677c839c87eae6cf28b655ee1fa69ae04eea03f"},
|
||||||
|
{file = "grpcio-1.49.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2f2ff7ba0f8f431f32d4b4bc3a3713426949d3533b08466c4ff1b2b475932ca8"},
|
||||||
|
{file = "grpcio-1.49.1-cp39-cp39-win32.whl", hash = "sha256:08ff74aec8ff457a89b97152d36cb811dcc1d17cd5a92a65933524e363327394"},
|
||||||
|
{file = "grpcio-1.49.1-cp39-cp39-win_amd64.whl", hash = "sha256:274ffbb39717918c514b35176510ae9be06e1d93121e84d50b350861dcb9a705"},
|
||||||
|
{file = "grpcio-1.49.1.tar.gz", hash = "sha256:d4725fc9ec8e8822906ae26bb26f5546891aa7fbc3443de970cc556d43a5c99f"},
|
||||||
|
]
|
||||||
|
grpcio-reflection = [
|
||||||
|
{file = "grpcio-reflection-1.49.1.tar.gz", hash = "sha256:b755dfe61d5255a02fb8d0d845bd0027847dee68bf0763a2b286d664ed07ec4d"},
|
||||||
|
{file = "grpcio_reflection-1.49.1-py3-none-any.whl", hash = "sha256:70a325a83c1c1ab583d368711e5733cbef5e068ad2c17cbe77df6e47e0311d1f"},
|
||||||
|
]
|
||||||
|
grpcio-tools = [
|
||||||
|
{file = "grpcio-tools-1.49.1.tar.gz", hash = "sha256:84cc64e5b46bad43d5d7bd2fd772b656eba0366961187a847e908e2cb735db91"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:2dfb6c7ece84d46bd690b23d3e060d18115c8bc5047d2e8a33e6747ed323a348"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:8f452a107c054a04db2570f7851a07f060313c6e841b0d394ce6030d598290e6"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:6a198871b582287213c4d70792bf275e1d7cf34eed1d019f534ddf4cd15ab039"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a0cca67a7d0287bdc855d81fdd38dc949c4273273a74f832f9e520abe4f20bc6"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdaff4c89eecb37c247b93025410db68114d97fa093cbb028e9bd7cda5912473"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bb8773118ad315db317d7b22b5ff75d649ca20931733281209e7cbd8c0fad53e"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7cc5534023735b8a8f56760b7c533918f874ce5a9064d7c5456d2709ae2b31f9"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp310-cp310-win32.whl", hash = "sha256:d277642acbe305f5586f9597b78fb9970d6633eb9f89c61e429c92c296c37129"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp310-cp310-win_amd64.whl", hash = "sha256:eed599cf08fc1a06c72492d3c5750c32f58de3750eddd984af1f257c14326701"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:9e5c13809ab2f245398e8446c4c3b399a62d591db651e46806cccf52a700452e"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:ab3d0ee9623720ee585fdf3753b3755d3144a4a8ae35bca8e3655fa2f41056be"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ba87e3512bc91d78bf9febcfb522eadda171d2d4ddaf886066b0f01aa4929ad"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13e13b3643e7577a3ec13b79689eb4d7548890b1e104c04b9ed6557a3c3dd452"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:324f67d9cb4b7058b6ce45352fb64c20cc1fa04c34d97ad44772cfe6a4ae0cf5"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a64bab81b220c50033f584f57978ebbea575f09c1ccee765cd5c462177988098"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp311-cp311-win32.whl", hash = "sha256:f632d376f92f23e5931697a3acf1b38df7eb719774213d93c52e02acd2d529ac"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp311-cp311-win_amd64.whl", hash = "sha256:28ff2b978d9509474928b9c096a0cce4eaa9c8f7046136aee1545f6211ed8126"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp37-cp37m-linux_armv7l.whl", hash = "sha256:46afd3cb7e555187451a5d283f108cdef397952a662cb48680afc615b158864a"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:9284568b728e41fa8f7e9c2e7399545d605f75d8072ef0e9aa2a05655cb679eb"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:aa34442cf787732cb41f2aa6172007e24f480b8b9d3dc5166de80d63e9072ea4"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3b8c9eb5a4250905414cd53a68caea3eb8f0c515aadb689e6e81b71ebe9ab5c6"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab15db024051bf21feb21c29cb2c3ea0a2e4f5cf341d46ef76e17fcf6aaef164"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:502084b622f758bef620a9107c2db9fcdf66d26c7e0e481d6bb87db4dc917d70"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:4085890b77c640085f82bf1e90a0ea166ce48000bc2f5180914b974783c9c0a8"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp37-cp37m-win32.whl", hash = "sha256:da0edb984699769ce02e18e3392d54b59a7a3f93acd285a68043f5bde4fc028e"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp37-cp37m-win_amd64.whl", hash = "sha256:9887cd622770271101a7dd1832845d64744c3f88fd11ccb2620394079197a42e"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp38-cp38-linux_armv7l.whl", hash = "sha256:8440fe7dae6a40c279e3a24b82793735babd38ecbb0d07bb712ff9c8963185d9"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:b5de2bb7dd6b6231da9b1556ade981513330b740e767f1d902c71ceee0a7d196"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:1e6f06a763aea7836b63d9c117347f2bf7038008ceef72758815c9e09c5fb1fc"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e31562f90120318c5395aabec0f2f69ad8c14b6676996b7730d9d2eaf9415d57"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49ef9a4e389a618157a9daa9fafdfeeaef1ece9adda7f50f85db928f24d4b3e8"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b384cb8e8d9bcb55ee8f9b064374561c7a1a05d848249581403d36fc7060032f"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:73732f77943ac3e898879cbb29c27253aa3c47566b8a59780fd24c6a54de1b66"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp38-cp38-win32.whl", hash = "sha256:b594b2745a5ba9e7a76ce561bc5ab40bc65bb44743c505529b1e4f12af29104d"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp38-cp38-win_amd64.whl", hash = "sha256:680fbc88f8709ddcabb88f86749f2d8e429160890cff2c70680880a6970d4eef"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:e8c3869121860f6767eedb7d24fc54dfd71e737fdfbb26e1334684606f3274fd"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:73e9d7c886ba10e20c97d1dab0ff961ba5800757ae5e31be21b1cda8130c52f8"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:1760de2dd2c4f08de87b039043a4797f3c17193656e7e3eb84e92f0517083c0c"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd4b1e216dd04d9245ee8f4e601a1f98c25e6e417ea5cf8d825c50589a8b447e"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1c28751ab5955cae563d07677e799233f0fe1c0fc49d9cbd61ff1957e83617f"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:c24239c3ee9ed16314c14b4e24437b5079ebc344f343f33629a582f8699f583b"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:892d3dacf1942820f0b7a868a30e6fbcdf5bec08543b682c7274b0101cee632d"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp39-cp39-win32.whl", hash = "sha256:704d21509ec06efc9d034dbe70e7152715aac004941f4f0f553cf3a0aff15bd5"},
|
||||||
|
{file = "grpcio_tools-1.49.1-cp39-cp39-win_amd64.whl", hash = "sha256:1efa0c221c719433f441ac0e026fc3c4dbc9a1a08a552ecdc707775e2f2fbbae"},
|
||||||
|
]
|
||||||
|
numpy = [
|
||||||
|
{file = "numpy-1.23.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c9f707b5bb73bf277d812ded9896f9512a43edff72712f31667d0a8c2f8e71ee"},
|
||||||
|
{file = "numpy-1.23.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ffcf105ecdd9396e05a8e58e81faaaf34d3f9875f137c7372450baa5d77c9a54"},
|
||||||
|
{file = "numpy-1.23.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ea3f98a0ffce3f8f57675eb9119f3f4edb81888b6874bc1953f91e0b1d4f440"},
|
||||||
|
{file = "numpy-1.23.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:004f0efcb2fe1c0bd6ae1fcfc69cc8b6bf2407e0f18be308612007a0762b4089"},
|
||||||
|
{file = "numpy-1.23.3-cp310-cp310-win32.whl", hash = "sha256:98dcbc02e39b1658dc4b4508442a560fe3ca5ca0d989f0df062534e5ca3a5c1a"},
|
||||||
|
{file = "numpy-1.23.3-cp310-cp310-win_amd64.whl", hash = "sha256:39a664e3d26ea854211867d20ebcc8023257c1800ae89773cbba9f9e97bae036"},
|
||||||
|
{file = "numpy-1.23.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1f27b5322ac4067e67c8f9378b41c746d8feac8bdd0e0ffede5324667b8a075c"},
|
||||||
|
{file = "numpy-1.23.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2ad3ec9a748a8943e6eb4358201f7e1c12ede35f510b1a2221b70af4bb64295c"},
|
||||||
|
{file = "numpy-1.23.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdc9febce3e68b697d931941b263c59e0c74e8f18861f4064c1f712562903411"},
|
||||||
|
{file = "numpy-1.23.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:301c00cf5e60e08e04d842fc47df641d4a181e651c7135c50dc2762ffe293dbd"},
|
||||||
|
{file = "numpy-1.23.3-cp311-cp311-win32.whl", hash = "sha256:7cd1328e5bdf0dee621912f5833648e2daca72e3839ec1d6695e91089625f0b4"},
|
||||||
|
{file = "numpy-1.23.3-cp311-cp311-win_amd64.whl", hash = "sha256:8355fc10fd33a5a70981a5b8a0de51d10af3688d7a9e4a34fcc8fa0d7467bb7f"},
|
||||||
|
{file = "numpy-1.23.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bc6e8da415f359b578b00bcfb1d08411c96e9a97f9e6c7adada554a0812a6cc6"},
|
||||||
|
{file = "numpy-1.23.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:22d43376ee0acd547f3149b9ec12eec2f0ca4a6ab2f61753c5b29bb3e795ac4d"},
|
||||||
|
{file = "numpy-1.23.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a64403f634e5ffdcd85e0b12c08f04b3080d3e840aef118721021f9b48fc1460"},
|
||||||
|
{file = "numpy-1.23.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efd9d3abe5774404becdb0748178b48a218f1d8c44e0375475732211ea47c67e"},
|
||||||
|
{file = "numpy-1.23.3-cp38-cp38-win32.whl", hash = "sha256:f8c02ec3c4c4fcb718fdf89a6c6f709b14949408e8cf2a2be5bfa9c49548fd85"},
|
||||||
|
{file = "numpy-1.23.3-cp38-cp38-win_amd64.whl", hash = "sha256:e868b0389c5ccfc092031a861d4e158ea164d8b7fdbb10e3b5689b4fc6498df6"},
|
||||||
|
{file = "numpy-1.23.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:09f6b7bdffe57fc61d869a22f506049825d707b288039d30f26a0d0d8ea05164"},
|
||||||
|
{file = "numpy-1.23.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8c79d7cf86d049d0c5089231a5bcd31edb03555bd93d81a16870aa98c6cfb79d"},
|
||||||
|
{file = "numpy-1.23.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5d5420053bbb3dd64c30e58f9363d7a9c27444c3648e61460c1237f9ec3fa14"},
|
||||||
|
{file = "numpy-1.23.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5422d6a1ea9b15577a9432e26608c73a78faf0b9039437b075cf322c92e98e7"},
|
||||||
|
{file = "numpy-1.23.3-cp39-cp39-win32.whl", hash = "sha256:c1ba66c48b19cc9c2975c0d354f24058888cdc674bebadceb3cdc9ec403fb5d1"},
|
||||||
|
{file = "numpy-1.23.3-cp39-cp39-win_amd64.whl", hash = "sha256:78a63d2df1d947bd9d1b11d35564c2f9e4b57898aae4626638056ec1a231c40c"},
|
||||||
|
{file = "numpy-1.23.3-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:17c0e467ade9bda685d5ac7f5fa729d8d3e76b23195471adae2d6a6941bd2c18"},
|
||||||
|
{file = "numpy-1.23.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:91b8d6768a75247026e951dce3b2aac79dc7e78622fc148329135ba189813584"},
|
||||||
|
{file = "numpy-1.23.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:94c15ca4e52671a59219146ff584488907b1f9b3fc232622b47e2cf832e94fb8"},
|
||||||
|
{file = "numpy-1.23.3.tar.gz", hash = "sha256:51bf49c0cd1d52be0a240aa66f3458afc4b95d8993d2d04f0d91fa60c10af6cd"},
|
||||||
|
]
|
||||||
|
packaging = [
|
||||||
|
{file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"},
|
||||||
|
{file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
|
||||||
|
]
|
||||||
|
protobuf = [
|
||||||
|
{file = "protobuf-4.21.7-cp310-abi3-win32.whl", hash = "sha256:c7cb105d69a87416bd9023e64324e1c089593e6dae64d2536f06bcbe49cd97d8"},
|
||||||
|
{file = "protobuf-4.21.7-cp310-abi3-win_amd64.whl", hash = "sha256:3ec85328a35a16463c6f419dbce3c0fc42b3e904d966f17f48bae39597c7a543"},
|
||||||
|
{file = "protobuf-4.21.7-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:db9056b6a11cb5131036d734bcbf91ef3ef9235d6b681b2fc431cbfe5a7f2e56"},
|
||||||
|
{file = "protobuf-4.21.7-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:ca200645d6235ce0df3ccfdff1567acbab35c4db222a97357806e015f85b5744"},
|
||||||
|
{file = "protobuf-4.21.7-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:b019c79e23a80735cc8a71b95f76a49a262f579d6b84fd20a0b82279f40e2cc1"},
|
||||||
|
{file = "protobuf-4.21.7-cp37-cp37m-win32.whl", hash = "sha256:d3f89ccf7182293feba2de2739c8bf34fed1ed7c65a5cf987be00311acac57c1"},
|
||||||
|
{file = "protobuf-4.21.7-cp37-cp37m-win_amd64.whl", hash = "sha256:a74d96cd960b87b4b712797c741bb3ea3a913f5c2dc4b6cbe9c0f8360b75297d"},
|
||||||
|
{file = "protobuf-4.21.7-cp38-cp38-win32.whl", hash = "sha256:8e09d1916386eca1ef1353767b6efcebc0a6859ed7f73cb7fb974feba3184830"},
|
||||||
|
{file = "protobuf-4.21.7-cp38-cp38-win_amd64.whl", hash = "sha256:9e355f2a839d9930d83971b9f562395e13493f0e9211520f8913bd11efa53c02"},
|
||||||
|
{file = "protobuf-4.21.7-cp39-cp39-win32.whl", hash = "sha256:f370c0a71712f8965023dd5b13277444d3cdfecc96b2c778b0e19acbfd60df6e"},
|
||||||
|
{file = "protobuf-4.21.7-cp39-cp39-win_amd64.whl", hash = "sha256:9643684232b6b340b5e63bb69c9b4904cdd39e4303d498d1a92abddc7e895b7f"},
|
||||||
|
{file = "protobuf-4.21.7-py2.py3-none-any.whl", hash = "sha256:8066322588d4b499869bf9f665ebe448e793036b552f68c585a9b28f1e393f66"},
|
||||||
|
{file = "protobuf-4.21.7-py3-none-any.whl", hash = "sha256:58b81358ec6c0b5d50df761460ae2db58405c063fd415e1101209221a0a810e1"},
|
||||||
|
{file = "protobuf-4.21.7.tar.gz", hash = "sha256:71d9dba03ed3432c878a801e2ea51e034b0ea01cf3a4344fb60166cb5f6c8757"},
|
||||||
|
]
|
||||||
|
psutil = [
|
||||||
|
{file = "psutil-5.9.2-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:8f024fbb26c8daf5d70287bb3edfafa22283c255287cf523c5d81721e8e5d82c"},
|
||||||
|
{file = "psutil-5.9.2-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:b2f248ffc346f4f4f0d747ee1947963613216b06688be0be2e393986fe20dbbb"},
|
||||||
|
{file = "psutil-5.9.2-cp27-cp27m-win32.whl", hash = "sha256:b1928b9bf478d31fdffdb57101d18f9b70ed4e9b0e41af751851813547b2a9ab"},
|
||||||
|
{file = "psutil-5.9.2-cp27-cp27m-win_amd64.whl", hash = "sha256:404f4816c16a2fcc4eaa36d7eb49a66df2d083e829d3e39ee8759a411dbc9ecf"},
|
||||||
|
{file = "psutil-5.9.2-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:94e621c6a4ddb2573d4d30cba074f6d1aa0186645917df42c811c473dd22b339"},
|
||||||
|
{file = "psutil-5.9.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:256098b4f6ffea6441eb54ab3eb64db9ecef18f6a80d7ba91549195d55420f84"},
|
||||||
|
{file = "psutil-5.9.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:614337922702e9be37a39954d67fdb9e855981624d8011a9927b8f2d3c9625d9"},
|
||||||
|
{file = "psutil-5.9.2-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:39ec06dc6c934fb53df10c1672e299145ce609ff0611b569e75a88f313634969"},
|
||||||
|
{file = "psutil-5.9.2-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3ac2c0375ef498e74b9b4ec56df3c88be43fe56cac465627572dbfb21c4be34"},
|
||||||
|
{file = "psutil-5.9.2-cp310-cp310-win32.whl", hash = "sha256:e4c4a7636ffc47b7141864f1c5e7d649f42c54e49da2dd3cceb1c5f5d29bfc85"},
|
||||||
|
{file = "psutil-5.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:f4cb67215c10d4657e320037109939b1c1d2fd70ca3d76301992f89fe2edb1f1"},
|
||||||
|
{file = "psutil-5.9.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:dc9bda7d5ced744622f157cc8d8bdd51735dafcecff807e928ff26bdb0ff097d"},
|
||||||
|
{file = "psutil-5.9.2-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d75291912b945a7351d45df682f9644540d564d62115d4a20d45fa17dc2d48f8"},
|
||||||
|
{file = "psutil-5.9.2-cp36-cp36m-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4018d5f9b6651f9896c7a7c2c9f4652e4eea53f10751c4e7d08a9093ab587ec"},
|
||||||
|
{file = "psutil-5.9.2-cp36-cp36m-win32.whl", hash = "sha256:f40ba362fefc11d6bea4403f070078d60053ed422255bd838cd86a40674364c9"},
|
||||||
|
{file = "psutil-5.9.2-cp36-cp36m-win_amd64.whl", hash = "sha256:9770c1d25aee91417eba7869139d629d6328a9422ce1cdd112bd56377ca98444"},
|
||||||
|
{file = "psutil-5.9.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:42638876b7f5ef43cef8dcf640d3401b27a51ee3fa137cb2aa2e72e188414c32"},
|
||||||
|
{file = "psutil-5.9.2-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91aa0dac0c64688667b4285fa29354acfb3e834e1fd98b535b9986c883c2ce1d"},
|
||||||
|
{file = "psutil-5.9.2-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4fb54941aac044a61db9d8eb56fc5bee207db3bc58645d657249030e15ba3727"},
|
||||||
|
{file = "psutil-5.9.2-cp37-cp37m-win32.whl", hash = "sha256:7cbb795dcd8ed8fd238bc9e9f64ab188f3f4096d2e811b5a82da53d164b84c3f"},
|
||||||
|
{file = "psutil-5.9.2-cp37-cp37m-win_amd64.whl", hash = "sha256:5d39e3a2d5c40efa977c9a8dd4f679763c43c6c255b1340a56489955dbca767c"},
|
||||||
|
{file = "psutil-5.9.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fd331866628d18223a4265371fd255774affd86244fc307ef66eaf00de0633d5"},
|
||||||
|
{file = "psutil-5.9.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b315febaebae813326296872fdb4be92ad3ce10d1d742a6b0c49fb619481ed0b"},
|
||||||
|
{file = "psutil-5.9.2-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7929a516125f62399d6e8e026129c8835f6c5a3aab88c3fff1a05ee8feb840d"},
|
||||||
|
{file = "psutil-5.9.2-cp38-cp38-win32.whl", hash = "sha256:561dec454853846d1dd0247b44c2e66a0a0c490f937086930ec4b8f83bf44f06"},
|
||||||
|
{file = "psutil-5.9.2-cp38-cp38-win_amd64.whl", hash = "sha256:67b33f27fc0427483b61563a16c90d9f3b547eeb7af0ef1b9fe024cdc9b3a6ea"},
|
||||||
|
{file = "psutil-5.9.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b3591616fa07b15050b2f87e1cdefd06a554382e72866fcc0ab2be9d116486c8"},
|
||||||
|
{file = "psutil-5.9.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:14b29f581b5edab1f133563272a6011925401804d52d603c5c606936b49c8b97"},
|
||||||
|
{file = "psutil-5.9.2-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4642fd93785a29353d6917a23e2ac6177308ef5e8be5cc17008d885cb9f70f12"},
|
||||||
|
{file = "psutil-5.9.2-cp39-cp39-win32.whl", hash = "sha256:ed29ea0b9a372c5188cdb2ad39f937900a10fb5478dc077283bf86eeac678ef1"},
|
||||||
|
{file = "psutil-5.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:68b35cbff92d1f7103d8f1db77c977e72f49fcefae3d3d2b91c76b0e7aef48b8"},
|
||||||
|
{file = "psutil-5.9.2.tar.gz", hash = "sha256:feb861a10b6c3bb00701063b37e4afc754f8217f0f09c42280586bd6ac712b5c"},
|
||||||
|
]
|
||||||
|
pyparsing = [
|
||||||
|
{file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"},
|
||||||
|
{file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"},
|
||||||
|
]
|
||||||
|
PyYAML = [
|
||||||
|
{file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"},
|
||||||
|
{file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"},
|
||||||
|
{file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"},
|
||||||
|
{file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"},
|
||||||
|
{file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"},
|
||||||
|
{file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"},
|
||||||
|
{file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"},
|
||||||
|
{file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"},
|
||||||
|
{file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"},
|
||||||
|
{file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"},
|
||||||
|
{file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"},
|
||||||
|
{file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"},
|
||||||
|
{file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"},
|
||||||
|
{file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"},
|
||||||
|
{file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"},
|
||||||
|
{file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"},
|
||||||
|
{file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"},
|
||||||
|
{file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"},
|
||||||
|
{file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"},
|
||||||
|
{file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"},
|
||||||
|
{file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"},
|
||||||
|
{file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"},
|
||||||
|
{file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"},
|
||||||
|
{file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"},
|
||||||
|
{file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"},
|
||||||
|
{file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"},
|
||||||
|
{file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"},
|
||||||
|
{file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"},
|
||||||
|
{file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"},
|
||||||
|
{file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"},
|
||||||
|
{file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"},
|
||||||
|
{file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"},
|
||||||
|
{file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"},
|
||||||
|
{file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"},
|
||||||
|
{file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"},
|
||||||
|
{file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"},
|
||||||
|
{file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"},
|
||||||
|
{file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"},
|
||||||
|
{file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"},
|
||||||
|
{file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"},
|
||||||
|
]
|
||||||
|
setuptools = [
|
||||||
|
{file = "setuptools-65.4.1-py3-none-any.whl", hash = "sha256:1b6bdc6161661409c5f21508763dc63ab20a9ac2f8ba20029aaaa7fdb9118012"},
|
||||||
|
{file = "setuptools-65.4.1.tar.gz", hash = "sha256:3050e338e5871e70c72983072fe34f6032ae1cdeeeb67338199c2f74e083a80e"},
|
||||||
|
]
|
||||||
|
six = [
|
||||||
|
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
|
||||||
|
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
|
||||||
|
]
|
||||||
|
torch = [
|
||||||
|
{file = "torch-1.12.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:9c038662db894a23e49e385df13d47b2a777ffd56d9bcd5b832593fab0a7e286"},
|
||||||
|
{file = "torch-1.12.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:4e1b9c14cf13fd2ab8d769529050629a0e68a6fc5cb8e84b4a3cc1dd8c4fe541"},
|
||||||
|
{file = "torch-1.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:e9c8f4a311ac29fc7e8e955cfb7733deb5dbe1bdaabf5d4af2765695824b7e0d"},
|
||||||
|
{file = "torch-1.12.1-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:976c3f997cea38ee91a0dd3c3a42322785414748d1761ef926b789dfa97c6134"},
|
||||||
|
{file = "torch-1.12.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:68104e4715a55c4bb29a85c6a8d57d820e0757da363be1ba680fa8cc5be17b52"},
|
||||||
|
{file = "torch-1.12.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:743784ccea0dc8f2a3fe6a536bec8c4763bd82c1352f314937cb4008d4805de1"},
|
||||||
|
{file = "torch-1.12.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:b5dbcca369800ce99ba7ae6dee3466607a66958afca3b740690d88168752abcf"},
|
||||||
|
{file = "torch-1.12.1-cp37-cp37m-win_amd64.whl", hash = "sha256:f3b52a634e62821e747e872084ab32fbcb01b7fa7dbb7471b6218279f02a178a"},
|
||||||
|
{file = "torch-1.12.1-cp37-none-macosx_10_9_x86_64.whl", hash = "sha256:8a34a2fbbaa07c921e1b203f59d3d6e00ed379f2b384445773bd14e328a5b6c8"},
|
||||||
|
{file = "torch-1.12.1-cp37-none-macosx_11_0_arm64.whl", hash = "sha256:42f639501928caabb9d1d55ddd17f07cd694de146686c24489ab8c615c2871f2"},
|
||||||
|
{file = "torch-1.12.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:0b44601ec56f7dd44ad8afc00846051162ef9c26a8579dda0a02194327f2d55e"},
|
||||||
|
{file = "torch-1.12.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:cd26d8c5640c3a28c526d41ccdca14cf1cbca0d0f2e14e8263a7ac17194ab1d2"},
|
||||||
|
{file = "torch-1.12.1-cp38-cp38-win_amd64.whl", hash = "sha256:42e115dab26f60c29e298559dbec88444175528b729ae994ec4c65d56fe267dd"},
|
||||||
|
{file = "torch-1.12.1-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:a8320ba9ad87e80ca5a6a016e46ada4d1ba0c54626e135d99b2129a4541c509d"},
|
||||||
|
{file = "torch-1.12.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:03e31c37711db2cd201e02de5826de875529e45a55631d317aadce2f1ed45aa8"},
|
||||||
|
{file = "torch-1.12.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:9b356aea223772cd754edb4d9ecf2a025909b8615a7668ac7d5130f86e7ec421"},
|
||||||
|
{file = "torch-1.12.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:6cf6f54b43c0c30335428195589bd00e764a6d27f3b9ba637aaa8c11aaf93073"},
|
||||||
|
{file = "torch-1.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:f00c721f489089dc6364a01fd84906348fe02243d0af737f944fddb36003400d"},
|
||||||
|
{file = "torch-1.12.1-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:bfec2843daa654f04fda23ba823af03e7b6f7650a873cdb726752d0e3718dada"},
|
||||||
|
{file = "torch-1.12.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:69fe2cae7c39ccadd65a123793d30e0db881f1c1927945519c5c17323131437e"},
|
||||||
|
]
|
||||||
|
typer = [
|
||||||
|
{file = "typer-0.6.1-py3-none-any.whl", hash = "sha256:54b19e5df18654070a82f8c2aa1da456a4ac16a2a83e6dcd9f170e291c56338e"},
|
||||||
|
{file = "typer-0.6.1.tar.gz", hash = "sha256:2d5720a5e63f73eaf31edaa15f6ab87f35f0690f8ca233017d7d23d743a91d73"},
|
||||||
|
]
|
||||||
|
typing-extensions = [
|
||||||
|
{file = "typing_extensions-4.3.0-py3-none-any.whl", hash = "sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02"},
|
||||||
|
{file = "typing_extensions-4.3.0.tar.gz", hash = "sha256:e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6"},
|
||||||
|
]
|
|
@ -0,0 +1,21 @@
|
||||||
|
[tool.poetry]
|
||||||
|
name = "bloom-inference"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "BLOOM Inference Python gRPC Server"
|
||||||
|
authors = ["Olivier Dehaene <olivier@huggingface.co>"]
|
||||||
|
|
||||||
|
[tool.poetry.dependencies]
|
||||||
|
python = "^3.9"
|
||||||
|
protobuf = "^4.21.7"
|
||||||
|
grpcio = "^1.49.1"
|
||||||
|
torch = "^1.12.1"
|
||||||
|
typer = "^0.6.1"
|
||||||
|
grpcio-reflection = "^1.49.1"
|
||||||
|
accelerate = "^0.12.0"
|
||||||
|
|
||||||
|
[tool.poetry.group.dev.dependencies]
|
||||||
|
grpcio-tools = "^1.49.1"
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["poetry-core>=1.0.0"]
|
||||||
|
build-backend = "poetry.core.masonry.api"
|
Loading…
Reference in New Issue