fix(launcher): revert change on shard errors (#173)

This commit is contained in:
OlivierDehaene 2023-04-13 11:07:11 +02:00 committed by GitHub
parent 880a76eed5
commit e3a63b6fbc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 13 additions and 7 deletions

View File

@ -349,8 +349,8 @@ fn main() -> ExitCode {
Err(TryRecvError::Empty) => { Err(TryRecvError::Empty) => {
sleep(Duration::from_millis(100)); sleep(Duration::from_millis(100));
} }
Ok(ShardStatus::Failed(rank)) => { Ok(ShardStatus::Failed((rank, err))) => {
tracing::error!("Shard {} failed to start.", rank); tracing::error!("Shard {} failed to start:\n{}", rank, err);
shutdown_shards(shutdown, &shutdown_receiver); shutdown_shards(shutdown, &shutdown_receiver);
return ExitCode::FAILURE; return ExitCode::FAILURE;
} }
@ -457,8 +457,8 @@ fn main() -> ExitCode {
let mut exit_code = ExitCode::SUCCESS; let mut exit_code = ExitCode::SUCCESS;
while running.load(Ordering::SeqCst) { while running.load(Ordering::SeqCst) {
if let Ok(ShardStatus::Failed(rank)) = status_receiver.try_recv() { if let Ok(ShardStatus::Failed((rank, err))) = status_receiver.try_recv() {
tracing::error!("Shard {rank} failed."); tracing::error!("Shard {rank} failed:\n{err}");
exit_code = ExitCode::FAILURE; exit_code = ExitCode::FAILURE;
break; break;
}; };
@ -488,7 +488,7 @@ fn main() -> ExitCode {
#[derive(Debug)] #[derive(Debug)]
enum ShardStatus { enum ShardStatus {
Ready, Ready,
Failed(usize), Failed((usize, String)),
} }
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
@ -627,7 +627,9 @@ fn shard_manager(
tracing::error!("Please install it with `make install-server`") tracing::error!("Please install it with `make install-server`")
} }
} }
status_sender.send(ShardStatus::Failed(rank)).unwrap(); status_sender
.send(ShardStatus::Failed((rank, err.to_string())))
.unwrap();
return; return;
} }
}; };
@ -656,7 +658,11 @@ fn shard_manager(
loop { loop {
// Process exited // Process exited
if p.poll().is_some() { if p.poll().is_some() {
status_sender.send(ShardStatus::Failed(rank)).unwrap(); let mut err = String::new();
p.stderr.take().unwrap().read_to_string(&mut err).unwrap();
status_sender
.send(ShardStatus::Failed((rank, err)))
.unwrap();
return; return;
} }