fix(launcher): revert change on shard errors (#173)
This commit is contained in:
parent
880a76eed5
commit
e3a63b6fbc
|
@ -349,8 +349,8 @@ fn main() -> ExitCode {
|
||||||
Err(TryRecvError::Empty) => {
|
Err(TryRecvError::Empty) => {
|
||||||
sleep(Duration::from_millis(100));
|
sleep(Duration::from_millis(100));
|
||||||
}
|
}
|
||||||
Ok(ShardStatus::Failed(rank)) => {
|
Ok(ShardStatus::Failed((rank, err))) => {
|
||||||
tracing::error!("Shard {} failed to start.", rank);
|
tracing::error!("Shard {} failed to start:\n{}", rank, err);
|
||||||
shutdown_shards(shutdown, &shutdown_receiver);
|
shutdown_shards(shutdown, &shutdown_receiver);
|
||||||
return ExitCode::FAILURE;
|
return ExitCode::FAILURE;
|
||||||
}
|
}
|
||||||
|
@ -457,8 +457,8 @@ fn main() -> ExitCode {
|
||||||
let mut exit_code = ExitCode::SUCCESS;
|
let mut exit_code = ExitCode::SUCCESS;
|
||||||
|
|
||||||
while running.load(Ordering::SeqCst) {
|
while running.load(Ordering::SeqCst) {
|
||||||
if let Ok(ShardStatus::Failed(rank)) = status_receiver.try_recv() {
|
if let Ok(ShardStatus::Failed((rank, err))) = status_receiver.try_recv() {
|
||||||
tracing::error!("Shard {rank} failed.");
|
tracing::error!("Shard {rank} failed:\n{err}");
|
||||||
exit_code = ExitCode::FAILURE;
|
exit_code = ExitCode::FAILURE;
|
||||||
break;
|
break;
|
||||||
};
|
};
|
||||||
|
@ -488,7 +488,7 @@ fn main() -> ExitCode {
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
enum ShardStatus {
|
enum ShardStatus {
|
||||||
Ready,
|
Ready,
|
||||||
Failed(usize),
|
Failed((usize, String)),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
|
@ -627,7 +627,9 @@ fn shard_manager(
|
||||||
tracing::error!("Please install it with `make install-server`")
|
tracing::error!("Please install it with `make install-server`")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
status_sender.send(ShardStatus::Failed(rank)).unwrap();
|
status_sender
|
||||||
|
.send(ShardStatus::Failed((rank, err.to_string())))
|
||||||
|
.unwrap();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -656,7 +658,11 @@ fn shard_manager(
|
||||||
loop {
|
loop {
|
||||||
// Process exited
|
// Process exited
|
||||||
if p.poll().is_some() {
|
if p.poll().is_some() {
|
||||||
status_sender.send(ShardStatus::Failed(rank)).unwrap();
|
let mut err = String::new();
|
||||||
|
p.stderr.take().unwrap().read_to_string(&mut err).unwrap();
|
||||||
|
status_sender
|
||||||
|
.send(ShardStatus::Failed((rank, err)))
|
||||||
|
.unwrap();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue