Work around CUDA errors on some rented machines

This commit is contained in:
Victor Hall 2023-11-15 14:45:43 -05:00
parent a95d2de150
commit e95a8861e9
1 changed file with 5 additions and 1 deletion

View File

@@ -34,7 +34,11 @@ def _collect_rng_states(include_cuda: bool = True) -> Dict[str, Any]:
"python": python_get_rng_state(),
}
if include_cuda:
states["torch.cuda"] = torch.cuda.get_rng_state_all()
try:
states["torch.cuda"] = torch.cuda.get_rng_state_all()
except RuntimeError:
# CUDA initialization failure.
pass
return states