Workaround for CUDA errors on some rented machines
This commit is contained in:
parent
a95d2de150
commit
e95a8861e9
|
@@ -34,7 +34,11 @@ def _collect_rng_states(include_cuda: bool = True) -> Dict[str, Any]:
|
||||||
"python": python_get_rng_state(),
|
"python": python_get_rng_state(),
|
||||||
}
|
}
|
||||||
if include_cuda:
|
if include_cuda:
|
||||||
states["torch.cuda"] = torch.cuda.get_rng_state_all()
|
try:
|
||||||
|
states["torch.cuda"] = torch.cuda.get_rng_state_all()
|
||||||
|
except RuntimeError:
|
||||||
|
# CUDA initialization failure.
|
||||||
|
pass
|
||||||
return states
|
return states
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue