fix(server): Do not init process group if already initialized (#388)
This commit is contained in:
parent
aefde28b45
commit
ae466a8736
|
@ -2,6 +2,7 @@ import os
|
|||
import torch
|
||||
|
||||
from datetime import timedelta
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class FakeBarrier:
|
||||
|
@ -59,13 +60,17 @@ def initialize_torch_distributed():
|
|||
else:
|
||||
if os.getenv("DEBUG", None) == "1":
|
||||
return FakeGroup(rank, world_size), rank, world_size
|
||||
# Call the init process.
|
||||
torch.distributed.init_process_group(
|
||||
backend=backend,
|
||||
world_size=world_size,
|
||||
rank=rank,
|
||||
timeout=timedelta(seconds=60),
|
||||
pg_options=options,
|
||||
)
|
||||
|
||||
if not torch.distributed.is_initialized():
|
||||
# Call the init process.
|
||||
torch.distributed.init_process_group(
|
||||
backend=backend,
|
||||
world_size=world_size,
|
||||
rank=rank,
|
||||
timeout=timedelta(seconds=60),
|
||||
pg_options=options,
|
||||
)
|
||||
else:
|
||||
logger.warning("torch.distributed is already initialized.")
|
||||
|
||||
return torch.distributed.group.WORLD, rank, world_size
|
||||
|
|
Loading…
Reference in New Issue