diff --git a/tests/test_allreduce.py b/tests/test_allreduce.py index 6114e86ce..ac2539e35 100644 --- a/tests/test_allreduce.py +++ b/tests/test_allreduce.py @@ -172,7 +172,6 @@ async def send_tensors(sender_index: int): ) @pytest.mark.forked @pytest.mark.asyncio -@pytest.mark.skip("Skipping test due to freezes in CI") async def test_allreduce_protocol(peer_modes, averaging_weights, peer_fractions, part_size_bytes): """Run group allreduce protocol manually without grpc, see if the internal logic is working as intended""" diff --git a/tests/test_moe.py b/tests/test_moe.py index 4cb6664c9..7ec90b925 100644 --- a/tests/test_moe.py +++ b/tests/test_moe.py @@ -21,7 +21,6 @@ @pytest.mark.forked -@pytest.mark.skip("Skipping test due to freezes in CI") def test_moe(): all_expert_uids = [ f"ffn.{np.random.randint(0, 3)}.{np.random.randint(0, 3)}.{np.random.randint(0, 3)}" for _ in range(10) @@ -66,7 +65,6 @@ def test_no_experts(): @pytest.mark.forked -@pytest.mark.skip(reason="Skipping call_many test due to freezes") def test_call_many(hidden_dim=16): k_min = 1 timeout_after_k_min = None diff --git a/tests/test_p2p_daemon.py b/tests/test_p2p_daemon.py index 0e0cd8b13..7e36470a0 100644 --- a/tests/test_p2p_daemon.py +++ b/tests/test_p2p_daemon.py @@ -46,7 +46,7 @@ async def test_startup_error_message(): ) with pytest.raises(P2PDaemonError, match=r"Daemon failed to start in .+ seconds"): - await P2P.create(startup_timeout=0.01) # Test that startup_timeout works + await P2P.create(startup_timeout=0.1) @pytest.mark.asyncio diff --git a/tests/test_start_server.py b/tests/test_start_server.py index 879a410e0..436e0d2e2 100644 --- a/tests/test_start_server.py +++ b/tests/test_start_server.py @@ -12,8 +12,8 @@ def cleanup_process(process, timeout=5): try: process.terminate() - process.wait(timeout=timeout) # Add timeout to wait - except: # noqa: E722 + process.wait(timeout=timeout) + except Exception: # Popen.wait() raises subprocess.TimeoutExpired, which is not a TimeoutError process.kill() process.wait(timeout=timeout) diff --git 
a/tests/test_training.py b/tests/test_training.py index 4d7050c9b..94c7ea993 100644 --- a/tests/test_training.py +++ b/tests/test_training.py @@ -14,7 +14,6 @@ @pytest.mark.forked -@pytest.mark.skip("Skipping test due to freezes in CI") def test_training(max_steps: int = 100, threshold: float = 0.9): dataset = load_digits(n_class=2) X_train, y_train = torch.tensor(dataset["data"], dtype=torch.float), torch.tensor(dataset["target"]) @@ -55,7 +54,6 @@ def test_training(max_steps: int = 100, threshold: float = 0.9): @pytest.mark.forked -@pytest.mark.skip("Skipping test due to freezes in CI") def test_moe_training(max_steps: int = 100, threshold: float = 0.9, num_experts=2): dataset = load_digits(n_class=2) X_train, y_train = torch.tensor(dataset["data"], dtype=torch.float), torch.tensor(dataset["target"]) @@ -108,7 +106,6 @@ def forward(self, x): @pytest.mark.forked -@pytest.mark.skip("Skipping test due to freezes in CI") def test_switch_training(max_steps: int = 10, threshold: float = 0.9, num_experts=5): dataset = load_digits(n_class=2) X_train, y_train = torch.tensor(dataset["data"], dtype=torch.float), torch.tensor(dataset["target"])