Spaces:
Running on Zero
| import sys | |
| import types | |
| import pytest | |
| from hackathon_advisor.dashboard_chat_contracts import parse_native_tool_call | |
| from hackathon_advisor.model_runtime import ( | |
| DEFAULT_ADAPTER_ID, | |
| DEFAULT_ADAPTER_REVISION, | |
| MiniCPMChatRunner, | |
| MiniCPMTransformersPlanner, | |
| RuleBasedChatRunner, | |
| RuleBasedPlanner, | |
| create_chat_runner, | |
| create_tool_planner, | |
| generation_lock, | |
| render_context, | |
| runtime_status, | |
| system_prompt, | |
| _best_local_device, | |
| _minicpm_generation_kwargs, | |
| _load_minicpm_causal_lm, | |
| _minicpm_chat_inputs, | |
| _minicpm_chat_inputs_with_tools, | |
| _normalize_xml_tool_output, | |
| _resolve_torch_device, | |
| _strip_unused_generation_inputs, | |
| ) | |
| from hackathon_advisor.zerogpu import gpu_task, zero_gpu_duration_seconds, zero_gpu_enabled | |
class FakeBackends:
    """Minimal stand-in for ``torch.backends`` exposing only ``mps.is_available()``."""

    def __init__(self, mps: bool) -> None:
        # Build a throwaway "MPS" class whose static probe reports the flag we were given.
        mps_namespace = {"is_available": staticmethod(lambda: mps)}
        mps_cls = type("MPS", (), mps_namespace)
        self.mps = mps_cls()
class FakeTorch:
    """Tiny ``torch`` look-alike: dtype names plus CUDA / MPS availability probes."""

    def __init__(self, cuda: bool = False, mps: bool = False) -> None:
        # Dtypes are plain strings so tests can compare them directly.
        self.bfloat16 = "bfloat16"
        self.float32 = "float32"
        cuda_cls = type("CUDA", (), {"is_available": staticmethod(lambda: cuda)})
        self.cuda = cuda_cls()
        self.backends = FakeBackends(mps)
class FakeInputs(dict):
    """Dict of tokenizer outputs that records the device passed to ``.to()``."""

    def to(self, device):
        """Store *device* under the ``"device"`` key and return this same mapping."""
        self.update({"device": device})
        return self
class FakeTokenizer:
    """Tokenizer double that records how the template and the tokenizer were invoked."""

    def __init__(self) -> None:
        # Both stay ``None`` until the corresponding method is called.
        self.template_call = None
        self.tokenizer_call = None

    def apply_chat_template(self, messages, *, tokenize, add_generation_prompt, enable_thinking):
        """Remember the template arguments and return a fixed rendered prompt."""
        recorded = {}
        recorded["messages"] = messages
        recorded["tokenize"] = tokenize
        recorded["add_generation_prompt"] = add_generation_prompt
        recorded["enable_thinking"] = enable_thinking
        self.template_call = recorded
        return "rendered prompt"

    def __call__(self, prompts, *, return_tensors):
        """Remember the tokenization arguments and return canned ``FakeInputs``."""
        self.tokenizer_call = {"prompts": prompts, "return_tensors": return_tensors}
        canned = {"input_ids": [1], "attention_mask": [1], "token_type_ids": [0]}
        return FakeInputs(canned)
class FakeMiniCPMModel:
    """Model-class double that records how it was loaded and where it was moved.

    ``last_instance`` always points at the most recently created instance so a
    test can inspect it even when the loader does not return it directly.
    """

    last_instance = None

    @classmethod
    def from_pretrained(cls, model_id, **kwargs):
        """Create an instance, record ``model_id`` / ``kwargs``, and return it.

        Must be a classmethod: it is invoked on the class itself
        (``FakeMiniCPMModel.from_pretrained(model_id, ...)``), so without the
        decorator ``model_id`` would be bound to ``cls`` and the call would fail.
        """
        instance = cls()
        instance.model_id = model_id
        instance.kwargs = kwargs
        # No device until ``.to()`` is called explicitly.
        instance.device = None
        cls.last_instance = instance
        return instance

    def to(self, device):
        """Record the target *device* and return ``self``."""
        self.device = device
        return self
class FakeToolsTokenizer(FakeTokenizer):
    """FakeTokenizer that also records the native tools= template path."""

    def apply_chat_template(
        self, messages, *, tokenize, add_generation_prompt, enable_thinking, tools=None
    ):
        """Record the base template arguments, plus ``tools`` when it was supplied."""
        # The parent stores the four common keys and returns the fixed prompt.
        rendered = super().apply_chat_template(
            messages,
            tokenize=tokenize,
            add_generation_prompt=add_generation_prompt,
            enable_thinking=enable_thinking,
        )
        # The "tools" key is only present when a tools list was actually passed.
        if tools is not None:
            self.template_call["tools"] = tools
        return rendered
class FakeStreamer:
    """Stands in for transformers.TextIteratorStreamer in the worker-thread flow."""

    def __init__(self, tokenizer, *, skip_prompt, skip_special_tokens) -> None:
        # The constructor arguments are accepted for signature compatibility and ignored.
        import queue

        self._queue: queue.Queue = queue.Queue()

    def put(self, piece) -> None:
        """Enqueue one streamed piece."""
        self._queue.put(piece)

    def end(self) -> None:
        """Enqueue the ``None`` sentinel that terminates iteration."""
        self._queue.put(None)

    def __iter__(self):
        """Yield queued pieces (blocking on the queue) until the sentinel arrives."""
        while (piece := self._queue.get()) is not None:
            yield piece
class FakeParameter:
    """Model-parameter stand-in whose ``device`` is always ``"cpu"``."""

    device: str = "cpu"
class FakeAdapterContext:
    """Context manager that logs entry and exit into a caller-supplied list."""

    def __init__(self, log: list[str]) -> None:
        # The list is shared with the caller, not copied, so the test can inspect it.
        self._log = log

    def __enter__(self):
        """Append ``"adapter_disabled"`` to the log and return the context itself."""
        self._log.append("adapter_disabled")
        return self

    def __exit__(self, *_exc_details):
        """Append ``"adapter_restored"``; returning ``False`` lets exceptions propagate."""
        self._log.append("adapter_restored")
        return False
| class FakeChatModel: | |
| def __init__(self, pieces: tuple[str, ...], adapter_log: list[str] | None = None) -> None: | |
| self.pieces = pieces | |
| self.adapter_log = adapter_log | |
| self.generate_calls: list[dict] = [] | |
| self.lock_was_held: list[bool] = [] | |
| def parameters(self): | |
| return iter([FakeParameter()]) | |
| def generate(self, **kwargs) -> None: | |
| self.lock_was_held.append(generation_lock().locked()) | |
| self.generate_calls.append(kwargs) | |
| streamer = kwargs["streamer"] | |
| for piece in self.pieces: | |
| streamer.put(piece) | |
| streamer.end() | |
class FakeAdapterChatModel(FakeChatModel):
    """FakeChatModel variant that additionally exposes ``disable_adapter()``."""

    def disable_adapter(self):
        """Return a context manager that logs into this model's ``adapter_log``."""
        log = self.adapter_log
        assert log is not None
        return FakeAdapterContext(log)
@pytest.fixture
def fake_transformers(monkeypatch: pytest.MonkeyPatch):
    """Install a stub ``transformers`` module for the duration of one test.

    The stub only provides ``TextIteratorStreamer`` (mapped to ``FakeStreamer``).
    It is registered through ``monkeypatch`` so ``sys.modules`` is restored when
    the test finishes. Tests request this by argument name, which requires the
    function to be registered as a pytest fixture.

    Returns:
        The stub module namespace that was placed in ``sys.modules``.
    """
    module = types.SimpleNamespace(TextIteratorStreamer=FakeStreamer)
    monkeypatch.setitem(sys.modules, "transformers", module)
    return module
def chat_runner_with(model: FakeChatModel) -> MiniCPMChatRunner:
    """Wrap *model* in a MiniCPM planner with a fake tokenizer and return a chat runner.

    An adapter id is configured only when the model offers ``disable_adapter``.
    """
    if hasattr(model, "disable_adapter"):
        adapter_id = "build-small-hackathon/some-lora"
    else:
        adapter_id = ""
    planner = MiniCPMTransformersPlanner("openbmb/MiniCPM5-1B", adapter_id=adapter_id)
    # Pre-populate the planner's private slots with the fakes.
    planner._model = model
    planner._tokenizer = FakeToolsTokenizer()
    return MiniCPMChatRunner(planner)
def test_chat_inputs_with_tools_passes_native_tools() -> None:
    """The ``tools`` list reaches the chat template and the inputs land on the device."""
    tool_specs = [{"type": "function", "function": {"name": "list_quests"}}]
    fake_tokenizer = FakeToolsTokenizer()

    model_inputs = _minicpm_chat_inputs_with_tools(
        fake_tokenizer,
        [{"role": "user", "content": "hello"}],
        tools=tool_specs,
        enable_thinking=False,
        device="cpu",
    )

    recorded = fake_tokenizer.template_call
    assert recorded["tools"] == tool_specs
    assert recorded["enable_thinking"] is False
    # token_type_ids is expected to be gone and the device recorded by ``.to()``.
    assert model_inputs == {"input_ids": [1], "attention_mask": [1], "device": "cpu"}
def test_chat_runner_streams_under_lock_with_adapter_disabled(fake_transformers) -> None:
    """Tool-pass streaming yields numbered pieces, holds the lock, and toggles the adapter."""
    expected_pieces = ["<function ", 'name="list_quests">', "</function>"]
    adapter_events: list[str] = []
    model = FakeAdapterChatModel(tuple(expected_pieces), adapter_events)
    runner = chat_runner_with(model)

    streamed = list(
        runner.stream(
            [{"role": "user", "content": "what quests exist"}],
            tools=[{"type": "function", "function": {"name": "list_quests"}}],
            max_new_tokens=96,
        )
    )

    texts = [text for _count, text in streamed]
    counts = [count for count, _text in streamed]
    assert texts == expected_pieces
    assert counts == [1, 2, 3]
    assert adapter_events == ["adapter_disabled", "adapter_restored"]
    # The lock was held during generate() and released afterwards.
    assert model.lock_was_held == [True]
    assert generation_lock().locked() is False

    first_call = model.generate_calls[0]
    assert first_call["max_new_tokens"] == 96
    assert first_call["do_sample"] is False
    assert "tools" in runner._planner._tokenizer.template_call
def test_chat_runner_forwards_enable_thinking_to_the_template(fake_transformers) -> None:
    """``enable_thinking=True`` reaches the template; runner classes advertise support."""
    model = FakeChatModel(("thoughts</think>\n\nanswer",))
    runner = chat_runner_with(model)

    stream = runner.stream(
        [{"role": "user", "content": "hi"}],
        tools=[{"type": "function"}],
        max_new_tokens=4096,
        enable_thinking=True,
    )
    # Drain the stream; only the recorded calls matter here.
    for _ in stream:
        pass

    recorded = runner._planner._tokenizer.template_call
    assert recorded["enable_thinking"] is True
    assert model.generate_calls[0]["max_new_tokens"] == 4096
    assert MiniCPMChatRunner.supports_thinking is True
    assert RuleBasedChatRunner.supports_thinking is False
def test_chat_runner_answer_pass_omits_tools_and_adapter_toggle(fake_transformers) -> None:
    """Streaming without ``tools`` joins to the full answer and records no tools key."""
    model = FakeChatModel(("The map ", "shows ten projects."))
    runner = chat_runner_with(model)
    conversation = [
        {"role": "user", "content": "what is everyone building"},
        {"role": "assistant", "content": "", "tool_calls": []},
        {"role": "tool", "content": "{}"},
    ]

    streamed = list(runner.stream(conversation, max_new_tokens=200))

    answer = "".join(text for _count, text in streamed)
    assert answer == "The map shows ten projects."
    assert model.lock_was_held == [True]
    assert "tools" not in runner._planner._tokenizer.template_call
def test_chat_runner_surfaces_generation_errors(fake_transformers) -> None:
    """An exception raised inside ``generate`` reaches the consumer and frees the lock."""

    class ExplodingModel(FakeChatModel):
        def generate(self, **kwargs) -> None:
            # End the stream first, then fail.
            kwargs["streamer"].end()
            raise RuntimeError("boom")

    runner = chat_runner_with(ExplodingModel(()))
    messages = [{"role": "user", "content": "hi"}]

    with pytest.raises(RuntimeError, match="boom"):
        for _ in runner.stream(messages, max_new_tokens=10):
            pass

    assert generation_lock().locked() is False
def test_early_close_releases_generation_lock(fake_transformers) -> None:
    """Closing the stream after a single piece must leave the generation lock free."""
    five_pieces = ("tok1 ", "tok2 ", "tok3 ", "tok4 ", "tok5")
    runner = chat_runner_with(FakeChatModel(five_pieces))

    stream = runner.stream([{"role": "user", "content": "hi"}], max_new_tokens=32)
    next(stream)  # consume one piece then abandon mid-stream
    stream.close()

    assert generation_lock().locked() is False
def test_rule_chat_runner_escapes_xml_special_characters() -> None:
    """A query containing ``&``, ``<`` and ``>`` round-trips through the native tool call."""
    user_text = "find projects about A & B <robots>"
    stream = RuleBasedChatRunner().stream(
        [{"role": "user", "content": user_text}],
        tools=[{"type": "function"}],
        max_new_tokens=96,
    )
    emitted = "".join(text for _count, text in stream)

    call = parse_native_tool_call(emitted)
    assert call.name == "search_projects"
    assert call.arguments["query"] == user_text
def test_rule_chat_runner_routes_tools_pass_through_intents() -> None:
    """A "most quests" question is expected to emit a ``top_projects_by_quests`` call."""
    stream = RuleBasedChatRunner().stream(
        [{"role": "user", "content": "who completed the most quests"}],
        tools=[{"type": "function"}],
        max_new_tokens=96,
    )
    emitted = "".join(text for _count, text in stream)

    call = parse_native_tool_call(emitted)
    assert call.name == "top_projects_by_quests"
def test_rule_chat_runner_answer_pass_is_deterministic() -> None:
    """Without ``tools`` the rule runner's answer contains the phrase "verified data"."""
    conversation = [
        {"role": "user", "content": "hi"},
        {"role": "tool", "content": "{}"},
    ]
    stream = RuleBasedChatRunner().stream(conversation, max_new_tokens=200)
    answer = "".join(text for _count, text in stream)

    assert "verified data" in answer
def test_create_chat_runner_matches_advisor_backend() -> None:
    """Each planner type is paired with its matching chat-runner type."""
    minicpm_runner = create_chat_runner(MiniCPMTransformersPlanner("openbmb/MiniCPM5-1B"))
    assert isinstance(minicpm_runner, MiniCPMChatRunner)

    rules_runner = create_chat_runner(RuleBasedPlanner())
    assert isinstance(rules_runner, RuleBasedChatRunner)
def test_base_model_context_is_null_without_adapter() -> None:
    """Entering ``base_model_context`` must not fail when no adapter is configured.

    ``FakeChatModel`` has no ``disable_adapter`` method, so the context has to
    work without calling it.
    """
    adapterless = MiniCPMTransformersPlanner("openbmb/MiniCPM5-1B", adapter_id="")
    adapterless._model = FakeChatModel(())

    with adapterless.base_model_context():
        pass  # no adapter -> nullcontext, nothing to toggle
def test_rule_planner_emits_valid_search_call() -> None:
    """A "search ..." request becomes a ``search_projects`` call carrying the full text."""
    request = "search similar lullaby audio projects"

    result = RuleBasedPlanner().plan(request, {})

    assert result.status == "valid"
    assert result.call.name == "search_projects"
    assert result.call.arguments["query"] == request
def test_rule_planner_uses_plan_when_idea_exists() -> None:
    """With an idea already in state, "make a build plan" maps to ``make_plan``."""
    state = {"ideas": [{"title": "A", "pitch": "B"}]}

    result = RuleBasedPlanner().plan("make a build plan", state)

    assert result.status == "valid"
    assert result.call.name == "make_plan"
def test_rule_planner_keeps_empty_board_commands_as_commands() -> None:
    """Plan / compare commands stay commands even when the state is empty."""
    planner = RuleBasedPlanner()
    commands = ("make a build plan", "compare ideas")
    expected_tools = ("make_plan", "compare_ideas")

    # Plan everything first, then check, mirroring a call-then-assert flow.
    results = [planner.plan(command, {}) for command in commands]

    for result, tool_name in zip(results, expected_tools):
        assert result.status == "valid"
        assert result.call.name == tool_name
def test_rule_planner_defaults_blank_to_list_projects() -> None:
    """An empty request resolves to a valid ``list_projects`` call."""
    blank_result = RuleBasedPlanner().plan("", {})

    assert blank_result.status == "valid"
    assert blank_result.call.name == "list_projects"
def test_rule_planner_routes_project_reference_commands() -> None:
    """Map / project / Space-URL phrasings route to list_projects or get_project."""
    planner = RuleBasedPlanner()
    space_url = "https://huggingface.co/spaces/build-small-hackathon/lolaby"

    map_result = planner.plan("show current map", {})
    slug_result = planner.plan("read project lolaby", {})
    url_result = planner.plan(f"open space {space_url}", {})

    assert map_result.status == "valid"
    assert map_result.call.name == "list_projects"

    assert slug_result.status == "valid"
    assert slug_result.call.name == "get_project"
    assert slug_result.call.arguments["id"] == "lolaby"

    assert url_result.status == "valid"
    assert url_result.call.name == "get_project"
    assert url_result.call.arguments["id"] == "build-small-hackathon/lolaby"
def test_rule_planner_keeps_project_words_inside_ideas() -> None:
    """An idea sentence that mentions "show projects" is still saved as an idea."""
    idea_text = "A dashboard that helps teams show projects to mentors"

    result = RuleBasedPlanner().plan(idea_text, {})

    assert result.status == "valid"
    assert result.call.name == "save_idea"
def test_rule_planner_does_not_match_commands_inside_idea_words() -> None:
    """Ideas containing "plant" or "cooking plan" must resolve to ``save_idea``."""
    planner = RuleBasedPlanner()
    idea_texts = (
        "A neighborhood seed swap archive that reminds gardeners when to plant shared seeds",
        "A countertop helper that turns pantry leftovers into a weekly cooking plan",
    )

    # Resolve both ideas before asserting on either.
    results = [planner.plan(text, {}) for text in idea_texts]

    for result in results:
        assert result.status == "valid"
        assert result.call.name == "save_idea"
def test_rule_planner_splits_explicit_idea_pitch() -> None:
    """``idea: <title> -- <pitch>`` is split into separate title and pitch arguments."""
    request = "idea: Hands-on science coach -- A lab-notebook companion for household experiments."

    result = RuleBasedPlanner().plan(request, {})

    assert result.status == "valid"
    assert result.call.name == "save_idea"
    arguments = result.call.arguments
    assert arguments["title"] == "Hands-on science coach"
    assert arguments["pitch"] == "A lab-notebook companion for household experiments."
def test_render_context_includes_state() -> None:
    """The rendered context mentions state entries, the call format, and the tool list."""
    state = {
        "ideas": [{"title": "Archive Cartographer", "pitch": "Map family memories."}],
        "trace": [{"input": "first", "verdict": "ECHO x2", "overall": 5.1}],
    }

    rendered = render_context("make a plan", state)

    expected_fragments = (
        "Archive Cartographer",
        "ECHO x2",
        '<function name="tool_name">',
        "Available tools:",
        "search_projects",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
def test_system_prompt_keeps_runtime_role_user_facing() -> None:
    """The system prompt names the almanac and omits the two excluded names."""
    text = system_prompt()

    assert "The Unwritten Almanac" in text
    for excluded in ("Mothback", "Build Small"):
        assert excluded not in text
def test_create_tool_planner_defaults_to_minicpm(monkeypatch: pytest.MonkeyPatch) -> None:
    """With the backend/adapter variables unset, the factory yields an unloaded MiniCPM planner."""
    for variable in ("ADVISOR_MODEL_BACKEND", "ADVISOR_ADAPTER_ID", "ADVISOR_ADAPTER_REVISION"):
        monkeypatch.delenv(variable, raising=False)

    planner = create_tool_planner()
    report = runtime_status(planner).to_dict()

    assert isinstance(planner, MiniCPMTransformersPlanner)
    assert report["backend"] == "minicpm-transformers"
    assert report["loaded"] is False
    assert report["adapter_id"] == DEFAULT_ADAPTER_ID
    assert report["adapter_revision"] == DEFAULT_ADAPTER_REVISION
def test_create_tool_planner_accepts_explicit_rules_backend(monkeypatch: pytest.MonkeyPatch) -> None:
    """``ADVISOR_MODEL_BACKEND=rules`` selects the rule planner, which reports as loaded."""
    monkeypatch.setenv("ADVISOR_MODEL_BACKEND", "rules")

    planner = create_tool_planner()

    assert isinstance(planner, RuleBasedPlanner)
    report = runtime_status(planner).to_dict()
    assert report["loaded"] is True
def test_create_tool_planner_accepts_adapter_env(monkeypatch: pytest.MonkeyPatch) -> None:
    """Model id, adapter id and adapter revision from the environment appear in the status."""
    environment = {
        "ADVISOR_MODEL_BACKEND": "minicpm-transformers",
        "ADVISOR_MODEL_ID": "openbmb/MiniCPM5-1B",
        "ADVISOR_ADAPTER_ID": DEFAULT_ADAPTER_ID,
        "ADVISOR_ADAPTER_REVISION": "abc123",
    }
    for variable, value in environment.items():
        monkeypatch.setenv(variable, value)

    planner = create_tool_planner()
    report = runtime_status(planner).to_dict()

    assert isinstance(planner, MiniCPMTransformersPlanner)
    assert report["backend"] == "minicpm-transformers"
    assert report["model_id"] == "openbmb/MiniCPM5-1B"
    assert report["adapter_id"] == DEFAULT_ADAPTER_ID
    assert report["adapter_revision"] == "abc123"
    assert report["loaded"] is False
def test_create_tool_planner_rejects_unknown_backend(monkeypatch: pytest.MonkeyPatch) -> None:
    """An unrecognised backend name raises ``RuntimeError`` mentioning "Unsupported"."""
    monkeypatch.setenv("ADVISOR_MODEL_BACKEND", "bogus")

    expect_unsupported = pytest.raises(RuntimeError, match="Unsupported")
    with expect_unsupported:
        create_tool_planner()
def test_minicpm_status_is_lazy() -> None:
    """A freshly constructed MiniCPM planner reports ``loaded`` as False."""
    planner = MiniCPMTransformersPlanner("openbmb/MiniCPM5-1B", DEFAULT_ADAPTER_ID)

    report = runtime_status(planner).to_dict()

    assert report["backend"] == "minicpm-transformers"
    assert report["adapter_id"] == DEFAULT_ADAPTER_ID
    # No revision was passed to the constructor, so it is reported as empty.
    assert report["adapter_revision"] == ""
    assert report["loaded"] is False
def test_zerogpu_disabled_leaves_function_unwrapped(monkeypatch: pytest.MonkeyPatch) -> None:
    """With ``ADVISOR_ZERO_GPU`` unset, ``gpu_task`` returns the very same function object."""
    monkeypatch.delenv("ADVISOR_ZERO_GPU", raising=False)

    def marker() -> str:
        return "ok"

    assert zero_gpu_enabled() is False
    wrapped = gpu_task(marker)
    assert wrapped is marker
def test_zerogpu_duration_validates_positive_values(monkeypatch: pytest.MonkeyPatch) -> None:
    """Duration ``7`` is accepted; ``0`` and ``121`` are rejected with distinct messages."""
    variable = "ADVISOR_ZERO_GPU_DURATION"

    monkeypatch.setenv(variable, "7")
    assert zero_gpu_duration_seconds() == 7

    rejected = (("0", "positive"), ("121", "at most 120"))
    for raw_value, message_pattern in rejected:
        monkeypatch.setenv(variable, raw_value)
        with pytest.raises(RuntimeError, match=message_pattern):
            zero_gpu_duration_seconds()
def test_generation_inputs_drop_token_type_ids() -> None:
    """``_strip_unused_generation_inputs`` removes ``token_type_ids`` in place."""
    payload = {"input_ids": [1], "attention_mask": [1], "token_type_ids": [0]}

    # The return value is ignored: the passed-in mapping itself is inspected.
    _strip_unused_generation_inputs(payload)

    assert payload == {"input_ids": [1], "attention_mask": [1]}
def test_minicpm_loader_matches_official_cuda_dtype() -> None:
    """Loading for ``cuda`` uses bfloat16 + trust_remote_code and moves the model to cuda."""
    model_id = "openbmb/MiniCPM5-1B"

    loaded = _load_minicpm_causal_lm(FakeMiniCPMModel, model_id, "cuda", FakeTorch())

    assert loaded.model_id == model_id
    assert loaded.kwargs == {"torch_dtype": "bfloat16", "trust_remote_code": True}
    assert loaded.device == "cuda"
def test_minicpm_loader_uses_device_map_for_auto() -> None:
    """Loading for ``auto`` passes ``device_map="auto"`` and never calls ``.to()``."""
    loaded = _load_minicpm_causal_lm(FakeMiniCPMModel, "openbmb/MiniCPM5-1B", "auto", FakeTorch())

    expected_kwargs = {
        "torch_dtype": "bfloat16",
        "device_map": "auto",
        "trust_remote_code": True,
    }
    assert loaded.kwargs == expected_kwargs
    # ``device`` stays at the value set by ``from_pretrained``.
    assert loaded.device is None
def test_minicpm_chat_inputs_follow_official_template_flow() -> None:
    """Messages are rendered via the template, tokenized as ``pt``, then moved to the device."""
    messages = [{"role": "user", "content": "hello"}]
    fake_tokenizer = FakeTokenizer()

    model_inputs = _minicpm_chat_inputs(
        fake_tokenizer,
        messages,
        enable_thinking=False,
        device="cuda",
    )

    expected_template_call = {
        "messages": [{"role": "user", "content": "hello"}],
        "tokenize": False,
        "add_generation_prompt": True,
        "enable_thinking": False,
    }
    assert fake_tokenizer.template_call == expected_template_call
    # The rendered prompt is tokenized as a one-element batch.
    assert fake_tokenizer.tokenizer_call == {
        "prompts": ["rendered prompt"],
        "return_tensors": "pt",
    }
    assert model_inputs == {"input_ids": [1], "attention_mask": [1], "device": "cuda"}
def test_minicpm_generation_kwargs_match_demo_sampling_policy() -> None:
    """Positive temperature enables sampling; zero temperature drops the sampling knobs."""
    base_inputs = {"input_ids": [1], "attention_mask": [1]}

    with_sampling = _minicpm_generation_kwargs(
        base_inputs, max_new_tokens=32, temperature=0.9, top_p=0.95
    )
    greedy = _minicpm_generation_kwargs(base_inputs, max_new_tokens=32, temperature=0.0)

    expected_sampling = {
        "input_ids": [1],
        "attention_mask": [1],
        "max_new_tokens": 32,
        "temperature": 0.9,
        "top_p": 0.95,
        "do_sample": True,
    }
    expected_greedy = {
        "input_ids": [1],
        "attention_mask": [1],
        "max_new_tokens": 32,
        "do_sample": False,
    }
    assert with_sampling == expected_sampling
    assert greedy == expected_greedy
def test_model_xml_fragment_is_normalized() -> None:
    """A fragment missing ``<function `` and ``</function>`` is completed on both ends."""
    fragment = 'name="save_idea">{"title":"A","pitch":"B"}'
    expected = '<function name="save_idea">{"title":"A","pitch":"B"}</function>'

    assert _normalize_xml_tool_output(fragment) == expected
def test_resolve_device_keeps_auto_and_explicit_cpu() -> None:
    """``auto`` is returned unchanged, and ``cpu`` wins even when accelerators exist."""
    resolved_auto = _resolve_torch_device("auto", FakeTorch())
    assert resolved_auto == "auto"

    fully_equipped = FakeTorch(cuda=True, mps=True)
    assert _resolve_torch_device("cpu", fully_equipped) == "cpu"
def test_resolve_device_prefers_cuda_then_mps_then_cpu(monkeypatch) -> None:
    """Device preference order is cuda, then mps, then cpu."""
    monkeypatch.delenv("ADVISOR_ZERO_GPU", raising=False)

    # (cuda available, mps available) -> expected device
    ladder = (
        (True, True, "cuda"),
        (False, True, "mps"),
        (False, False, "cpu"),
    )
    for has_cuda, has_mps, expected in ladder:
        assert _best_local_device(FakeTorch(cuda=has_cuda, mps=has_mps)) == expected

    # "local" resolves through the same ladder
    assert _resolve_torch_device("local", FakeTorch(cuda=False, mps=True)) == "mps"
def test_resolve_device_unavailable_request_degrades_gracefully(monkeypatch) -> None:
    """Requesting an unavailable device falls back to one that is available."""
    monkeypatch.delenv("ADVISOR_ZERO_GPU", raising=False)
    mps_only = FakeTorch(cuda=False, mps=True)

    # asking for cuda on an MPS-only box lands on mps, not a crash
    assert _resolve_torch_device("cuda", mps_only) == "mps"
def test_resolve_device_skips_cuda_under_zero_gpu(monkeypatch) -> None:
    """With ``ADVISOR_ZERO_GPU=1`` a CUDA-capable fake still resolves to ``cpu``."""
    # In a ZeroGPU main process there is no local CUDA, and probing it is avoided.
    monkeypatch.setenv("ADVISOR_ZERO_GPU", "1")
    cuda_only = FakeTorch(cuda=True, mps=False)

    assert _best_local_device(cuda_only) == "cpu"
def test_runtime_status_reports_configured_device() -> None:
    """Status echoes the configured device string; the rule planner reports an empty one."""
    minicpm_report = runtime_status(
        MiniCPMTransformersPlanner("openbmb/MiniCPM5-1B", device="local")
    ).to_dict()
    assert minicpm_report["device"] == "local"

    rules_report = runtime_status(RuleBasedPlanner()).to_dict()
    assert rules_report["device"] == ""