Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

conftest.py +24 -0
run_backend_ops.py +46 -0
run_bench_tests.py +83 -0
run_scorecard.py +375 -0
utils.py +105 -0

conftest.py ADDED Viewed

	@@ -0,0 +1,24 @@

+# ---------------------------------------------------------------------
+# Copyright (c) 2025 Qualcomm Technologies, Inc. and/or its subsidiaries.
+# SPDX-License-Identifier: BSD-3-Clause
+# ---------------------------------------------------------------------
+"""Shared pytest fixtures for QDC on-device test runners."""
+import os
+import pytest
+from appium import webdriver
+from utils import options, write_qdc_log
+@pytest.fixture(scope="session", autouse=True)
+def driver():
+    return webdriver.Remote(command_executor="http://127.0.0.1:4723/wd/hub", options=options)
+def pytest_sessionfinish(session, exitstatus):
+    xml_path = getattr(session.config.option, "xmlpath", None) or "results.xml"
+    if os.path.exists(xml_path):
+        with open(xml_path) as f:
+            write_qdc_log("results.xml", f.read())

run_backend_ops.py ADDED Viewed

	@@ -0,0 +1,46 @@

+# ---------------------------------------------------------------------
+# Copyright (c) 2025 Qualcomm Technologies, Inc. and/or its subsidiaries.
+# SPDX-License-Identifier: BSD-3-Clause
+# ---------------------------------------------------------------------
+"""
+On-device test-backend-ops runner for llama.cpp (HTP0 backend).
+Linux/IoT device version (QCS9075M, etc.)
+Executed by QDC's test framework on the QDC runner.
+The runner has SSH access to the allocated Linux device.
+"""
+import os
+import sys
+import pytest
+from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_shell_command, write_qdc_log
+@pytest.fixture(scope="session", autouse=True)
+def install(driver):
+    push_bundle_if_needed(f"{BIN_PATH}/test-backend-ops")
+@pytest.mark.parametrize("type_a", ["mxfp4", "fp16", "q4_0"])
+def test_backend_ops_htp0(type_a):
+    cmd = f"{CMD_PREFIX} GGML_HEXAGON_HOSTBUF=0 GGML_HEXAGON_EXPERIMENTAL=1 {BIN_PATH}/test-backend-ops -b HTP0 -o MUL_MAT"
+    if type_a == "q4_0":
+        cmd += r' -p "^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$"'
+    else:
+        cmd += f" -p type_a={type_a}"
+    result = run_shell_command(
+        cmd,
+        check=False,
+    )
+    write_qdc_log(f"backend_ops_{type_a}.log", result.stdout or "")
+    assert result.returncode == 0, f"test-backend-ops type_a={type_a} failed (exit {result.returncode})"
+if __name__ == "__main__":
+    ret = pytest.main(["-s", "--junitxml=results.xml", os.path.realpath(__file__)])
+    if os.path.exists("results.xml"):
+        with open("results.xml") as f:
+            write_qdc_log("results.xml", f.read())
+    sys.exit(ret)

run_bench_tests.py ADDED Viewed

	@@ -0,0 +1,83 @@

+# ---------------------------------------------------------------------
+# Copyright (c) 2025 Qualcomm Technologies, Inc. and/or its subsidiaries.
+# SPDX-License-Identifier: BSD-3-Clause
+# ---------------------------------------------------------------------
+"""
+On-device bench and completion test runner for llama.cpp (CPU, GPU, NPU backends).
+Linux/IoT device version (QCS9075M, etc.)
+Executed by QDC's test framework on the QDC runner.
+The runner has SSH access to the allocated Linux device.
+Placeholders replaced at artifact creation time by run_qdc_jobs.py:
+  <<MODEL_URL>>  Direct URL to the GGUF model file (downloaded on-device via curl)
+"""
+import os
+import sys
+import pytest
+from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_shell_command, write_qdc_log
+# On-device location of the GGUF model used by the tests in this file.
+MODEL_PATH = "/tmp/model.gguf"
+# Prompt passed to llama-completion via -p.
+PROMPT = "What is the capital of France?"
+# Options added to every llama-completion invocation.
+CLI_OPTS = "--batch-size 128 -n 128 -no-cnv --seed 42"
+@pytest.fixture(scope="session", autouse=True)
+def install(driver):
+    push_bundle_if_needed(f"{BIN_PATH}/llama-cli")
+    # Skip model download if already present
+    result = run_shell_command(f"ls {MODEL_PATH}", check=False)
+    if result.returncode != 0:
+        run_shell_command(f'curl -L -J --output {MODEL_PATH} "<<MODEL_URL>>"')
+@pytest.mark.parametrize(
+    "device,extra_flags",
+    [
+        pytest.param("none", "-ctk q8_0 -ctv q8_0", id="cpu"),
+        pytest.param("GPUOpenCL", "", id="gpu"),
+        pytest.param("HTP0", "-ctk q8_0 -ctv q8_0", id="npu"),
+    ],
+)
+def test_llama_completion(device, extra_flags):
+    result = run_shell_command(
+        f'{CMD_PREFIX} {BIN_PATH}/llama-completion'
+        f' -m {MODEL_PATH} --device {device} -ngl 99 -t 4 {CLI_OPTS} {extra_flags} -fa on'
+        f' -p "{PROMPT}"',
+        check=False,
+    )
+    write_qdc_log(f"llama_completion_{device}.log", result.stdout or "")
+    assert result.returncode == 0, f"llama-completion {device} failed (exit {result.returncode})"
+_DEVICE_LOG_NAME = {"none": "cpu", "GPUOpenCL": "gpu", "HTP0": "htp"}
+@pytest.mark.parametrize(
+    "device",
+    [
+        pytest.param("none", id="cpu"),
+        pytest.param("GPUOpenCL", id="gpu"),
+        pytest.param("HTP0", id="npu"),
+    ],
+)
+def test_llama_bench(device):
+    result = run_shell_command(
+        f"{CMD_PREFIX} {BIN_PATH}/llama-bench"
+        f" -m {MODEL_PATH} --device {device} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32",
+        check=False,
+    )
+    write_qdc_log(f"llama_bench_{_DEVICE_LOG_NAME[device]}.log", result.stdout or "")
+    assert result.returncode == 0, f"llama-bench {device} failed (exit {result.returncode})"
+if __name__ == "__main__":
+    ret = pytest.main(["-s", "--junitxml=results.xml", os.path.realpath(__file__)])
+    if os.path.exists("results.xml"):
+        with open("results.xml") as f:
+            write_qdc_log("results.xml", f.read())
+    sys.exit(ret)

run_scorecard.py ADDED Viewed

	@@ -0,0 +1,375 @@

+# ---------------------------------------------------------------------
+# Copyright (c) 2025 Qualcomm Technologies, Inc. and/or its subsidiaries.
+# SPDX-License-Identifier: BSD-3-Clause
+# ---------------------------------------------------------------------
+"""
+Scorecard benchmark script for llama.cpp on Linux IoT devices via SSH.
+This script runs comprehensive benchmarks:
+  1. Performance benchmarks (CPU/GPU/HTP x 3 context lengths)
+  2. Fallback ops detection (GGML_SCHED_DEBUG=2)
+  3. Perplexity (WikiText-2)
+  4. Quality checks (short Q&A validation on CPU/GPU/HTP)
+Placeholders are replaced at artifact creation time:
+  - <<MODEL_URL>>: URL to download the model
+  - <<NUM_HTPS>>: Number of HTP cores to use
+"""
+import os
+import subprocess
+import sys
+import pytest
+from appium import webdriver
+from appium.options.common import AppiumOptions
+# Appium capabilities for the QDC Linux session; the device name can be
+# overridden through the QDC_DEVICE_NAME environment variable.
+options = AppiumOptions()
+options.set_capability("automationName", "QDCLinux")
+options.set_capability("platformName", "Linux")
+options.set_capability("deviceName", os.getenv("QDC_DEVICE_NAME", "QCS9075M"))
+# Context lengths to benchmark
+# NOTE(review): the shell loop inside test_scorecard spells out the same three
+# values literally instead of reading this list - keep the two in sync.
+CONTEXT_LENGTHS = [128, 1024, 4096]
+# System prompt for completion benchmarks
+SYSTEM_PROMPT = "You are a helpful assistant. Be helpful but brief."
+# WikiText-2 URL for perplexity
+WIKITEXT_URL = "https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw/wiki.test.raw"
+class TestScorecard:
+    @pytest.fixture
+    def driver(self) -> webdriver.Remote:
+        return webdriver.Remote(
+            command_executor="http://127.0.0.1:4723/wd/hub", options=options
+        )
+    def test_scorecard(self, driver: webdriver.Remote) -> None:
+        """Run comprehensive llama.cpp scorecard benchmarks."""
+        model_url = "<<MODEL_URL>>"
+        num_htps = "<<NUM_HTPS>>"
+        # On-device paths (Linux IoT - using /tmp)
+        basedir = "/tmp/llama_cpp_bundle"
+        model_path = "/tmp/gguf/model.gguf"
+        log_file = "/tmp/QDC_logs/scorecard.log"
+        scorecard_script = f"""
+cd /tmp/llama_cpp_bundle
+# Set library paths
+export LD_LIBRARY_PATH=/tmp/llama_cpp_bundle/lib:$LD_LIBRARY_PATH
+export ADSP_LIBRARY_PATH="/tmp/llama_cpp_bundle/lib:/system/lib/rfsa/adsp:/system/vendor/lib/rfsa/adsp:/dsp"
+# Make binaries executable
+chmod +x /tmp/llama_cpp_bundle/bin/*
+# Setup paths
+BASEDIR=/tmp/llama_cpp_bundle
+MODEL={model_path}
+LOG_FILE={log_file}
+NUM_HTPS={num_htps}
+# Create directories
+mkdir -p /tmp/gguf
+mkdir -p /tmp/QDC_logs
+# Download model
+echo "Downloading model from {model_url}..."
+curl -L -J --output $MODEL "{model_url}"
+# Initialize log
+echo "============================================================" > $LOG_FILE
+echo "LLAMA.CPP SCORECARD (Linux)" >> $LOG_FILE
+echo "Date: $(date)" >> $LOG_FILE
+echo "Model: {model_url}" >> $LOG_FILE
+echo "============================================================" >> $LOG_FILE
+#############################################
+# SECTION 1: PERFORMANCE BENCHMARKS
+#############################################
+echo "" >> $LOG_FILE
+echo "########################################################" >> $LOG_FILE
+echo "# SECTION 1: PERFORMANCE BENCHMARKS" >> $LOG_FILE
+echo "########################################################" >> $LOG_FILE
+# Common HTP flags
+HTP_FLAGS="--no-mmap --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 -fa on -ngl 99"
+# Run benchmarks for each compute unit and context length
+for COMPUTE in CPU GPU HTP; do
+    echo "" >> $LOG_FILE
+    echo "--- COMPUTE: $COMPUTE ---" >> $LOG_FILE
+    for CTX_LEN in 128 1024 4096; do
+        echo "" >> $LOG_FILE
+        echo "=== $COMPUTE | CTX=$CTX_LEN ===" >> $LOG_FILE
+        # Select prompt file
+        PROMPT_FILE="/tmp/llama_cpp_bundle/sample_prompt_${{CTX_LEN}}.txt"
+        if [ "$COMPUTE" = "CPU" ]; then
+            CMD="GGML_HEXAGON_NDEV=0 $BASEDIR/bin/llama-completion --model $MODEL --n-predict -1 --ctx-size $CTX_LEN --system-prompt \\"{SYSTEM_PROMPT}\\" --file $PROMPT_FILE --seed 1 --single-turn --no-display-prompt --n-gpu-layers 0"
+            echo "COMMAND: $CMD" >> $LOG_FILE
+            GGML_HEXAGON_NDEV=0 $BASEDIR/bin/llama-completion \\
+                --model $MODEL \\
+                --n-predict -1 \\
+                --ctx-size $CTX_LEN \\
+                --system-prompt "{SYSTEM_PROMPT}" \\
+                --file "$PROMPT_FILE" \\
+                --seed 1 \\
+                --single-turn \\
+                --no-display-prompt \\
+                --n-gpu-layers 0 \\
+                2>&1 | tee -a $LOG_FILE
+        elif [ "$COMPUTE" = "GPU" ]; then
+            CMD="GGML_HEXAGON_NDEV=0 $BASEDIR/bin/llama-completion --model $MODEL --n-predict -1 --ctx-size $CTX_LEN --system-prompt \\"{SYSTEM_PROMPT}\\" --file $PROMPT_FILE --seed 1 --single-turn --no-display-prompt -fa off"
+            echo "COMMAND: $CMD" >> $LOG_FILE
+            GGML_HEXAGON_NDEV=0 $BASEDIR/bin/llama-completion \\
+                --model $MODEL \\
+                --n-predict -1 \\
+                --ctx-size $CTX_LEN \\
+                --system-prompt "{SYSTEM_PROMPT}" \\
+                --file "$PROMPT_FILE" \\
+                --seed 1 \\
+                --single-turn \\
+                --no-display-prompt \\
+                -fa off \\
+                2>&1 | tee -a $LOG_FILE
+        elif [ "$COMPUTE" = "HTP" ]; then
+            CMD="GGML_HEXAGON_NDEV=$NUM_HTPS $BASEDIR/bin/llama-completion --model $MODEL --n-predict -1 --ctx-size $CTX_LEN --system-prompt \\"{SYSTEM_PROMPT}\\" --file $PROMPT_FILE --seed 1 --single-turn --no-display-prompt $HTP_FLAGS --device HTP0 -ctk f16 -ctv f16 --batch-size 128"
+            echo "COMMAND: $CMD" >> $LOG_FILE
+            GGML_HEXAGON_NDEV=$NUM_HTPS $BASEDIR/bin/llama-completion \\
+                --model $MODEL \\
+                --n-predict -1 \\
+                --ctx-size $CTX_LEN \\
+                --system-prompt "{SYSTEM_PROMPT}" \\
+                --file "$PROMPT_FILE" \\
+                --seed 1 \\
+                --single-turn \\
+                --no-display-prompt \\
+                $HTP_FLAGS \\
+                --device HTP0 \\
+                -ctk f16 \\
+                -ctv f16 \\
+                --batch-size 128 \\
+                2>&1 | tee -a $LOG_FILE
+        fi
+    done
+done
+#############################################
+# SECTION 2: FALLBACK OPS DETECTION (SCHED=1)
+#############################################
+echo "" >> $LOG_FILE
+echo "########################################################" >> $LOG_FILE
+echo "# SECTION 2: FALLBACK OPS (GGML_SCHED_DEBUG=2)" >> $LOG_FILE
+echo "########################################################" >> $LOG_FILE
+# GPU fallback ops
+echo "" >> $LOG_FILE
+echo "=== FALLBACK_OPS | GPU ===" >> $LOG_FILE
+echo "Running with GGML_SCHED_DEBUG=2 on GPU..." >> $LOG_FILE
+GGML_SCHED_DEBUG=2 GGML_HEXAGON_NDEV=0 $BASEDIR/bin/llama-completion \\
+    --model $MODEL \\
+    --n-predict 64 \\
+    --ctx-size 128 \\
+    -p "Hello world" \\
+    --seed 1 \\
+    --single-turn \\
+    --no-display-prompt \\
+    -fa off \\
+    -v \\
+    2>&1 | tee -a $LOG_FILE
+# HTP fallback ops
+echo "" >> $LOG_FILE
+echo "=== FALLBACK_OPS | HTP ===" >> $LOG_FILE
+echo "Running with GGML_SCHED_DEBUG=2 on HTP..." >> $LOG_FILE
+GGML_SCHED_DEBUG=2 GGML_HEXAGON_NDEV=$NUM_HTPS $BASEDIR/bin/llama-completion \\
+    --model $MODEL \\
+    --n-predict 64 \\
+    --ctx-size 128 \\
+    -p "Hello world" \\
+    --seed 1 \\
+    --single-turn \\
+    --no-display-prompt \\
+    $HTP_FLAGS \\
+    --device HTP0 \\
+    -ctk f16 \\
+    -ctv f16 \\
+    --batch-size 128 \\
+    -v \\
+    2>&1 | tee -a $LOG_FILE
+#############################################
+# SECTION 3: PERPLEXITY (WikiText-2)
+#############################################
+echo "" >> $LOG_FILE
+echo "########################################################" >> $LOG_FILE
+echo "# SECTION 3: PERPLEXITY (WikiText-2)" >> $LOG_FILE
+echo "########################################################" >> $LOG_FILE
+echo "" >> $LOG_FILE
+echo "Downloading WikiText-2 dataset..." >> $LOG_FILE
+curl -L -o /tmp/wiki.test.raw "{WIKITEXT_URL}"
+echo "" >> $LOG_FILE
+echo "Running perplexity on HTP..." >> $LOG_FILE
+echo "COMMAND: GGML_HEXAGON_NDEV=$NUM_HTPS llama-perplexity -m $MODEL -f wiki.test.raw --device HTP0 --chunks 10" >> $LOG_FILE
+GGML_HEXAGON_NDEV=$NUM_HTPS $BASEDIR/bin/llama-perplexity \\
+    -m $MODEL \\
+    -f /tmp/wiki.test.raw \\
+    --device HTP0 \\
+    --chunks 10 \\
+    $HTP_FLAGS \\
+    2>&1 | tee -a $LOG_FILE
+#############################################
+# SECTION 4: QUALITY CHECKS (Q&A Validation)
+#############################################
+echo "" >> $LOG_FILE
+echo "########################################################" >> $LOG_FILE
+echo "# SECTION 4: QUALITY CHECKS" >> $LOG_FILE
+echo "########################################################" >> $LOG_FILE
+# Run simple Q&A tests on GPU and HTP and check for expected answers
+# Format: QUALITY_CHECK: DEVICE | question | expected | PASS/FAIL
+run_quality_check() {{
+    local DEVICE="$1"
+    local QUESTION="$2"
+    local EXPECTED="$3"
+    echo "" >> $LOG_FILE
+    echo "--- Quality Check ($DEVICE) ---" >> $LOG_FILE
+    echo "Question: $QUESTION" >> $LOG_FILE
+    echo "Expected to contain: $EXPECTED" >> $LOG_FILE
+    # Filter: strip loading spinners, progress bars, and non-printable characters
+    FILTER='grep -v -E "^(Loading|\\||/|\\\\|-|\\[|model|warning|log|ggml|llama)" | sed "s/[^[:print:][:space:]]//g" | sed "/^[[:space:]]*$/d" | head -20'
+    if [ "$DEVICE" = "CPU" ]; then
+        RAW_RESPONSE=$(GGML_HEXAGON_NDEV=0 $BASEDIR/bin/llama-cli \\
+            --model $MODEL \\
+            --system-prompt "{SYSTEM_PROMPT}" \\
+            -p "$QUESTION" \\
+            --n-predict 64 \\
+            --ctx-size 512 \\
+            --seed 1 \\
+            --no-display-prompt \\
+            --n-gpu-layers 0 \\
+            2>/dev/null)
+    elif [ "$DEVICE" = "GPU" ]; then
+        RAW_RESPONSE=$(GGML_HEXAGON_NDEV=0 $BASEDIR/bin/llama-cli \\
+            --model $MODEL \\
+            --system-prompt "{SYSTEM_PROMPT}" \\
+            -p "$QUESTION" \\
+            --n-predict 64 \\
+            --ctx-size 512 \\
+            --seed 1 \\
+            --no-display-prompt \\
+            -fa off \\
+            2>/dev/null)
+    elif [ "$DEVICE" = "HTP" ]; then
+        RAW_RESPONSE=$(GGML_HEXAGON_NDEV=$NUM_HTPS $BASEDIR/bin/llama-cli \\
+            --model $MODEL \\
+            --system-prompt "{SYSTEM_PROMPT}" \\
+            -p "$QUESTION" \\
+            --n-predict 64 \\
+            --ctx-size 512 \\
+            --seed 1 \\
+            --no-display-prompt \\
+            $HTP_FLAGS \\
+            --device HTP0 \\
+            -ctk f16 \\
+            -ctv f16 \\
+            --batch-size 128 \\
+            2>/dev/null)
+    fi
+    RESPONSE=$(echo "$RAW_RESPONSE" | eval $FILTER)
+    echo "RESPONSE_START" >> $LOG_FILE
+    echo "$RESPONSE" >> $LOG_FILE
+    echo "RESPONSE_END" >> $LOG_FILE
+    # Check if expected string is in response (case-insensitive)
+    if echo "$RESPONSE" | grep -qi "$EXPECTED"; then
+        echo "QUALITY_CHECK: $DEVICE | $QUESTION | $EXPECTED | PASS" >> $LOG_FILE
+    else
+        echo "QUALITY_CHECK: $DEVICE | $QUESTION | $EXPECTED | FAIL" >> $LOG_FILE
+    fi
+}}
+# CPU Quality Checks
+echo "" >> $LOG_FILE
+echo "=== QUALITY_CHECKS | CPU ===" >> $LOG_FILE
+run_quality_check CPU "What is the capital of France?" "Paris"
+run_quality_check CPU "What is 2 + 2?" "4"
+run_quality_check CPU "What planet is closest to the Sun?" "Mercury"
+# GPU Quality Checks
+echo "" >> $LOG_FILE
+echo "=== QUALITY_CHECKS | GPU ===" >> $LOG_FILE
+run_quality_check GPU "What is the capital of France?" "Paris"
+run_quality_check GPU "What is 2 + 2?" "4"
+run_quality_check GPU "What planet is closest to the Sun?" "Mercury"
+# HTP Quality Checks
+echo "" >> $LOG_FILE
+echo "=== QUALITY_CHECKS | HTP ===" >> $LOG_FILE
+run_quality_check HTP "What is the capital of France?" "Paris"
+run_quality_check HTP "What is 2 + 2?" "4"
+run_quality_check HTP "What planet is closest to the Sun?" "Mercury"
+#############################################
+# COMPLETE
+#############################################
+echo "" >> $LOG_FILE
+echo "============================================================" >> $LOG_FILE
+echo "=== SCORECARD COMPLETE ===" >> $LOG_FILE
+echo "============================================================" >> $LOG_FILE
+"""
+        # Push the bundle to the device via SSH/SCP
+        device_host = os.getenv("QDC_DEVICE_HOST", "localhost")
+        subprocess.run(
+            ["scp", "-r", "/qdc/appium/llama_cpp_bundle/", f"{device_host}:/tmp/"],
+            capture_output=True,
+            encoding="utf-8",
+            errors="replace",
+            check=True,
+        )
+        # Run the scorecard script via SSH
+        result = subprocess.run(
+            [
+                "ssh",
+                device_host,
+                "sh",
+                "-c",
+                scorecard_script,
+            ],
+            capture_output=True,
+            encoding="utf-8",
+            errors="replace",
+            check=True,
+        )
+        print(result.stdout)
+        print(result.stderr)
+if __name__ == "__main__":
+    sys.exit(pytest.main(["-s", "--junitxml=results.xml", os.path.realpath(__file__)]))

utils.py ADDED Viewed

	@@ -0,0 +1,105 @@

+# ---------------------------------------------------------------------
+# Copyright (c) 2025 Qualcomm Technologies, Inc. and/or its subsidiaries.
+# SPDX-License-Identifier: BSD-3-Clause
+# ---------------------------------------------------------------------
+"""Shared helpers for QDC on-device test runners (Linux/IoT devices).
+Unlike Android devices, which use ADB, Linux IoT devices are reached over
+SSH: the helpers below invoke ``ssh`` and ``scp`` against the host named by
+the QDC_DEVICE_HOST environment variable (default "localhost").
+"""
+import os
+import subprocess
+import tempfile
+from appium.options.common import AppiumOptions
+# ---------------------------------------------------------------------------
+# On-device paths (Linux IoT)
+# ---------------------------------------------------------------------------
+# Directory on the device that holds the pushed llama.cpp bundle.
+BUNDLE_PATH = "/tmp/llama_cpp_bundle"
+# Directory on the device that write_qdc_log() copies log files into.
+QDC_LOGS_PATH = "/tmp/QDC_logs"
+LIB_PATH = f"{BUNDLE_PATH}/lib"
+BIN_PATH = f"{BUNDLE_PATH}/bin"
+# Shell fragment that exports the library search paths and marks every bundled
+# binary executable. It ends with "&&" so the actual command (optionally
+# preceded by VAR=value assignments) can be appended directly after it.
+ENV_PREFIX = (
+    f"export LD_LIBRARY_PATH={LIB_PATH}:$LD_LIBRARY_PATH && "
+    f"export ADSP_LIBRARY_PATH={LIB_PATH} && "
+    f"chmod +x {BIN_PATH}/* &&"
+)
+# ENV_PREFIX preceded by a cd into the bundle directory.
+CMD_PREFIX = f"cd {BUNDLE_PATH} && {ENV_PREFIX}"
+# ---------------------------------------------------------------------------
+# Appium session options (Linux/IoT)
+# ---------------------------------------------------------------------------
+# Capabilities for the Appium session; the device name can be overridden
+# through the QDC_DEVICE_NAME environment variable.
+options = AppiumOptions()
+options.set_capability("automationName", "QDCLinux")
+options.set_capability("platformName", "Linux")
+options.set_capability("deviceName", os.getenv("QDC_DEVICE_NAME", "QCS9075M"))
+# ---------------------------------------------------------------------------
+# SSH/Shell helpers for Linux devices
+# ---------------------------------------------------------------------------
+def run_shell_command(cmd: str, *, check: bool = True) -> subprocess.CompletedProcess:
+    """Run a shell command on the Linux device.
+    For QDC Linux devices, commands are executed through the QDC infrastructure
+    which provides SSH access to the device. The QDC Appium driver handles the
+    SSH tunneling transparently.
+    """
+    raw = subprocess.run(
+        ["ssh", os.getenv("QDC_DEVICE_HOST", "localhost"), f"{cmd}; echo __RC__:$?"],
+        text=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        timeout=300,
+    )
+    stdout = raw.stdout
+    returncode = raw.returncode
+    if stdout:
+        lines = stdout.rstrip("\n").split("\n")
+        if lines and lines[-1].startswith("__RC__:"):
+            try:
+                returncode = int(lines[-1][7:])
+                stdout = "\n".join(lines[:-1]) + "\n"
+            except ValueError:
+                pass
+    print(stdout)
+    result = subprocess.CompletedProcess(raw.args, returncode, stdout=stdout)
+    if check:
+        assert returncode == 0, f"Command failed (exit {returncode})"
+    return result
+def write_qdc_log(filename: str, content: str) -> None:
+    """Write content as a log file to QDC_LOGS_PATH on the device for QDC log collection."""
+    run_shell_command(f"mkdir -p {QDC_LOGS_PATH}", check=False)
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False) as f:
+        f.write(content)
+        tmp_path = f.name
+    try:
+        subprocess.run(
+            ["scp", tmp_path, f"{os.getenv('QDC_DEVICE_HOST', 'localhost')}:{QDC_LOGS_PATH}/{filename}"],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            timeout=60,
+        )
+    finally:
+        os.unlink(tmp_path)
+def push_bundle_if_needed(check_binary: str) -> None:
+    """Push llama_cpp_bundle to the device if check_binary is not already present."""
+    result = run_shell_command(f"ls {check_binary}", check=False)
+    if result.returncode != 0:
+        subprocess.run(
+            ["scp", "-r", "/qdc/appium/llama_cpp_bundle/", f"{os.getenv('QDC_DEVICE_HOST', 'localhost')}:/tmp/"],
+            text=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            timeout=120,
+        )