zackliqcom committed on
Commit
0938148
·
verified ·
1 Parent(s): c8374ed

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. conftest.py +24 -0
  2. run_backend_ops.py +46 -0
  3. run_bench_tests.py +83 -0
  4. run_scorecard.py +375 -0
  5. utils.py +105 -0
conftest.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ---------------------------------------------------------------------
2
+ # Copyright (c) 2025 Qualcomm Technologies, Inc. and/or its subsidiaries.
3
+ # SPDX-License-Identifier: BSD-3-Clause
4
+ # ---------------------------------------------------------------------
5
+ """Shared pytest fixtures for QDC on-device test runners."""
6
+
7
+ import os
8
+
9
+ import pytest
10
+ from appium import webdriver
11
+
12
+ from utils import options, write_qdc_log
13
+
14
+
15
@pytest.fixture(scope="session", autouse=True)
def driver():
    """Open an Appium session against the local Appium server.

    The fixture is session-scoped and autouse, so pytest creates it once per
    test session even when no test requests it by name. The session is built
    from the shared ``options`` object imported from ``utils``.

    Returns:
        The ``webdriver.Remote`` instance connected to
        ``http://127.0.0.1:4723/wd/hub``.
    """
    appium_endpoint = "http://127.0.0.1:4723/wd/hub"
    session = webdriver.Remote(command_executor=appium_endpoint, options=options)
    return session
18
+
19
+
20
def pytest_sessionfinish(session, exitstatus):
    """Copy the JUnit XML report into the QDC log directory at session end.

    The report path is taken from the ``xmlpath`` option on the pytest config
    when it is set, otherwise ``results.xml`` is assumed. Nothing is uploaded
    when that file does not exist.

    Args:
        session: The pytest session; only ``session.config.option`` is read.
        exitstatus: Exit status passed by pytest (unused here).
    """
    report_path = getattr(session.config.option, "xmlpath", None) or "results.xml"
    if not os.path.exists(report_path):
        return
    with open(report_path) as report:
        report_text = report.read()
    # The uploaded copy is always named results.xml, whatever the local path was.
    write_qdc_log("results.xml", report_text)
run_backend_ops.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ---------------------------------------------------------------------
2
+ # Copyright (c) 2025 Qualcomm Technologies, Inc. and/or its subsidiaries.
3
+ # SPDX-License-Identifier: BSD-3-Clause
4
+ # ---------------------------------------------------------------------
5
+ """
6
+ On-device test-backend-ops runner for llama.cpp (HTP0 backend).
7
+ Linux/IoT device version (QCS9075M, etc.)
8
+
9
+ Executed by QDC's test framework on the QDC runner.
10
+ The runner has SSH access to the allocated Linux device.
11
+ """
12
+
13
+ import os
14
+ import sys
15
+
16
+ import pytest
17
+
18
+ from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_shell_command, write_qdc_log
19
+
20
+
21
@pytest.fixture(scope="session", autouse=True)
def install(driver):
    """Make sure the llama.cpp bundle is on the device before any test runs.

    Depends on the ``driver`` fixture so the Appium session exists first. The
    bundle is pushed only when the ``test-backend-ops`` binary is missing.
    """
    probe_binary = f"{BIN_PATH}/test-backend-ops"
    push_bundle_if_needed(probe_binary)
24
+
25
+
26
@pytest.mark.parametrize("type_a", ["mxfp4", "fp16", "q4_0"])
def test_backend_ops_htp0(type_a):
    """Run ``test-backend-ops`` for MUL_MAT on the HTP0 backend for one ``type_a``.

    For ``q4_0`` the ``-p`` argument is a regex that matches entries containing
    ``type_a=q4_0`` while rejecting those that contain
    ``type_b=f32,m=576,n=512,k=576``; for every other type it is the plain
    ``type_a=<type>`` string. The captured output is stored as
    ``backend_ops_<type_a>.log`` and the test fails on a non-zero exit code.
    """
    base_cmd = f"{CMD_PREFIX} GGML_HEXAGON_HOSTBUF=0 GGML_HEXAGON_EXPERIMENTAL=1 {BIN_PATH}/test-backend-ops -b HTP0 -o MUL_MAT"
    pattern_arg = (
        r' -p "^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$"'
        if type_a == "q4_0"
        else f" -p type_a={type_a}"
    )
    # check=False: the log must be written even when the binary fails.
    outcome = run_shell_command(base_cmd + pattern_arg, check=False)
    write_qdc_log(f"backend_ops_{type_a}.log", outcome.stdout or "")
    assert outcome.returncode == 0, f"test-backend-ops type_a={type_a} failed (exit {outcome.returncode})"
39
+
40
+
41
if __name__ == "__main__":
    # Run only this file under pytest, writing a JUnit report next to the
    # current working directory.
    this_file = os.path.realpath(__file__)
    exit_code = pytest.main(["-s", "--junitxml=results.xml", this_file])
    # Upload the report to the QDC log directory when pytest produced one.
    if os.path.exists("results.xml"):
        with open("results.xml") as report:
            write_qdc_log("results.xml", report.read())
    sys.exit(exit_code)
run_bench_tests.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ---------------------------------------------------------------------
2
+ # Copyright (c) 2025 Qualcomm Technologies, Inc. and/or its subsidiaries.
3
+ # SPDX-License-Identifier: BSD-3-Clause
4
+ # ---------------------------------------------------------------------
5
+ """
6
+ On-device bench and completion test runner for llama.cpp (CPU, GPU, NPU backends).
7
+ Linux/IoT device version (QCS9075M, etc.)
8
+
9
+ Executed by QDC's test framework on the QDC runner.
10
+ The runner has SSH access to the allocated Linux device.
11
+
12
+ Placeholders replaced at artifact creation time by run_qdc_jobs.py:
13
+ <<MODEL_URL>> Direct URL to the GGUF model file (downloaded on-device via curl)
14
+ """
15
+
16
+ import os
17
+ import sys
18
+
19
+ import pytest
20
+
21
+ from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_shell_command, write_qdc_log
22
+
23
# On-device location of the GGUF model: the install fixture downloads to it
# and the tests pass it to the binaries via -m.
MODEL_PATH = "/tmp/model.gguf"
# Prompt given to llama-completion via -p.
PROMPT = "What is the capital of France?"
# Options appended to every llama-completion invocation in this file.
CLI_OPTS = "--batch-size 128 -n 128 -no-cnv --seed 42"
26
+
27
+
28
@pytest.fixture(scope="session", autouse=True)
def install(driver):
    """Push the llama.cpp bundle and download the model onto the device.

    Depends on the ``driver`` fixture so the Appium session exists first. The
    bundle is pushed only when ``llama-cli`` is missing, and the model is
    downloaded only when ``MODEL_PATH`` does not exist yet.

    Raises:
        AssertionError: If the download command exits non-zero (raised by
            ``run_shell_command`` with its default ``check=True``).
    """
    push_bundle_if_needed(f"{BIN_PATH}/llama-cli")

    # Skip model download if already present
    result = run_shell_command(f"ls {MODEL_PATH}", check=False)
    if result.returncode != 0:
        # Download to a side file and rename only on success. Without --fail
        # an HTTP error page would be saved as the model, and an interrupted
        # transfer would leave a truncated file; either one would make the
        # "already present" check above skip the download on every later run.
        partial_path = f"{MODEL_PATH}.part"
        run_shell_command(
            f'curl -L -J --fail --output {partial_path} "<<MODEL_URL>>" && mv {partial_path} {MODEL_PATH}'
        )
36
+
37
+
38
@pytest.mark.parametrize(
    "device,extra_flags",
    [
        pytest.param("none", "-ctk q8_0 -ctv q8_0", id="cpu"),
        pytest.param("GPUOpenCL", "", id="gpu"),
        pytest.param("HTP0", "-ctk q8_0 -ctv q8_0", id="npu"),
    ],
)
def test_llama_completion(device, extra_flags):
    """Run ``llama-completion`` once on the given device and require exit code 0.

    Args:
        device: Value passed to ``--device``.
        extra_flags: Additional command-line flags for this device (may be empty).

    The captured output is stored as ``llama_completion_<device>.log``.
    """
    command = "".join(
        [
            f"{CMD_PREFIX} {BIN_PATH}/llama-completion",
            f" -m {MODEL_PATH} --device {device} -ngl 99 -t 4 {CLI_OPTS} {extra_flags} -fa on",
            f' -p "{PROMPT}"',
        ]
    )
    # check=False: the log must be written even when the binary fails.
    completion = run_shell_command(command, check=False)
    write_qdc_log(f"llama_completion_{device}.log", completion.stdout or "")
    assert completion.returncode == 0, f"llama-completion {device} failed (exit {completion.returncode})"
55
+
56
+
57
# Maps a --device value to the short name used in llama-bench log file names.
_DEVICE_LOG_NAME = {"none": "cpu", "GPUOpenCL": "gpu", "HTP0": "htp"}
58
+
59
+
60
@pytest.mark.parametrize(
    "device",
    [
        pytest.param("none", id="cpu"),
        pytest.param("GPUOpenCL", id="gpu"),
        pytest.param("HTP0", id="npu"),
    ],
)
def test_llama_bench(device):
    """Run ``llama-bench`` once on the given device and require exit code 0.

    Args:
        device: Value passed to ``--device``; also the key into
            ``_DEVICE_LOG_NAME`` that picks the log file suffix.

    The captured output is stored as ``llama_bench_<cpu|gpu|htp>.log``.
    """
    command = (
        f"{CMD_PREFIX} {BIN_PATH}/llama-bench"
        f" -m {MODEL_PATH} --device {device} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32"
    )
    # check=False: the log must be written even when the binary fails.
    bench = run_shell_command(command, check=False)
    log_name = f"llama_bench_{_DEVICE_LOG_NAME[device]}.log"
    write_qdc_log(log_name, bench.stdout or "")
    assert bench.returncode == 0, f"llama-bench {device} failed (exit {bench.returncode})"
76
+
77
+
78
if __name__ == "__main__":
    # Run only this file under pytest, writing a JUnit report next to the
    # current working directory.
    this_file = os.path.realpath(__file__)
    exit_code = pytest.main(["-s", "--junitxml=results.xml", this_file])
    # Upload the report to the QDC log directory when pytest produced one.
    if os.path.exists("results.xml"):
        with open("results.xml") as report:
            write_qdc_log("results.xml", report.read())
    sys.exit(exit_code)
run_scorecard.py ADDED
@@ -0,0 +1,375 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ---------------------------------------------------------------------
2
+ # Copyright (c) 2025 Qualcomm Technologies, Inc. and/or its subsidiaries.
3
+ # SPDX-License-Identifier: BSD-3-Clause
4
+ # ---------------------------------------------------------------------
5
+ """
6
+ Scorecard benchmark script for llama.cpp on Linux IoT devices via SSH.
7
+
8
+ This script runs comprehensive benchmarks:
9
+ 1. Performance benchmarks (CPU/GPU/HTP x 3 context lengths)
10
+ 2. Fallback ops detection (GGML_SCHED_DEBUG=2)
11
+ 3. Perplexity (WikiText-2)
+ 4. Quality checks (simple Q&A validation on CPU, GPU, and HTP)
12
+
13
+ Placeholders are replaced at artifact creation time:
14
+ - <<MODEL_URL>>: URL to download the model
15
+ - <<NUM_HTPS>>: Number of HTP cores to use
16
+ """
17
+
18
+ import os
19
+ import subprocess
20
+ import sys
21
+
22
+ import pytest
23
+ from appium import webdriver
24
+ from appium.options.common import AppiumOptions
25
+
26
# Appium capabilities used by the TestScorecard.driver fixture below.
options = AppiumOptions()
options.set_capability("automationName", "QDCLinux")
options.set_capability("platformName", "Linux")
# Device name comes from QDC_DEVICE_NAME, falling back to QCS9075M.
options.set_capability("deviceName", os.getenv("QDC_DEVICE_NAME", "QCS9075M"))

# Context lengths to benchmark
# NOTE(review): not referenced elsewhere in the visible code; the shell loop in
# the scorecard script hard-codes the same three values.
CONTEXT_LENGTHS = [128, 1024, 4096]

# System prompt for completion benchmarks
SYSTEM_PROMPT = "You are a helpful assistant. Be helpful but brief."

# WikiText-2 URL for perplexity
WIKITEXT_URL = "https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw/wiki.test.raw"
39
+
40
+
41
class TestScorecard:
    """Scorecard run: push the bundle, then execute one shell script on the device."""

    @pytest.fixture
    def driver(self) -> webdriver.Remote:
        """Open an Appium session against the local Appium server.

        Returns:
            The ``webdriver.Remote`` instance connected to
            ``http://127.0.0.1:4723/wd/hub`` using the module-level ``options``.
        """
        return webdriver.Remote(
            command_executor="http://127.0.0.1:4723/wd/hub", options=options
        )

    def test_scorecard(self, driver: webdriver.Remote) -> None:
        """Run comprehensive llama.cpp scorecard benchmarks.

        Steps:
            1. Build the shell script. It downloads the model, runs the
               performance benchmarks (CPU/GPU/HTP at three context lengths),
               the GGML_SCHED_DEBUG=2 fallback-ops runs, the WikiText-2
               perplexity run and the Q&A quality checks, appending everything
               to ``/tmp/QDC_logs/scorecard.log`` on the device.
            2. Copy ``/qdc/appium/llama_cpp_bundle/`` to the device with scp.
            3. Execute the script on the device over ssh and print its output.

        Args:
            driver: Requested only so the Appium session is created; it is not
                used directly.

        Raises:
            subprocess.CalledProcessError: If scp or ssh exits non-zero.
        """
        # Placeholders substituted at artifact creation time.
        model_url = "<<MODEL_URL>>"
        num_htps = "<<NUM_HTPS>>"

        # On-device paths (Linux IoT - using /tmp)
        model_path = "/tmp/gguf/model.gguf"
        log_file = "/tmp/QDC_logs/scorecard.log"

        # NOTE: this is an f-string. Shell braces are doubled ({{ }}) and shell
        # backslashes are doubled (\\) so they survive Python formatting.
        scorecard_script = f"""
        cd /tmp/llama_cpp_bundle

        # Set library paths
        export LD_LIBRARY_PATH=/tmp/llama_cpp_bundle/lib:$LD_LIBRARY_PATH
        export ADSP_LIBRARY_PATH="/tmp/llama_cpp_bundle/lib:/system/lib/rfsa/adsp:/system/vendor/lib/rfsa/adsp:/dsp"

        # Make binaries executable
        chmod +x /tmp/llama_cpp_bundle/bin/*

        # Setup paths
        BASEDIR=/tmp/llama_cpp_bundle
        MODEL={model_path}
        LOG_FILE={log_file}
        NUM_HTPS={num_htps}

        # Create directories
        mkdir -p /tmp/gguf
        mkdir -p /tmp/QDC_logs

        # Download model
        echo "Downloading model from {model_url}..."
        curl -L -J --output $MODEL "{model_url}"

        # Initialize log
        echo "============================================================" > $LOG_FILE
        echo "LLAMA.CPP SCORECARD (Linux)" >> $LOG_FILE
        echo "Date: $(date)" >> $LOG_FILE
        echo "Model: {model_url}" >> $LOG_FILE
        echo "============================================================" >> $LOG_FILE

        #############################################
        # SECTION 1: PERFORMANCE BENCHMARKS
        #############################################
        echo "" >> $LOG_FILE
        echo "########################################################" >> $LOG_FILE
        echo "# SECTION 1: PERFORMANCE BENCHMARKS" >> $LOG_FILE
        echo "########################################################" >> $LOG_FILE

        # Common HTP flags
        HTP_FLAGS="--no-mmap --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 -fa on -ngl 99"

        # Run benchmarks for each compute unit and context length
        for COMPUTE in CPU GPU HTP; do
            echo "" >> $LOG_FILE
            echo "--- COMPUTE: $COMPUTE ---" >> $LOG_FILE

            for CTX_LEN in 128 1024 4096; do
                echo "" >> $LOG_FILE
                echo "=== $COMPUTE | CTX=$CTX_LEN ===" >> $LOG_FILE

                # Select prompt file
                PROMPT_FILE="/tmp/llama_cpp_bundle/sample_prompt_${{CTX_LEN}}.txt"

                if [ "$COMPUTE" = "CPU" ]; then
                    CMD="GGML_HEXAGON_NDEV=0 $BASEDIR/bin/llama-completion --model $MODEL --n-predict -1 --ctx-size $CTX_LEN --system-prompt \\"{SYSTEM_PROMPT}\\" --file $PROMPT_FILE --seed 1 --single-turn --no-display-prompt --n-gpu-layers 0"
                    echo "COMMAND: $CMD" >> $LOG_FILE
                    GGML_HEXAGON_NDEV=0 $BASEDIR/bin/llama-completion \\
                        --model $MODEL \\
                        --n-predict -1 \\
                        --ctx-size $CTX_LEN \\
                        --system-prompt "{SYSTEM_PROMPT}" \\
                        --file "$PROMPT_FILE" \\
                        --seed 1 \\
                        --single-turn \\
                        --no-display-prompt \\
                        --n-gpu-layers 0 \\
                        2>&1 | tee -a $LOG_FILE

                elif [ "$COMPUTE" = "GPU" ]; then
                    CMD="GGML_HEXAGON_NDEV=0 $BASEDIR/bin/llama-completion --model $MODEL --n-predict -1 --ctx-size $CTX_LEN --system-prompt \\"{SYSTEM_PROMPT}\\" --file $PROMPT_FILE --seed 1 --single-turn --no-display-prompt -fa off"
                    echo "COMMAND: $CMD" >> $LOG_FILE
                    GGML_HEXAGON_NDEV=0 $BASEDIR/bin/llama-completion \\
                        --model $MODEL \\
                        --n-predict -1 \\
                        --ctx-size $CTX_LEN \\
                        --system-prompt "{SYSTEM_PROMPT}" \\
                        --file "$PROMPT_FILE" \\
                        --seed 1 \\
                        --single-turn \\
                        --no-display-prompt \\
                        -fa off \\
                        2>&1 | tee -a $LOG_FILE

                elif [ "$COMPUTE" = "HTP" ]; then
                    CMD="GGML_HEXAGON_NDEV=$NUM_HTPS $BASEDIR/bin/llama-completion --model $MODEL --n-predict -1 --ctx-size $CTX_LEN --system-prompt \\"{SYSTEM_PROMPT}\\" --file $PROMPT_FILE --seed 1 --single-turn --no-display-prompt $HTP_FLAGS --device HTP0 -ctk f16 -ctv f16 --batch-size 128"
                    echo "COMMAND: $CMD" >> $LOG_FILE
                    GGML_HEXAGON_NDEV=$NUM_HTPS $BASEDIR/bin/llama-completion \\
                        --model $MODEL \\
                        --n-predict -1 \\
                        --ctx-size $CTX_LEN \\
                        --system-prompt "{SYSTEM_PROMPT}" \\
                        --file "$PROMPT_FILE" \\
                        --seed 1 \\
                        --single-turn \\
                        --no-display-prompt \\
                        $HTP_FLAGS \\
                        --device HTP0 \\
                        -ctk f16 \\
                        -ctv f16 \\
                        --batch-size 128 \\
                        2>&1 | tee -a $LOG_FILE
                fi
            done
        done

        #############################################
        # SECTION 2: FALLBACK OPS DETECTION (SCHED=1)
        #############################################
        echo "" >> $LOG_FILE
        echo "########################################################" >> $LOG_FILE
        echo "# SECTION 2: FALLBACK OPS (GGML_SCHED_DEBUG=2)" >> $LOG_FILE
        echo "########################################################" >> $LOG_FILE

        # GPU fallback ops
        echo "" >> $LOG_FILE
        echo "=== FALLBACK_OPS | GPU ===" >> $LOG_FILE
        echo "Running with GGML_SCHED_DEBUG=2 on GPU..." >> $LOG_FILE

        GGML_SCHED_DEBUG=2 GGML_HEXAGON_NDEV=0 $BASEDIR/bin/llama-completion \\
            --model $MODEL \\
            --n-predict 64 \\
            --ctx-size 128 \\
            -p "Hello world" \\
            --seed 1 \\
            --single-turn \\
            --no-display-prompt \\
            -fa off \\
            -v \\
            2>&1 | tee -a $LOG_FILE

        # HTP fallback ops
        echo "" >> $LOG_FILE
        echo "=== FALLBACK_OPS | HTP ===" >> $LOG_FILE
        echo "Running with GGML_SCHED_DEBUG=2 on HTP..." >> $LOG_FILE

        GGML_SCHED_DEBUG=2 GGML_HEXAGON_NDEV=$NUM_HTPS $BASEDIR/bin/llama-completion \\
            --model $MODEL \\
            --n-predict 64 \\
            --ctx-size 128 \\
            -p "Hello world" \\
            --seed 1 \\
            --single-turn \\
            --no-display-prompt \\
            $HTP_FLAGS \\
            --device HTP0 \\
            -ctk f16 \\
            -ctv f16 \\
            --batch-size 128 \\
            -v \\
            2>&1 | tee -a $LOG_FILE

        #############################################
        # SECTION 3: PERPLEXITY (WikiText-2)
        #############################################
        echo "" >> $LOG_FILE
        echo "########################################################" >> $LOG_FILE
        echo "# SECTION 3: PERPLEXITY (WikiText-2)" >> $LOG_FILE
        echo "########################################################" >> $LOG_FILE

        echo "" >> $LOG_FILE
        echo "Downloading WikiText-2 dataset..." >> $LOG_FILE
        curl -L -o /tmp/wiki.test.raw "{WIKITEXT_URL}"

        echo "" >> $LOG_FILE
        echo "Running perplexity on HTP..." >> $LOG_FILE
        echo "COMMAND: GGML_HEXAGON_NDEV=$NUM_HTPS llama-perplexity -m $MODEL -f wiki.test.raw --device HTP0 --chunks 10" >> $LOG_FILE

        GGML_HEXAGON_NDEV=$NUM_HTPS $BASEDIR/bin/llama-perplexity \\
            -m $MODEL \\
            -f /tmp/wiki.test.raw \\
            --device HTP0 \\
            --chunks 10 \\
            $HTP_FLAGS \\
            2>&1 | tee -a $LOG_FILE

        #############################################
        # SECTION 4: QUALITY CHECKS (Q&A Validation)
        #############################################
        echo "" >> $LOG_FILE
        echo "########################################################" >> $LOG_FILE
        echo "# SECTION 4: QUALITY CHECKS" >> $LOG_FILE
        echo "########################################################" >> $LOG_FILE

        # Run simple Q&A tests on GPU and HTP and check for expected answers
        # Format: QUALITY_CHECK: DEVICE | question | expected | PASS/FAIL

        run_quality_check() {{
            local DEVICE="$1"
            local QUESTION="$2"
            local EXPECTED="$3"

            echo "" >> $LOG_FILE
            echo "--- Quality Check ($DEVICE) ---" >> $LOG_FILE
            echo "Question: $QUESTION" >> $LOG_FILE
            echo "Expected to contain: $EXPECTED" >> $LOG_FILE

            # Filter: strip loading spinners, progress bars, and non-printable characters
            FILTER='grep -v -E "^(Loading|\\||/|\\\\|-|\\[|model|warning|log|ggml|llama)" | sed "s/[^[:print:][:space:]]//g" | sed "/^[[:space:]]*$/d" | head -20'

            if [ "$DEVICE" = "CPU" ]; then
                RAW_RESPONSE=$(GGML_HEXAGON_NDEV=0 $BASEDIR/bin/llama-cli \\
                    --model $MODEL \\
                    --system-prompt "{SYSTEM_PROMPT}" \\
                    -p "$QUESTION" \\
                    --n-predict 64 \\
                    --ctx-size 512 \\
                    --seed 1 \\
                    --no-display-prompt \\
                    --n-gpu-layers 0 \\
                    2>/dev/null)

            elif [ "$DEVICE" = "GPU" ]; then
                RAW_RESPONSE=$(GGML_HEXAGON_NDEV=0 $BASEDIR/bin/llama-cli \\
                    --model $MODEL \\
                    --system-prompt "{SYSTEM_PROMPT}" \\
                    -p "$QUESTION" \\
                    --n-predict 64 \\
                    --ctx-size 512 \\
                    --seed 1 \\
                    --no-display-prompt \\
                    -fa off \\
                    2>/dev/null)

            elif [ "$DEVICE" = "HTP" ]; then
                RAW_RESPONSE=$(GGML_HEXAGON_NDEV=$NUM_HTPS $BASEDIR/bin/llama-cli \\
                    --model $MODEL \\
                    --system-prompt "{SYSTEM_PROMPT}" \\
                    -p "$QUESTION" \\
                    --n-predict 64 \\
                    --ctx-size 512 \\
                    --seed 1 \\
                    --no-display-prompt \\
                    $HTP_FLAGS \\
                    --device HTP0 \\
                    -ctk f16 \\
                    -ctv f16 \\
                    --batch-size 128 \\
                    2>/dev/null)
            fi

            RESPONSE=$(echo "$RAW_RESPONSE" | eval $FILTER)

            echo "RESPONSE_START" >> $LOG_FILE
            echo "$RESPONSE" >> $LOG_FILE
            echo "RESPONSE_END" >> $LOG_FILE

            # Check if expected string is in response (case-insensitive)
            if echo "$RESPONSE" | grep -qi "$EXPECTED"; then
                echo "QUALITY_CHECK: $DEVICE | $QUESTION | $EXPECTED | PASS" >> $LOG_FILE
            else
                echo "QUALITY_CHECK: $DEVICE | $QUESTION | $EXPECTED | FAIL" >> $LOG_FILE
            fi
        }}

        # CPU Quality Checks
        echo "" >> $LOG_FILE
        echo "=== QUALITY_CHECKS | CPU ===" >> $LOG_FILE

        run_quality_check CPU "What is the capital of France?" "Paris"
        run_quality_check CPU "What is 2 + 2?" "4"
        run_quality_check CPU "What planet is closest to the Sun?" "Mercury"

        # GPU Quality Checks
        echo "" >> $LOG_FILE
        echo "=== QUALITY_CHECKS | GPU ===" >> $LOG_FILE

        run_quality_check GPU "What is the capital of France?" "Paris"
        run_quality_check GPU "What is 2 + 2?" "4"
        run_quality_check GPU "What planet is closest to the Sun?" "Mercury"

        # HTP Quality Checks
        echo "" >> $LOG_FILE
        echo "=== QUALITY_CHECKS | HTP ===" >> $LOG_FILE

        run_quality_check HTP "What is the capital of France?" "Paris"
        run_quality_check HTP "What is 2 + 2?" "4"
        run_quality_check HTP "What planet is closest to the Sun?" "Mercury"

        #############################################
        # COMPLETE
        #############################################
        echo "" >> $LOG_FILE
        echo "============================================================" >> $LOG_FILE
        echo "=== SCORECARD COMPLETE ===" >> $LOG_FILE
        echo "============================================================" >> $LOG_FILE
        """

        # Push the bundle to the device via SSH/SCP
        device_host = os.getenv("QDC_DEVICE_HOST", "localhost")
        push = subprocess.run(
            ["scp", "-r", "/qdc/appium/llama_cpp_bundle/", f"{device_host}:/tmp/"],
            capture_output=True,
            encoding="utf-8",
            errors="replace",
            check=False,
        )
        if push.returncode != 0:
            # Show why scp failed; CalledProcessError alone hides the captured output.
            print(push.stdout)
            print(push.stderr)
        push.check_returncode()

        # Run the scorecard script via SSH.
        # ssh joins all trailing arguments with spaces and hands the result to
        # the remote shell, so ["sh", "-c", script] would leave `sh -c` without
        # its argument. Passing the script as the single remote command runs
        # it in the remote shell directly.
        result = subprocess.run(
            ["ssh", device_host, scorecard_script],
            capture_output=True,
            encoding="utf-8",
            errors="replace",
            check=False,
        )
        # Print before checking the exit code so the output is visible on failure too.
        print(result.stdout)
        print(result.stderr)
        result.check_returncode()
373
+
374
if __name__ == "__main__":
    # Run only this file under pytest and propagate pytest's exit code.
    this_file = os.path.realpath(__file__)
    exit_code = pytest.main(["-s", "--junitxml=results.xml", this_file])
    sys.exit(exit_code)
utils.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ---------------------------------------------------------------------
2
+ # Copyright (c) 2025 Qualcomm Technologies, Inc. and/or its subsidiaries.
3
+ # SPDX-License-Identifier: BSD-3-Clause
4
+ # ---------------------------------------------------------------------
5
+ """Shared helpers for QDC on-device test runners (Linux/IoT devices).
6
+
7
+ Unlike Android devices which use ADB, Linux IoT devices are accessed via
8
+ SSH through QDC's infrastructure. The test scripts run directly on the
9
+ device through the QDC Appium session.
10
+ """
11
+
12
+ import os
13
+ import subprocess
14
+ import tempfile
15
+
16
+ from appium.options.common import AppiumOptions
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # On-device paths (Linux IoT)
20
+ # ---------------------------------------------------------------------------
21
+
22
# Directory on the device that holds the pushed llama.cpp bundle.
BUNDLE_PATH = "/tmp/llama_cpp_bundle"
# Directory on the device where write_qdc_log() places log files.
QDC_LOGS_PATH = "/tmp/QDC_logs"
LIB_PATH = f"{BUNDLE_PATH}/lib"
BIN_PATH = f"{BUNDLE_PATH}/bin"
# Shell fragment that exports the library paths and marks the binaries
# executable. It ends with "&&", so a command must be appended after it.
ENV_PREFIX = (
    f"export LD_LIBRARY_PATH={LIB_PATH}:$LD_LIBRARY_PATH && "
    f"export ADSP_LIBRARY_PATH={LIB_PATH} && "
    f"chmod +x {BIN_PATH}/* &&"
)
# ENV_PREFIX preceded by a cd into the bundle directory; callers append the
# actual command to run.
CMD_PREFIX = f"cd {BUNDLE_PATH} && {ENV_PREFIX}"
32
+
33
+ # ---------------------------------------------------------------------------
34
+ # Appium session options (Linux/IoT)
35
+ # ---------------------------------------------------------------------------
36
+
37
# Capabilities for the Appium session; imported by conftest.py's driver fixture.
options = AppiumOptions()
options.set_capability("automationName", "QDCLinux")
options.set_capability("platformName", "Linux")
# Device name comes from QDC_DEVICE_NAME, falling back to QCS9075M.
options.set_capability("deviceName", os.getenv("QDC_DEVICE_NAME", "QCS9075M"))
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # SSH/Shell helpers for Linux devices
44
+ # ---------------------------------------------------------------------------
45
+
46
+
47
def run_shell_command(cmd: str, *, check: bool = True, timeout: float = 300) -> subprocess.CompletedProcess:
    """Run a shell command on the Linux device over ssh.

    The command is sent to the host named by ``QDC_DEVICE_HOST`` (default
    ``localhost``) with ``; echo __RC__:$?`` appended, so the remote exit code
    can be recovered from the output instead of relying on ssh's own exit
    status. stderr is merged into stdout, and the output (minus the marker) is
    printed.

    Args:
        cmd: Shell command line to execute on the device.
        check: When true, a non-zero exit code raises ``AssertionError``.
        timeout: Seconds to wait for ssh before ``subprocess.TimeoutExpired``
            is raised.

    Returns:
        A ``CompletedProcess`` whose ``returncode`` is the remote command's
        exit code when the marker was found (otherwise ssh's exit code) and
        whose ``stdout`` is the combined output without the marker.

    Raises:
        AssertionError: If ``check`` is true and the exit code is non-zero.
        subprocess.TimeoutExpired: If ssh does not finish within ``timeout``.
    """
    marker = "__RC__:"
    raw = subprocess.run(
        ["ssh", os.getenv("QDC_DEVICE_HOST", "localhost"), f"{cmd}; echo {marker}$?"],
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        timeout=timeout,
    )
    stdout = raw.stdout
    returncode = raw.returncode
    if stdout:
        # Find the marker wherever the output ends, not only at the start of
        # the last line: if the command's output lacks a trailing newline the
        # marker is glued onto that line, and missing it would fall back to
        # ssh's exit status (the status of `echo`, i.e. 0) and hide a failure.
        body, found, code = stdout.rstrip("\n").rpartition(marker)
        if found and code.isdigit():
            returncode = int(code)
            # Keep the output newline-terminated, as callers write it to logs.
            stdout = body if body.endswith("\n") else body + "\n"
    print(stdout)
    result = subprocess.CompletedProcess(raw.args, returncode, stdout=stdout)
    if check and returncode != 0:
        # Raised explicitly (not via `assert`) so it still fires under `python -O`.
        raise AssertionError(f"Command failed (exit {returncode})")
    return result
76
+
77
+
78
def write_qdc_log(filename: str, content: str) -> None:
    """Write content as a log file to QDC_LOGS_PATH on the device for QDC log collection.

    The content is written to a local temporary file, copied to
    ``QDC_LOGS_PATH/<filename>`` on the host named by ``QDC_DEVICE_HOST``
    (default ``localhost``) with scp, and the temporary file is removed
    afterwards. Upload is best-effort: a failed copy is reported on stdout but
    does not raise.

    Args:
        filename: Name of the log file to create inside ``QDC_LOGS_PATH``.
        content: Text to store in the log file.

    Raises:
        subprocess.TimeoutExpired: If scp does not finish within 60 seconds.
    """
    run_shell_command(f"mkdir -p {QDC_LOGS_PATH}", check=False)
    # Fixed UTF-8 with replacement: the content is arbitrary tool output and
    # must not fail to serialise under a restrictive locale encoding.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".log", delete=False, encoding="utf-8", errors="replace"
    ) as f:
        f.write(content)
        tmp_path = f.name
    try:
        copy = subprocess.run(
            ["scp", tmp_path, f"{os.getenv('QDC_DEVICE_HOST', 'localhost')}:{QDC_LOGS_PATH}/{filename}"],
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            timeout=60,
        )
        if copy.returncode != 0:
            # Surface the failure instead of dropping it silently; a missing
            # log should not fail the test run itself.
            print(f"WARNING: could not upload {filename} to {QDC_LOGS_PATH} (scp exit {copy.returncode}): {copy.stdout}")
    finally:
        os.unlink(tmp_path)
93
+
94
+
95
def push_bundle_if_needed(check_binary: str) -> None:
    """Push llama_cpp_bundle to the device if check_binary is not already present.

    Presence is probed with ``ls`` on the device. When the binary is missing,
    ``/qdc/appium/llama_cpp_bundle/`` is copied recursively to ``/tmp/`` on the
    host named by ``QDC_DEVICE_HOST`` (default ``localhost``).

    Args:
        check_binary: Path on the device whose existence means the bundle has
            already been pushed.

    Raises:
        RuntimeError: If scp exits non-zero.
        subprocess.TimeoutExpired: If scp does not finish within 120 seconds.
    """
    probe = run_shell_command(f"ls {check_binary}", check=False)
    if probe.returncode == 0:
        return
    push = subprocess.run(
        ["scp", "-r", "/qdc/appium/llama_cpp_bundle/", f"{os.getenv('QDC_DEVICE_HOST', 'localhost')}:/tmp/"],
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        timeout=120,
    )
    if push.returncode != 0:
        # Every test depends on the bundle, so fail here with scp's own output
        # rather than letting later commands fail with "not found" errors.
        print(push.stdout)
        raise RuntimeError(f"Failed to push llama_cpp_bundle to the device (scp exit {push.returncode})")