Spaces:
Running
Running
Update index.html
Browse files - index.html (+9 -1)
index.html
CHANGED
|
@@ -441,7 +441,15 @@ async function loadModel(type, modelId) {
|
|
| 441 |
try {
|
| 442 |
const tf = await getTransformers();
|
| 443 |
if (type === 'llm') {
|
| 444 |
- (content of removed line 444 was not captured in this extract)
|
| 445 |
} else if (type === 'embedder') {
|
| 446 |
STATE.models.embedder = await tf.pipeline('feature-extraction', modelId, { progress_callback: handleProgress });
|
| 447 |
} else if (type === 'reranker') {
|
|
|
|
| 441 |
try {
|
| 442 |
const tf = await getTransformers();
|
| 443 |
if (type === 'llm') {
|
| 444 |
+
if (!navigator.gpu) {
|
| 445 |
+
throw new Error('WebGPU not available in this browser. q4f16 quantization REQUIRES WebGPU (it uses GatherBlockQuantized kernels that do not exist on CPU). Use Chrome 113+ or switch dtype to "q4" for CPU fallback.');
|
| 446 |
+
}
|
| 447 |
+
tf.env.backends.onnx.wasm.proxy = false;
|
| 448 |
+
STATE.models.llm = await tf.pipeline('text-generation', modelId, {
|
| 449 |
+
progress_callback: handleProgress,
|
| 450 |
+
dtype: 'q4f16',
|
| 451 |
+
device: 'webgpu'
|
| 452 |
+
});
|
| 453 |
} else if (type === 'embedder') {
|
| 454 |
STATE.models.embedder = await tf.pipeline('feature-extraction', modelId, { progress_callback: handleProgress });
|
| 455 |
} else if (type === 'reranker') {
|