Spaces:

quickgrid
/

RAG-Visualizer

Running

quickgrid committed 4 days ago

Commit

8a5dba2

verified ·

1 Parent(s): abc06db

Update index.html

Files changed (1) hide show

index.html CHANGED Viewed

@@ -441,7 +441,15 @@ async function loadModel(type, modelId) {
   try {
     const tf = await getTransformers();
     if (type === 'llm') {
-      STATE.models.llm = await tf.pipeline('text-generation', modelId, { progress_callback: handleProgress, dtype: 'q4f16' });
     } else if (type === 'embedder') {
       STATE.models.embedder = await tf.pipeline('feature-extraction', modelId, { progress_callback: handleProgress });
     } else if (type === 'reranker') {

   try {
     const tf = await getTransformers();
     if (type === 'llm') {
+      if (!navigator.gpu) {
+        throw new Error('WebGPU not available in this browser. q4f16 quantization REQUIRES WebGPU (it uses GatherBlockQuantized kernels that do not exist on CPU). Use Chrome 113+ or switch dtype to "q4" for CPU fallback.');
+      }
+      tf.env.backends.onnx.wasm.proxy = false;
+      STATE.models.llm = await tf.pipeline('text-generation', modelId, {
+        progress_callback: handleProgress,
+        dtype: 'q4f16',
+        device: 'webgpu'
+      });
     } else if (type === 'embedder') {
       STATE.models.embedder = await tf.pipeline('feature-extraction', modelId, { progress_callback: handleProgress });
     } else if (type === 'reranker') {