// (removed non-source page residue: "Spaces: Running Running")
// web/src/worker.js
import {
  AutoProcessor,
  Gemma4ForConditionalGeneration,
  TextStreamer,
  InterruptableStoppingCriteria,
  load_image,
  RawImage,
} from "@huggingface/transformers";
// NOTE(review): verify this repo id and the Gemma4ForConditionalGeneration
// import actually exist in @huggingface/transformers — the "E2B" naming
// matches Gemma 3n checkpoints (e.g. "gemma-3n-E2B-it-ONNX"); confirm.
const MODEL_ID = "onnx-community/gemma-4-E2B-it-ONNX";

// Sentinel markers the UI layer splits on to render the "thinking" section.
const THINK_START = "‹‹THINK››";
const THINK_END = "‹‹/THINK››";

/**
 * Normalizes raw streamed model text for display: maps the model's
 * channel/thought control tags onto the ‹‹THINK›› sentinels, strips any
 * remaining special tags, and trims surrounding whitespace.
 * @param {string} raw - accumulated decoded text from the streamer
 * @returns {string} cleaned text safe to show in the UI
 */
function cleanGemmaOutput(raw) {
  const rewrites = [
    [/<\|?channel\|?>?\s*thought\s*/gi, THINK_START], // "thought" opener → start marker
    [/<\|?channell?\|?>/gi, THINK_END], // remaining channel tags → end marker
    [/<\|?[a-z_]+\|?>/gi, ""], // any other special tag is dropped
  ];
  let text = raw;
  for (const [pattern, replacement] of rewrites) {
    text = text.replace(pattern, replacement);
  }
  return text.trim();
}
// Lazily-initialized singletons, populated by loadModel().
let processor = null; // AutoProcessor (chat templating + media preprocessing)
let model = null; // the loaded generation model
// Shared stopping criteria; interrupt() aborts an in-flight generate().
const stoppingCriteria = new InterruptableStoppingCriteria();
/**
 * Probes for WebGPU support and reports the result to the main thread as a
 * { type: "status" } message: "webgpu-available" when an adapter is returned,
 * "webgpu-unavailable" otherwise (including when requestAdapter throws).
 */
async function checkWebGPU() {
  try {
    const adapter = await navigator.gpu?.requestAdapter();
    const status = adapter ? "webgpu-available" : "webgpu-unavailable";
    self.postMessage({ type: "status", status });
  } catch {
    // requestAdapter rejected — treat as no WebGPU.
    self.postMessage({ type: "status", status: "webgpu-unavailable" });
  }
}
/**
 * Downloads and initializes the processor and model, reporting progress to
 * the main thread. Emits "loading" → per-file "progress" events → "ready",
 * or a single { type: "error" } message on failure.
 *
 * NOTE(review): Gemma4ForConditionalGeneration / MODEL_ID come from the
 * imports above — confirm they resolve in the installed
 * @huggingface/transformers version.
 */
async function loadModel() {
  try {
    self.postMessage({ type: "status", status: "loading" });
    // Forward library download/init progress events verbatim to the UI.
    const reportProgress = (p) => self.postMessage({ type: "progress", ...p });
    processor = await AutoProcessor.from_pretrained(MODEL_ID, {
      progress_callback: reportProgress,
    });
    model = await Gemma4ForConditionalGeneration.from_pretrained(MODEL_ID, {
      dtype: "q4f16", // 4-bit weights / fp16 activations
      device: "webgpu",
      progress_callback: reportProgress,
    });
    self.postMessage({ type: "status", status: "ready" });
  } catch (err) {
    self.postMessage({ type: "error", message: err.message });
  }
}
/**
 * Runs one generation request and streams cleaned text back to the UI.
 *
 * Message protocol: "status: generating" → repeated { type: "update" } with
 * the cumulative cleaned text → { type: "complete" } with the final text;
 * any failure is reported as { type: "error" }.
 *
 * @param {Object} req
 * @param {Array}  req.messages      - chat history for apply_chat_template
 * @param {string} [req.imageUrl]    - optional image to attach
 * @param {Object} [req.videoData]   - optional { frames: [{data,width,height,channels}] }
 * @param {*}      [req.audioData]   - optional audio input, passed through to the processor
 * @param {boolean} req.enableThinking - toggles the template's thinking mode
 */
async function generate({ messages, imageUrl, videoData, audioData, enableThinking }) {
  // Guard: refuse requests until loadModel() has populated both singletons.
  if (!model || !processor) {
    self.postMessage({ type: "error", message: "Model not loaded" });
    return;
  }
  try {
    self.postMessage({ type: "status", status: "generating" });
    stoppingCriteria.reset();

    const prompt = processor.apply_chat_template(messages, {
      enable_thinking: enableThinking,
      add_generation_prompt: true,
    });

    // Gemma4ImageProcessor expects RawImage | RawImage[], not RawVideo,
    // so video is converted to a per-frame RawImage array.
    let visual = null;
    if (videoData) {
      const toRawImage = (frame) =>
        new RawImage(
          new Uint8ClampedArray(frame.data),
          frame.width,
          frame.height,
          frame.channels
        );
      visual = videoData.frames.map(toRawImage);
    } else if (imageUrl) {
      visual = await load_image(imageUrl);
    }

    const inputs = await processor(prompt, visual, audioData ?? null, {
      add_special_tokens: false,
    });

    // Stream: accumulate raw text, re-clean the whole buffer each chunk so
    // partially-emitted control tags get normalized once complete.
    let accumulated = "";
    const streamer = new TextStreamer(processor.tokenizer, {
      skip_prompt: true,
      skip_special_tokens: false, // keep tags so cleanGemmaOutput can map them
      callback_function: (chunk) => {
        accumulated += chunk;
        self.postMessage({ type: "update", text: cleanGemmaOutput(accumulated) });
      },
    });

    await model.generate({
      ...inputs,
      max_new_tokens: 512,
      do_sample: false, // greedy decoding
      streamer,
      stopping_criteria: [stoppingCriteria],
    });

    self.postMessage({ type: "complete", text: cleanGemmaOutput(accumulated) });
  } catch (err) {
    self.postMessage({ type: "error", message: err.message });
  }
}
// Main-thread command dispatcher. Async handlers manage their own errors
// (each posts { type: "error" } internally), so fire-and-forget is safe here.
self.onmessage = ({ data }) => {
  const { type } = data;
  if (type === "check") {
    checkWebGPU();
  } else if (type === "load") {
    loadModel();
  } else if (type === "generate") {
    generate(data);
  } else if (type === "interrupt") {
    // Stops an in-flight model.generate() via the shared stopping criteria.
    stoppingCriteria.interrupt();
  }
};