Update parser.py: handle Qwen3 thinking tokens, /no_think support
Browse files — floorplan/parser.py (+19 −6)
floorplan/parser.py
CHANGED
|
@@ -42,7 +42,9 @@ def _image_to_base64(image: Image.Image | str | Path) -> str:
|
|
| 42 |
|
| 43 |
|
| 44 |
def _extract_json_from_response(text: str) -> dict:
|
| 45 |
-
"""Extract JSON from a VLM response that may contain markdown code blocks."""
|
|
|
|
|
|
|
| 46 |
text = text.strip()
|
| 47 |
try:
|
| 48 |
return json.loads(text)
|
|
@@ -182,12 +184,18 @@ class FloorPlanParser:
|
|
| 182 |
def parse_image(self, image, detail="high", temperature=0.1):
|
| 183 |
"""Parse a floor plan image into the structured schema."""
|
| 184 |
image_b64 = _image_to_base64(image)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
response = self.client.chat.completions.create(
|
| 186 |
-
model=self.model, temperature=temperature, max_tokens=
|
| 187 |
messages=[
|
| 188 |
-
{"role": "system", "content":
|
| 189 |
{"role": "user", "content": [
|
| 190 |
-
{"type": "text", "text": "Parse this floor plan image. Extract ALL walls with their centerlines, thicknesses, and any doors/windows on them. Use pixel coordinates. Be thorough — every wall segment matters for room detection."},
|
| 191 |
{"type": "image_url", "image_url": {"url": image_b64, "detail": detail}},
|
| 192 |
]},
|
| 193 |
],
|
|
@@ -201,10 +209,15 @@ class FloorPlanParser:
|
|
| 201 |
orig_b64 = _image_to_base64(original_image)
|
| 202 |
overlay_b64 = _image_to_base64(overlay_image)
|
| 203 |
schema_json = current_schema.model_dump_json(indent=2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
response = self.client.chat.completions.create(
|
| 205 |
-
model=self.model, temperature=temperature, max_tokens=
|
| 206 |
messages=[
|
| 207 |
-
{"role": "system", "content":
|
| 208 |
{"role": "user", "content": [
|
| 209 |
{"type": "text", "text": f"Iteration {iteration}. Compare these two images and identify corrections needed.\n\nCurrent schema ({len(current_schema.walls)} walls, {sum(len(w.openings) for w in current_schema.walls)} openings):\n```json\n{schema_json}\n```\n\nImage 1 is the ORIGINAL floor plan. Image 2 is the OVERLAY (parsed schema rendered on the original)."},
|
| 210 |
{"type": "image_url", "image_url": {"url": orig_b64, "detail": "high"}},
|
|
|
|
| 42 |
|
| 43 |
|
| 44 |
def _extract_json_from_response(text: str) -> dict:
|
| 45 |
+
"""Extract JSON from a VLM response that may contain markdown code blocks or thinking tokens."""
|
| 46 |
+
# Strip Qwen3-style thinking blocks
|
| 47 |
+
text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
|
| 48 |
text = text.strip()
|
| 49 |
try:
|
| 50 |
return json.loads(text)
|
|
|
|
| 184 |
def parse_image(self, image, detail="high", temperature=0.1):
|
| 185 |
"""Parse a floor plan image into the structured schema."""
|
| 186 |
image_b64 = _image_to_base64(image)
|
| 187 |
+
|
| 188 |
+
# For Qwen3 models, prepend /no_think to disable extended reasoning
|
| 189 |
+
system_content = PARSE_SYSTEM_PROMPT
|
| 190 |
+
if "Qwen3" in self.model:
|
| 191 |
+
system_content = "/no_think\n" + system_content
|
| 192 |
+
|
| 193 |
response = self.client.chat.completions.create(
|
| 194 |
+
model=self.model, temperature=temperature, max_tokens=8192,
|
| 195 |
messages=[
|
| 196 |
+
{"role": "system", "content": system_content},
|
| 197 |
{"role": "user", "content": [
|
| 198 |
+
{"type": "text", "text": "Parse this floor plan image. Extract ALL walls with their centerlines, thicknesses, and any doors/windows on them. Use pixel coordinates. Be thorough — every wall segment matters for room detection. Output ONLY the JSON, no explanation."},
|
| 199 |
{"type": "image_url", "image_url": {"url": image_b64, "detail": detail}},
|
| 200 |
]},
|
| 201 |
],
|
|
|
|
| 209 |
orig_b64 = _image_to_base64(original_image)
|
| 210 |
overlay_b64 = _image_to_base64(overlay_image)
|
| 211 |
schema_json = current_schema.model_dump_json(indent=2)
|
| 212 |
+
|
| 213 |
+
system_content = CORRECTION_SYSTEM_PROMPT
|
| 214 |
+
if "Qwen3" in self.model:
|
| 215 |
+
system_content = "/no_think\n" + system_content
|
| 216 |
+
|
| 217 |
response = self.client.chat.completions.create(
|
| 218 |
+
model=self.model, temperature=temperature, max_tokens=4096,
|
| 219 |
messages=[
|
| 220 |
+
{"role": "system", "content": system_content},
|
| 221 |
{"role": "user", "content": [
|
| 222 |
{"type": "text", "text": f"Iteration {iteration}. Compare these two images and identify corrections needed.\n\nCurrent schema ({len(current_schema.walls)} walls, {sum(len(w.openings) for w in current_schema.walls)} openings):\n```json\n{schema_json}\n```\n\nImage 1 is the ORIGINAL floor plan. Image 2 is the OVERLAY (parsed schema rendered on the original)."},
|
| 223 |
{"type": "image_url", "image_url": {"url": orig_b64, "detail": "high"}},
|