rikhoffbauer2 committed on
Commit
7dc3aac
·
verified ·
1 Parent(s): d68baaa

Update parser.py: handle Qwen3 thinking tokens, /no_think support

Browse files
Files changed (1) hide show
  1. floorplan/parser.py +19 -6
floorplan/parser.py CHANGED
@@ -42,7 +42,9 @@ def _image_to_base64(image: Image.Image | str | Path) -> str:
42
 
43
 
44
  def _extract_json_from_response(text: str) -> dict:
45
- """Extract JSON from a VLM response that may contain markdown code blocks."""
 
 
46
  text = text.strip()
47
  try:
48
  return json.loads(text)
@@ -182,12 +184,18 @@ class FloorPlanParser:
182
  def parse_image(self, image, detail="high", temperature=0.1):
183
  """Parse a floor plan image into the structured schema."""
184
  image_b64 = _image_to_base64(image)
 
 
 
 
 
 
185
  response = self.client.chat.completions.create(
186
- model=self.model, temperature=temperature, max_tokens=16384,
187
  messages=[
188
- {"role": "system", "content": PARSE_SYSTEM_PROMPT},
189
  {"role": "user", "content": [
190
- {"type": "text", "text": "Parse this floor plan image. Extract ALL walls with their centerlines, thicknesses, and any doors/windows on them. Use pixel coordinates. Be thorough — every wall segment matters for room detection."},
191
  {"type": "image_url", "image_url": {"url": image_b64, "detail": detail}},
192
  ]},
193
  ],
@@ -201,10 +209,15 @@ class FloorPlanParser:
201
  orig_b64 = _image_to_base64(original_image)
202
  overlay_b64 = _image_to_base64(overlay_image)
203
  schema_json = current_schema.model_dump_json(indent=2)
 
 
 
 
 
204
  response = self.client.chat.completions.create(
205
- model=self.model, temperature=temperature, max_tokens=8192,
206
  messages=[
207
- {"role": "system", "content": CORRECTION_SYSTEM_PROMPT},
208
  {"role": "user", "content": [
209
  {"type": "text", "text": f"Iteration {iteration}. Compare these two images and identify corrections needed.\n\nCurrent schema ({len(current_schema.walls)} walls, {sum(len(w.openings) for w in current_schema.walls)} openings):\n```json\n{schema_json}\n```\n\nImage 1 is the ORIGINAL floor plan. Image 2 is the OVERLAY (parsed schema rendered on the original)."},
210
  {"type": "image_url", "image_url": {"url": orig_b64, "detail": "high"}},
 
42
 
43
 
44
  def _extract_json_from_response(text: str) -> dict:
45
+ """Extract JSON from a VLM response that may contain markdown code blocks or thinking tokens."""
46
+ # Strip Qwen3-style thinking blocks
47
+ text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
48
  text = text.strip()
49
  try:
50
  return json.loads(text)
 
184
  def parse_image(self, image, detail="high", temperature=0.1):
185
  """Parse a floor plan image into the structured schema."""
186
  image_b64 = _image_to_base64(image)
187
+
188
+ # For Qwen3 models, prepend /no_think to disable extended reasoning
189
+ system_content = PARSE_SYSTEM_PROMPT
190
+ if "Qwen3" in self.model:
191
+ system_content = "/no_think\n" + system_content
192
+
193
  response = self.client.chat.completions.create(
194
+ model=self.model, temperature=temperature, max_tokens=8192,
195
  messages=[
196
+ {"role": "system", "content": system_content},
197
  {"role": "user", "content": [
198
+ {"type": "text", "text": "Parse this floor plan image. Extract ALL walls with their centerlines, thicknesses, and any doors/windows on them. Use pixel coordinates. Be thorough — every wall segment matters for room detection. Output ONLY the JSON, no explanation."},
199
  {"type": "image_url", "image_url": {"url": image_b64, "detail": detail}},
200
  ]},
201
  ],
 
209
  orig_b64 = _image_to_base64(original_image)
210
  overlay_b64 = _image_to_base64(overlay_image)
211
  schema_json = current_schema.model_dump_json(indent=2)
212
+
213
+ system_content = CORRECTION_SYSTEM_PROMPT
214
+ if "Qwen3" in self.model:
215
+ system_content = "/no_think\n" + system_content
216
+
217
  response = self.client.chat.completions.create(
218
+ model=self.model, temperature=temperature, max_tokens=4096,
219
  messages=[
220
+ {"role": "system", "content": system_content},
221
  {"role": "user", "content": [
222
  {"type": "text", "text": f"Iteration {iteration}. Compare these two images and identify corrections needed.\n\nCurrent schema ({len(current_schema.walls)} walls, {sum(len(w.openings) for w in current_schema.walls)} openings):\n```json\n{schema_json}\n```\n\nImage 1 is the ORIGINAL floor plan. Image 2 is the OVERLAY (parsed schema rendered on the original)."},
223
  {"type": "image_url", "image_url": {"url": orig_b64, "detail": "high"}},