rikhoffbauer2 committed on
Commit
75a0e92
·
verified ·
1 Parent(s): 78b9c5f

Upload floorplan/parser.py

Browse files
Files changed (1) hide show
  1. floorplan/parser.py +336 -0
floorplan/parser.py ADDED
@@ -0,0 +1,336 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ VLM-based floor plan parser — extract structured wall/door/window data from floor plan images.
3
+
4
+ Uses a vision-language model (via OpenAI-compatible API) to:
5
+ 1. Parse the floor plan image into the schema format
6
+ 2. Optionally compare rendered overlay with original for corrections
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import base64
12
+ import io
13
+ import json
14
+ import re
15
+ from pathlib import Path
16
+ from typing import Optional
17
+
18
+ from PIL import Image
19
+
20
+ from .schema import (
21
+ FloorPlan,
22
+ Wall,
23
+ Opening,
24
+ OpeningType,
25
+ Point2D,
26
+ CorrectionResult,
27
+ Correction,
28
+ CorrectionAction,
29
+ )
30
+ from .geometry import build_rooms, compute_floor_plan_geometry
31
+
32
+
33
+ def _image_to_base64(image: Image.Image | str | Path) -> str:
34
+ """Convert image to base64 data URI."""
35
+ if isinstance(image, (str, Path)):
36
+ image = Image.open(image)
37
+ image = image.convert("RGB")
38
+ buf = io.BytesIO()
39
+ image.save(buf, format="PNG")
40
+ b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
41
+ return f"data:image/png;base64,{b64}"
42
+
43
+
44
+ def _extract_json_from_response(text: str) -> dict:
45
+ """Extract JSON from a VLM response that may contain markdown code blocks."""
46
+ text = text.strip()
47
+ try:
48
+ return json.loads(text)
49
+ except json.JSONDecodeError:
50
+ pass
51
+ patterns = [
52
+ r"```json\s*\n?(.*?)\n?\s*```",
53
+ r"```\s*\n?(.*?)\n?\s*```",
54
+ r"\{.*\}",
55
+ ]
56
+ for pattern in patterns:
57
+ match = re.search(pattern, text, re.DOTALL)
58
+ if match:
59
+ try:
60
+ candidate = match.group(1) if match.lastindex else match.group(0)
61
+ return json.loads(candidate)
62
+ except (json.JSONDecodeError, IndexError):
63
+ continue
64
+ raise ValueError(f"Could not extract JSON from response:\n{text[:500]}")
65
+
66
+
67
# System prompt for the initial parse request (used as the "system" message
# in FloorPlanParser.parse_image). It asks the model for pixel-coordinate
# walls with centerline, thickness and openings, as a single JSON object.
# This is runtime text sent to the model: edit with care.
PARSE_SYSTEM_PROMPT = """You are an expert architectural floor plan parser. Your job is to analyze floor plan images and extract structured data about walls, doors, and windows.

IMPORTANT RULES:
1. All coordinates are in a consistent unit system (use the image pixel coordinates, we'll convert to meters later)
2. Walls are defined by their centerline (a polyline) and thickness
3. Doors and windows are defined by their position ALONG a wall's centerline
4. Each wall must connect to adjacent walls (shared endpoints or very close endpoints)
5. The wall network must form closed loops (rooms)
6. Exterior walls are typically thicker (15-30 pixels) than interior walls (8-15 pixels)
7. Curved walls should be approximated as polylines with many points

OUTPUT FORMAT:
You must output valid JSON matching this exact schema:
{
"walls": [
{
"id": "w1",
"centerline": [{"x": 100, "y": 50}, {"x": 500, "y": 50}],
"thickness": 20,
"openings": [
{
"id": "d1",
"type": "door",
"start": 50,
"length": 80
}
]
}
]
}

WALL DETECTION GUIDELINES:
- Trace the CENTER of each wall, not its edges
- Each wall segment should be a straight run between junctions/corners
- At T-junctions and L-corners, start a new wall segment
- Estimate thickness by measuring the visible wall width in pixels
- Be precise with coordinates — they will be overlaid on the original image for verification

OPENING DETECTION GUIDELINES:
- "start" = distance from the first centerline point along the wall to where the opening begins
- "length" = length of the opening along the wall
- Doors typically show an arc swing or a gap in the wall
- Windows typically show parallel lines or a different fill pattern within the wall
- Double-check that start + length doesn't exceed the wall's total centerline length"""
111
+
112
+
113
# System prompt for the verification request (used as the "system" message
# in FloorPlanParser.critique_overlay). It asks the model to compare the
# original image with the rendered overlay and to answer with a JSON object
# holding "score", "converged" and a list of add/modify/delete "corrections".
# This is runtime text sent to the model: edit with care.
CORRECTION_SYSTEM_PROMPT = """You are an expert architectural floor plan verifier. You are shown two images:

1. The ORIGINAL floor plan image
2. An OVERLAY showing the parsed schema rendered on top of the original

Your job is to compare them and identify discrepancies. Look for:
- Walls that are misaligned (rendered wall doesn't match original wall position)
- Missing walls (visible in original but not in the overlay)
- Extra walls (in overlay but not in original)
- Wrong wall thickness (too thick or too thin compared to original)
- Misplaced doors/windows (wrong position along the wall)
- Missing doors/windows (visible in original but not detected)
- Wrong opening sizes (too wide or too narrow)

OUTPUT FORMAT:
{
"score": 0.85,
"converged": false,
"corrections": [
{
"action": "modify",
"target": "w3",
"field": "centerline",
"value": [{"x": 150, "y": 200}, {"x": 150, "y": 500}],
"reason": "Wall is about 20 pixels too far to the right"
},
{
"action": "add",
"target": null,
"field": null,
"value": {"id": "w11", "centerline": [{"x": 100, "y": 300}, {"x": 400, "y": 300}], "thickness": 12, "openings": []},
"reason": "Interior partition wall visible in original but missing from parse"
},
{
"action": "delete",
"target": "w5",
"field": null,
"value": null,
"reason": "This is not a wall - it's a staircase outline"
}
]
}

SCORING GUIDE:
- 1.0: Perfect alignment, no corrections needed
- 0.9+: Minor tweaks only (small position adjustments)
- 0.7-0.9: Some walls need repositioning, maybe 1-2 missing
- 0.5-0.7: Major misalignments, several missing/extra walls
- <0.5: Fundamental errors, most walls wrong

Set "converged": true if score >= 0.92 or if remaining corrections are cosmetic only."""
164
+
165
+
166
class FloorPlanParser:
    """VLM-based floor plan parser with iterative correction.

    Wraps an OpenAI-compatible chat-completions client. ``parse_image`` asks
    the model for an initial wall/opening schema, ``critique_overlay`` asks it
    to compare a rendered overlay with the original image, and
    ``apply_corrections`` folds the returned corrections back into the schema.
    """

    def __init__(self, api_key=None, base_url=None, model="gpt-4o"):
        """Create the API client.

        Args:
            api_key: API key; forwarded to ``OpenAI`` only when truthy.
            base_url: Base URL of the endpoint; forwarded only when truthy.
            model: Model name used for every request.

        Raises:
            ImportError: If the ``openai`` package is not installed.
        """
        try:
            from openai import OpenAI
        except ImportError as exc:
            raise ImportError("openai package required: pip install openai") from exc
        kwargs = {}
        if api_key:
            kwargs["api_key"] = api_key
        if base_url:
            kwargs["base_url"] = base_url
        self.client = OpenAI(**kwargs)
        self.model = model

    def parse_image(self, image, detail="high", temperature=0.1):
        """Parse a floor plan image into the structured schema.

        Args:
            image: PIL image or path, as accepted by ``_image_to_base64``.
            detail: Value sent as the ``detail`` field of the image part.
            temperature: Sampling temperature for the request.

        Returns:
            ``(floorplan, room_polygons)`` as built by ``_data_to_floorplan``.

        Raises:
            ValueError: If no JSON can be extracted from the model reply.
        """
        image_b64 = _image_to_base64(image)
        response = self.client.chat.completions.create(
            model=self.model,
            temperature=temperature,
            max_tokens=16384,
            messages=[
                {"role": "system", "content": PARSE_SYSTEM_PROMPT},
                {"role": "user", "content": [
                    {"type": "text", "text": "Parse this floor plan image. Extract ALL walls with their centerlines, thicknesses, and any doors/windows on them. Use pixel coordinates. Be thorough — every wall segment matters for room detection."},
                    {"type": "image_url", "image_url": {"url": image_b64, "detail": detail}},
                ]},
            ],
        )
        # The reply content may be None; normalise it to an empty string so
        # the extractor reports it as a ValueError instead of an AttributeError.
        raw = response.choices[0].message.content or ""
        data = _extract_json_from_response(raw)
        return self._data_to_floorplan(data)

    def critique_overlay(self, original_image, overlay_image, current_schema, iteration=1, temperature=0.2):
        """Compare overlay with original and generate corrections.

        Args:
            original_image: The source floor plan image (PIL image or path).
            overlay_image: The schema rendered on top of the original.
            current_schema: The floor plan being verified; serialised with
                ``model_dump_json`` and included in the request text.
            iteration: 1-based iteration number, echoed into the request and
                the result.
            temperature: Sampling temperature for the request.

        Returns:
            A ``CorrectionResult``. ``score`` defaults to 0.5 and
            ``converged`` to False when the reply omits them. Corrections
            that cannot be interpreted are skipped with a warning.

        Raises:
            ValueError: If no JSON can be extracted from the model reply.
        """
        orig_b64 = _image_to_base64(original_image)
        overlay_b64 = _image_to_base64(overlay_image)
        schema_json = current_schema.model_dump_json(indent=2)
        wall_count = len(current_schema.walls)
        opening_count = sum(len(w.openings) for w in current_schema.walls)
        request_text = (
            f"Iteration {iteration}. Compare these two images and identify corrections needed.\n\n"
            f"Current schema ({wall_count} walls, {opening_count} openings):\n"
            f"```json\n{schema_json}\n```\n\n"
            "Image 1 is the ORIGINAL floor plan. Image 2 is the OVERLAY (parsed schema rendered on the original)."
        )
        response = self.client.chat.completions.create(
            model=self.model,
            temperature=temperature,
            max_tokens=8192,
            messages=[
                {"role": "system", "content": CORRECTION_SYSTEM_PROMPT},
                {"role": "user", "content": [
                    {"type": "text", "text": request_text},
                    {"type": "image_url", "image_url": {"url": orig_b64, "detail": "high"}},
                    {"type": "image_url", "image_url": {"url": overlay_b64, "detail": "high"}},
                ]},
            ],
        )
        raw = response.choices[0].message.content or ""
        data = _extract_json_from_response(raw)

        # Model output is untrusted: one malformed entry (unknown action,
        # missing key, wrong type) must not discard the usable corrections,
        # mirroring how unparseable walls are skipped elsewhere in this class.
        corrections = []
        for c in data.get("corrections") or []:
            try:
                corrections.append(Correction(
                    action=CorrectionAction(c["action"]),
                    target=c.get("target"),
                    field=c.get("field"),
                    value=c.get("value"),
                    reason=c.get("reason") or "",
                ))
            except (KeyError, ValueError, TypeError, AttributeError) as e:
                print(f"Warning: skipping malformed correction: {e}")
        return CorrectionResult(
            iteration=iteration,
            score=data.get("score", 0.5),
            converged=data.get("converged", False),
            corrections=corrections,
        )

    def apply_corrections(self, floorplan, corrections):
        """Apply corrections to a floor plan schema.

        Supported actions:

        * DELETE: remove the wall whose id equals ``target``.
        * ADD: insert ``value`` (a wall dict); an id is generated when the
          dict has none.
        * MODIFY: set ``field`` on the wall ``target``, or on one of its
          openings when ``target`` has the form ``<wall>.openings.<opening>``.

        Corrections that reference unknown walls are ignored. Walls that no
        longer parse after modification are skipped with a warning.

        Args:
            floorplan: The current floor plan; it is not modified.
            corrections: Object with a ``corrections`` list, as returned by
                ``critique_overlay``.

        Returns:
            ``(new_floorplan, room_polygons)``; scale and origin are carried
            over from ``floorplan`` and rooms are rebuilt from the new walls.
        """
        walls_dict = {w.id: w.model_dump() for w in floorplan.walls}
        for correction in corrections.corrections:
            if correction.action == CorrectionAction.DELETE:
                if correction.target:
                    walls_dict.pop(correction.target, None)
            elif correction.action == CorrectionAction.ADD:
                if correction.value and isinstance(correction.value, dict):
                    # Copy so the correction object itself is left untouched.
                    wall_data = dict(correction.value)
                    wall_id = wall_data.get("id")
                    if not wall_id:
                        # Generate an unused id AND store it in the wall data:
                        # _parse_wall requires "id", so a wall keyed only in
                        # walls_dict would be dropped as unparseable.
                        suffix = len(walls_dict)
                        wall_id = f"w_new_{suffix}"
                        while wall_id in walls_dict:
                            suffix += 1
                            wall_id = f"w_new_{suffix}"
                        wall_data["id"] = wall_id
                    walls_dict[wall_id] = wall_data
            elif correction.action == CorrectionAction.MODIFY:
                if not correction.target:
                    continue
                parts = correction.target.split(".")
                wall_id = parts[0]
                if wall_id not in walls_dict:
                    continue
                if len(parts) == 1 and correction.field:
                    walls_dict[wall_id][correction.field] = correction.value
                elif len(parts) == 3 and parts[1] == "openings":
                    opening_id = parts[2]
                    for op in walls_dict[wall_id].get("openings", []):
                        if op.get("id") == opening_id and correction.field:
                            op[correction.field] = correction.value
                            break
        new_walls = []
        for wall_data in walls_dict.values():
            try:
                new_walls.append(self._parse_wall(wall_data))
            except Exception as e:
                print(f"Warning: skipping wall due to error: {e}")
                continue
        # NOTE(review): min_area is 1.0 here but 100.0 in _data_to_floorplan;
        # confirm whether the difference is intentional.
        rooms, room_polygons = build_rooms(new_walls, min_area=1.0)
        return FloorPlan(scale=floorplan.scale, origin=floorplan.origin, walls=new_walls, rooms=rooms), room_polygons

    def _data_to_floorplan(self, data):
        """Convert raw parsed JSON to FloorPlan with rooms detected.

        Args:
            data: Decoded JSON object; walls are read from its ``"walls"`` key.

        Returns:
            ``(floorplan, room_polygons)``. Walls that fail to parse are
            skipped with a warning.
        """
        walls = []
        for w in data.get("walls") or []:
            try:
                walls.append(self._parse_wall(w))
            except Exception as e:
                # w may not even be a dict, so do not call .get() blindly
                # inside the handler.
                wall_id = w.get("id", "?") if isinstance(w, dict) else "?"
                print(f"Warning: skipping wall {wall_id}: {e}")
                continue
        rooms, room_polygons = build_rooms(walls, min_area=100.0)
        fp = FloorPlan(walls=walls, rooms=rooms)
        return fp, room_polygons

    @staticmethod
    def _parse_wall(w):
        """Parse a wall from dict, handling various coordinate formats.

        Centerline points may be ``{"x": ..., "y": ...}`` dicts or ``[x, y]``
        lists/tuples.

        Args:
            w: Wall dict with ``"id"``, ``"centerline"``, ``"thickness"`` and
                optionally ``"openings"``.

        Returns:
            The constructed ``Wall``.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If a centerline point has an unsupported format, or a
                numeric field cannot be converted.
        """
        centerline = []
        for pt in w["centerline"]:
            if isinstance(pt, dict):
                centerline.append(Point2D(x=pt["x"], y=pt["y"]))
            elif isinstance(pt, (list, tuple)):
                centerline.append(Point2D(x=pt[0], y=pt[1]))
            else:
                # Dropping the point silently would change the wall geometry;
                # callers catch this and skip the wall with a warning.
                raise ValueError(f"unsupported centerline point: {pt!r}")
        openings = [
            Opening(id=o["id"], type=OpeningType(o["type"]),
                    start=float(o["start"]), length=float(o["length"]))
            for o in w.get("openings") or []
        ]
        return Wall(id=w["id"], centerline=centerline, thickness=float(w["thickness"]), openings=openings)
290
+
291
+
292
def parse_floorplan(image, api_key=None, base_url=None, model="gpt-4o",
                    max_iterations=4, convergence_threshold=0.92, verbose=True):
    """Full parse-render-correct pipeline.

    Parses the image once, then repeats up to ``max_iterations`` times:
    render an overlay, ask the model to critique it, and apply the returned
    corrections. The loop stops early when the critique reports convergence,
    when its score reaches ``convergence_threshold``, or when it suggests no
    corrections.

    Args:
        image: Floor plan image passed to the parser and the overlay renderer.
        api_key: Forwarded to ``FloorPlanParser``.
        base_url: Forwarded to ``FloorPlanParser``.
        model: Forwarded to ``FloorPlanParser``.
        max_iterations: Maximum number of critique rounds.
        convergence_threshold: Score at or above which the loop stops.
        verbose: When true, progress is printed to stdout.

    Returns:
        ``(final_floorplan, room_polygons, overlays)`` where ``overlays``
        holds one overlay per critique round followed by a final overlay
        rendered after the loop.
    """
    from .renderer import overlay_on_image

    def say(message):
        # Single place for the verbosity check.
        if verbose:
            print(message)

    def render(plan, polygons):
        return overlay_on_image(plan, image, room_polygons=polygons,
                                schema_opacity=0.6, original_opacity=0.7)

    parser = FloorPlanParser(api_key=api_key, base_url=base_url, model=model)
    say("🔍 Parsing floor plan image...")
    floorplan, room_polygons = parser.parse_image(image)
    if verbose:
        print(f" Found {len(floorplan.walls)} walls, "
              f"{sum(len(w.openings) for w in floorplan.walls)} openings, "
              f"{len(floorplan.rooms)} rooms")

    overlays = []
    for iteration in range(1, max_iterations + 1):
        say(f"\n📐 Iteration {iteration}: rendering overlay...")
        overlay = render(floorplan, room_polygons)
        overlays.append(overlay)

        say(" Comparing overlay with original...")
        critique = parser.critique_overlay(original_image=image, overlay_image=overlay,
                                           current_schema=floorplan, iteration=iteration)
        if verbose:
            print(f" Score: {critique.score:.2f}, Corrections: {len(critique.corrections)}, Converged: {critique.converged}")
            for c in critique.corrections:
                print(f" - [{c.action.value}] {c.target or 'new'}: {c.reason}")

        if critique.converged or critique.score >= convergence_threshold:
            say(f"\n✅ Converged at iteration {iteration} (score={critique.score:.2f})")
            break
        if not critique.corrections:
            # Nothing to apply, so another round would be identical.
            say("\n⚠️ No corrections suggested, stopping.")
            break

        floorplan, room_polygons = parser.apply_corrections(floorplan, critique)
        say(f" Applied corrections → {len(floorplan.walls)} walls, {len(floorplan.rooms)} rooms")

    # Always finish with an overlay of the final schema.
    overlays.append(render(floorplan, room_polygons))
    return floorplan, room_polygons, overlays