Spaces:

Ma-Ri-Ba-Ku
/

IIIF-Studio

Build error

IIIF-Studio / backend /tests /test_response_parser.py

Claude

perf: Sprint 2a — caching, parallel pipeline, async I/O, tests

5d9b495 unverified 18 days ago

5.11 kB

	"""
	Tests pour response_parser.py — extraction JSON, correction VLM, parsing tolérant.
	"""
	import pytest

	from app.services.ai.response_parser import (
	ParseError,
	_extract_json_object,
	_fix_common_json_issues,
	_try_parse_json,
	parse_ai_response,
	)


	# ── _extract_json_object ─────────────────────────────────────────────────────

	class TestExtractJsonObject:
	    """Checks for ``_extract_json_object`` on clean, wrapped and malformed input."""

	    def test_simple_object(self):
	        """A bare JSON object comes back unchanged."""
	        payload = '{"a": 1}'
	        assert _extract_json_object(payload) == payload

	    def test_text_before_json(self):
	        """Prose preceding the object is not part of the extracted text."""
	        expected = '{"a": 1}'
	        assert _extract_json_object('Here is the JSON: {"a": 1}') == expected

	    def test_text_after_json(self):
	        """Prose following the object is not part of the extracted text."""
	        expected = '{"a": 1}'
	        assert _extract_json_object('{"a": 1} and more text') == expected

	    def test_nested_braces(self):
	        """Nested objects are returned whole, up to the outermost closing brace."""
	        payload = '{"a": {"b": {"c": 1}}}'
	        assert _extract_json_object(payload) == payload

	    def test_braces_inside_strings(self):
	        """Braces that appear inside a string value do not end the object early."""
	        payload = '{"text": "value with { and } inside"}'
	        assert _extract_json_object(payload) == payload

	    def test_escaped_quotes(self):
	        """Backslash-escaped quotes inside a string value are preserved."""
	        payload = '{"text": "he said \\"hello\\""}'
	        assert _extract_json_object(payload) == payload

	    def test_no_json(self):
	        """Input without any object is handed back as-is."""
	        plain = "no json here"
	        assert _extract_json_object(plain) == plain

	    def test_unclosed_json(self):
	        """An unterminated object still yields text starting at its opening brace."""
	        extracted = _extract_json_object('some text {"a": 1')
	        # Only the prefix is pinned; whatever follows it is left unspecified.
	        assert extracted.startswith('{"a": 1')


	# ── _fix_common_json_issues ──────────────────────────────────────────────────

	class TestFixCommonJsonIssues:
	    """Checks for ``_fix_common_json_issues`` trailing-comma clean-up."""

	    def test_trailing_comma_before_brace(self):
	        """A comma directly before ``}`` is removed."""
	        repaired = _fix_common_json_issues('{"a": 1,}')
	        assert repaired == '{"a": 1}'

	    def test_trailing_comma_before_bracket(self):
	        """A comma directly before ``]`` is removed."""
	        repaired = _fix_common_json_issues('[1, 2,]')
	        assert repaired == '[1, 2]'

	    def test_trailing_comma_with_whitespace(self):
	        """A trailing comma surrounded by spaces is removed; one space remains."""
	        repaired = _fix_common_json_issues('{"a": 1 , }')
	        assert repaired == '{"a": 1 }'

	    def test_no_issues(self):
	        """Well-formed JSON text passes through untouched."""
	        well_formed = '{"a": 1, "b": 2}'
	        assert _fix_common_json_issues(well_formed) == well_formed


	# ── _try_parse_json ──────────────────────────────────────────────────────────

	class TestTryParseJson:
	    """Checks for ``_try_parse_json`` on valid, repairable and invalid text."""

	    def test_valid_json(self):
	        """Valid JSON text is decoded into the matching dict."""
	        decoded = _try_parse_json('{"a": 1}')
	        assert decoded == {"a": 1}

	    def test_json_with_trailing_comma(self):
	        """A trailing comma does not prevent decoding."""
	        assert _try_parse_json('{"a": 1,}') == {"a": 1}

	    def test_invalid_json(self):
	        """Text that is not JSON yields ``None``."""
	        decoded = _try_parse_json("not json at all")
	        assert decoded is None


	# ── parse_ai_response ────────────────────────────────────────────────────────

	class TestParseAiResponse:
	    """Checks for ``parse_ai_response``, which returns a ``(layout, ocr)`` pair."""

	    def test_clean_json(self):
	        """A well-formed response yields its single region and the OCR text."""
	        # Adjacent literals concatenate to one single-line JSON document.
	        raw = (
	            '{"layout": {"regions": [{"id": "r1", "type": "text_block", '
	            '"bbox": [10, 20, 100, 200], "confidence": 0.9}]}, '
	            '"ocr": {"diplomatic_text": "hello", "confidence": 0.8}}'
	        )
	        layout, ocr = parse_ai_response(raw)
	        regions = layout["regions"]
	        assert len(regions) == 1
	        assert regions[0]["id"] == "r1"
	        assert ocr.diplomatic_text == "hello"

	    def test_markdown_fenced_json(self):
	        """A response wrapped in a Markdown ``json`` code fence is still parsed."""
	        raw = '```json\n{"layout": {"regions": []}, "ocr": {"diplomatic_text": "test"}}\n```'
	        layout, ocr = parse_ai_response(raw)
	        assert layout["regions"] == []
	        assert ocr.diplomatic_text == "test"

	    def test_text_around_json(self):
	        """Chatty text before and after the object does not affect the OCR result."""
	        raw = 'Here is my analysis:\n{"layout": {"regions": []}, "ocr": {"diplomatic_text": "ok"}}\nHope this helps!'
	        _layout, ocr = parse_ai_response(raw)
	        assert ocr.diplomatic_text == "ok"

	    def test_invalid_region_skipped(self):
	        """Only region ``r2`` survives; ``r1`` is dropped.

	        NOTE(review): ``r1`` is presumably rejected for its negative bbox
	        coordinate -- confirm against the parser's validation rules.
	        """
	        raw = (
	            '{"layout": {"regions": ['
	            '{"id": "r1", "type": "text_block", "bbox": [-1, 0, 100, 200], "confidence": 0.5}, '
	            '{"id": "r2", "type": "miniature", "bbox": [10, 20, 100, 200], "confidence": 0.8}'
	            ']}}'
	        )
	        layout, _ocr = parse_ai_response(raw)
	        kept = layout["regions"]
	        assert len(kept) == 1
	        assert kept[0]["id"] == "r2"

	    def test_missing_ocr_returns_default(self):
	        """Without an ``ocr`` key the OCR result has empty text and zero confidence."""
	        raw = '{"layout": {"regions": []}}'
	        _layout, ocr = parse_ai_response(raw)
	        assert ocr.diplomatic_text == ""
	        assert ocr.confidence == 0.0

	    def test_not_json_raises_parse_error(self):
	        """Plain prose with no braces raises ``ParseError``."""
	        not_json = "This is not JSON at all, no braces anywhere"
	        with pytest.raises(ParseError):
	            parse_ai_response(not_json)

	    def test_json_array_raises_parse_error(self):
	        """A top-level JSON array raises ``ParseError``."""
	        array_only = "[1, 2, 3]"
	        with pytest.raises(ParseError):
	            parse_ai_response(array_only)

	    def test_trailing_comma_tolerance(self):
	        """Trailing commas inside the objects do not prevent parsing."""
	        raw = '{"layout": {"regions": [],}, "ocr": {"diplomatic_text": "tolerant",}}'
	        _layout, ocr = parse_ai_response(raw)
	        assert ocr.diplomatic_text == "tolerant"