Spaces:
Build error
Build error
File size: 5,110 Bytes
"""
Tests for response_parser.py — JSON extraction, VLM output correction, tolerant parsing.
"""
import pytest
from app.services.ai.response_parser import (
ParseError,
_extract_json_object,
_fix_common_json_issues,
_try_parse_json,
parse_ai_response,
)
# ── _extract_json_object ─────────────────────────────────────────────────────
class TestExtractJsonObject:
    """Checks of ``_extract_json_object`` against a range of raw strings."""

    def test_simple_object(self):
        """A bare JSON object is expected to come back unchanged."""
        payload = '{"a": 1}'
        assert _extract_json_object(payload) == payload

    def test_text_before_json(self):
        """Text preceding the object is expected to be dropped."""
        assert _extract_json_object('Here is the JSON: {"a": 1}') == '{"a": 1}'

    def test_text_after_json(self):
        """Text following the object is expected to be dropped."""
        assert _extract_json_object('{"a": 1} and more text') == '{"a": 1}'

    def test_nested_braces(self):
        """Nested objects are expected to be returned in full."""
        nested = '{"a": {"b": {"c": 1}}}'
        assert _extract_json_object(nested) == nested

    def test_braces_inside_strings(self):
        """Braces inside a string value must not end the extraction early."""
        source = '{"text": "value with { and } inside"}'
        assert _extract_json_object(source) == source

    def test_escaped_quotes(self):
        """Escaped quotes inside a string value must not end the string."""
        source = '{"text": "he said \\"hello\\""}'
        assert _extract_json_object(source) == source

    def test_no_json(self):
        """Input without any braces is expected to come back unchanged."""
        plain = "no json here"
        assert _extract_json_object(plain) == plain

    def test_unclosed_json(self):
        """An unterminated object is expected to be returned from its '{'."""
        extracted = _extract_json_object('some text {"a": 1')
        assert extracted.startswith('{"a": 1')
# ── _fix_common_json_issues ──────────────────────────────────────────────────
class TestFixCommonJsonIssues:
    """Checks of ``_fix_common_json_issues`` on trailing-comma inputs."""

    def test_trailing_comma_before_brace(self):
        """A comma directly before ``}`` is expected to be removed."""
        repaired = _fix_common_json_issues('{"a": 1,}')
        assert repaired == '{"a": 1}'

    def test_trailing_comma_before_bracket(self):
        """A comma directly before ``]`` is expected to be removed."""
        repaired = _fix_common_json_issues('[1, 2,]')
        assert repaired == '[1, 2]'

    def test_trailing_comma_with_whitespace(self):
        """A trailing comma surrounded by spaces is expected to be removed."""
        repaired = _fix_common_json_issues('{"a": 1 , }')
        assert repaired == '{"a": 1 }'

    def test_no_issues(self):
        """Well-formed JSON text is expected to pass through untouched."""
        assert _fix_common_json_issues('{"a": 1, "b": 2}') == '{"a": 1, "b": 2}'
# ── _try_parse_json ──────────────────────────────────────────────────────────
class TestTryParseJson:
    """Checks of ``_try_parse_json`` on valid, repairable and invalid text."""

    def test_valid_json(self):
        """Valid JSON text is expected to parse into the matching dict."""
        parsed = _try_parse_json('{"a": 1}')
        assert parsed == {"a": 1}

    def test_json_with_trailing_comma(self):
        """A trailing comma is expected to be tolerated while parsing."""
        assert _try_parse_json('{"a": 1,}') == {"a": 1}

    def test_invalid_json(self):
        """Text that is not JSON is expected to yield ``None``."""
        outcome = _try_parse_json("not json at all")
        assert outcome is None
# ── parse_ai_response ────────────────────────────────────────────────────────
class TestParseAiResponse:
    """Checks of ``parse_ai_response`` on raw model output strings.

    Each test unpacks the result as a ``(layout, ocr)`` pair; the half a test
    does not inspect is bound to ``_``.
    """

    def test_clean_json(self):
        """A clean JSON payload is expected to yield its region and OCR text."""
        raw = (
            '{"layout": {"regions": [{"id": "r1", "type": "text_block", '
            '"bbox": [10, 20, 100, 200], "confidence": 0.9}]}, '
            '"ocr": {"diplomatic_text": "hello", "confidence": 0.8}}'
        )
        layout, ocr = parse_ai_response(raw)
        assert len(layout["regions"]) == 1
        assert layout["regions"][0]["id"] == "r1"
        assert ocr.diplomatic_text == "hello"

    def test_markdown_fenced_json(self):
        """A payload wrapped in a Markdown ``json`` fence is expected to parse."""
        raw = '```json\n{"layout": {"regions": []}, "ocr": {"diplomatic_text": "test"}}\n```'
        layout, ocr = parse_ai_response(raw)
        assert layout["regions"] == []
        assert ocr.diplomatic_text == "test"

    def test_text_around_json(self):
        """Prose before and after the JSON object is expected to be ignored."""
        raw = (
            'Here is my analysis:\n'
            '{"layout": {"regions": []}, "ocr": {"diplomatic_text": "ok"}}\n'
            'Hope this helps!'
        )
        _, ocr = parse_ai_response(raw)
        assert ocr.diplomatic_text == "ok"

    def test_invalid_region_skipped(self):
        """Only region ``r2`` is expected to remain in the parsed layout.

        Region ``r1`` carries ``-1`` as its first bbox value and is expected
        to be left out of the result.
        """
        raw = (
            '{"layout": {"regions": ['
            '{"id": "r1", "type": "text_block", "bbox": [-1, 0, 100, 200], "confidence": 0.5}, '
            '{"id": "r2", "type": "miniature", "bbox": [10, 20, 100, 200], "confidence": 0.8}'
            ']}}'
        )
        layout, _ = parse_ai_response(raw)
        assert len(layout["regions"]) == 1
        assert layout["regions"][0]["id"] == "r2"

    def test_missing_ocr_returns_default(self):
        """Without an ``ocr`` key, empty text and 0.0 confidence are expected."""
        raw = '{"layout": {"regions": []}}'
        _, ocr = parse_ai_response(raw)
        assert ocr.diplomatic_text == ""
        assert ocr.confidence == 0.0

    def test_not_json_raises_parse_error(self):
        """Text without any JSON is expected to raise ``ParseError``."""
        with pytest.raises(ParseError):
            parse_ai_response("This is not JSON at all, no braces anywhere")

    def test_json_array_raises_parse_error(self):
        """A top-level JSON array is expected to raise ``ParseError``."""
        with pytest.raises(ParseError):
            parse_ai_response("[1, 2, 3]")

    def test_trailing_comma_tolerance(self):
        """Trailing commas inside the payload are expected to be tolerated."""
        raw = '{"layout": {"regions": [],}, "ocr": {"diplomatic_text": "tolerant",}}'
        _, ocr = parse_ai_response(raw)
        assert ocr.diplomatic_text == "tolerant"
|