voidful committed on
Commit
a3caaa3
·
verified ·
1 Parent(s): 838da49

Require primary evidence for verified references

Browse files
Files changed (2) hide show
  1. main.py +36 -0
  2. src/comparator.py +3 -0
main.py CHANGED
@@ -551,11 +551,14 @@ def validate_entry(entry, workflow, fetchers, comparator):
551
  if data: result = comparator.compare(entry, data, "scholar")
552
 
553
  if result:
 
 
554
  results.append(result)
555
 
556
  if results:
557
  best = max(results, key=lambda r: r.confidence)
558
  _apply_cross_source_conflict_guard(best, results)
 
559
  return best, results
560
 
561
  # No results
@@ -592,6 +595,39 @@ def _apply_cross_source_conflict_guard(best, results) -> None:
592
  best.confidence = min(best.confidence, 0.8)
593
 
594
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
595
 
596
 
597
 
 
551
  if data: result = comparator.compare(entry, data, "scholar")
552
 
553
  if result:
554
+ result.evidence_step = step.name
555
+ result.evidence_url = getattr(data, "url", "") if data else ""
556
  results.append(result)
557
 
558
  if results:
559
  best = max(results, key=lambda r: r.confidence)
560
  _apply_cross_source_conflict_guard(best, results)
561
+ _apply_evidence_guard(best, results)
562
  return best, results
563
 
564
  # No results
 
595
  best.confidence = min(best.confidence, 0.8)
596
 
597
 
598
def _apply_evidence_guard(
    best,
    results,
    *,
    primary_steps=frozenset({"arxiv_id", "arxiv_title", "crossref_doi"}),
    min_title_similarity=0.95,
) -> None:
    """Require primary evidence or at least two agreeing exact sources.

    A matching ``best`` result is left untouched when either

    * its ``evidence_step`` is one of ``primary_steps``, or
    * at least one other matching result with a title similarity of at
      least ``min_title_similarity`` reports the same non-empty
      ``fetched_year`` under a different ``source`` value.

    Otherwise ``best`` is demoted in place: an "Insufficient evidence"
    issue is recorded (once), ``is_match`` is cleared and ``confidence``
    is capped at 0.8.

    Args:
        best: The highest-confidence comparison result, or a falsy value.
        results: All comparison results collected for the entry
            (normally including ``best`` itself).
        primary_steps: Evidence step names that are sufficient on their own.
        min_title_similarity: Minimum ``title_similarity`` another result
            needs in order to count as agreeing.

    Returns:
        None. ``best`` is mutated in place when it lacks evidence.
    """
    if not best or not getattr(best, "is_match", False):
        return

    if getattr(best, "evidence_step", "") in primary_steps:
        return

    # Read defensively, like every other attribute here, so a result object
    # without a ``source`` cannot crash the guard.
    best_source = getattr(best, "source", "")
    best_year = str(getattr(best, "fetched_year", "") or "").strip()

    agreeing_sources = {best_source}
    for result in results:
        if result is best or not getattr(result, "is_match", False):
            continue
        # ``or 0.0`` keeps an explicit None similarity from raising on ``<``.
        if (getattr(result, "title_similarity", 0.0) or 0.0) < min_title_similarity:
            continue
        other_year = str(getattr(result, "fetched_year", "") or "").strip()
        # An empty year on ``best`` can never be corroborated.
        if best_year and other_year == best_year:
            agreeing_sources.add(getattr(result, "source", ""))

    if len(agreeing_sources) >= 2:
        return

    issue = (
        "Insufficient evidence: exact match found only in "
        f"{best_source}; needs arXiv/DOI evidence or another agreeing source"
    )
    # Tolerate results whose issue list is missing or None.
    issues = getattr(best, "issues", None)
    if issues is None:
        issues = []
        best.issues = issues
    if issue not in issues:
        issues.append(issue)
    best.is_match = False
    best.confidence = min(best.confidence, 0.8)
629
+
630
+
631
 
632
 
633
 
src/comparator.py CHANGED
@@ -191,6 +191,9 @@ class MetadataComparator:
191
  # --- Year Comparison ---
192
  bib_year = str(bib_entry.year).strip()
193
  fetched_year = str(getattr(fetched_data, 'year', '')).strip()
 
 
 
194
  year_match = bool(bib_year and fetched_year and bib_year == fetched_year)
195
 
196
  if not bib_year:
 
191
  # --- Year Comparison ---
192
  bib_year = str(bib_entry.year).strip()
193
  fetched_year = str(getattr(fetched_data, 'year', '')).strip()
194
+ conference_year = str(getattr(fetched_data, 'conference_year', '') or '').strip()
195
+ if source_name.startswith("arxiv") and conference_year and conference_year.isdigit():
196
+ fetched_year = conference_year
197
  year_match = bool(bib_year and fetched_year and bib_year == fetched_year)
198
 
199
  if not bib_year: