syscred_duplicate / syscred /liar_benchmark_test.json
D Ф m i И i q ц e L Ф y e r
Deploy SysCRED v2.3.1 - GraphRAG + LIAR benchmark + TREC integration
8e97fc5
{
"timestamp": "2026-02-03T11:06:54.379689",
"dataset": "LIAR",
"metrics": {
"total_statements": 100,
"successful_evaluations": 100,
"error_count": 0,
"error_rate": 0.0,
"binary": {
"accuracy": 0.57,
"precision": 1.0,
"recall": 0.1568627450980392,
"f1": 0.2711864406779661,
"confusion_matrix": [
[
8,
43
],
[
0,
49
]
]
},
"ternary": {
"accuracy": 0.34,
"macro_f1": 0.18686868686868685,
"confusion_matrix": [
[
0,
35,
0
],
[
0,
33,
0
],
[
0,
31,
1
]
]
},
"classification_report": {
"Fake": {
"precision": 1.0,
"recall": 0.1568627450980392,
"f1-score": 0.2711864406779661,
"support": 51.0
},
"Real": {
"precision": 0.532608695652174,
"recall": 1.0,
"f1-score": 0.6950354609929078,
"support": 49.0
},
"accuracy": 0.57,
"macro avg": {
"precision": 0.7663043478260869,
"recall": 0.5784313725490196,
"f1-score": 0.483110950835437,
"support": 100.0
},
"weighted avg": {
"precision": 0.7709782608695653,
"recall": 0.57,
"f1-score": 0.4788724606322875,
"support": 100.0
}
},
"score_distribution": {
"mean": 0.5134000000000001,
"min": 0.35,
"max": 0.69,
"median": 0.52
},
"per_party": {
"republican": {
"count": 43,
"accuracy": 0.5581395348837209
},
"democrat": {
"count": 27,
"accuracy": 0.6296296296296297
}
},
"elapsed_time": 108.7882571220398,
"statements_per_second": 0.9192168589282478
},
"config": {
"threshold": 0.5,
"use_graphrag": true,
"weights": {
"source_reputation": 0.22,
"domain_age": 0.08,
"sentiment_neutrality": 0.13,
"entity_presence": 0.13,
"coherence": 0.12,
"fact_check": 0.17,
"graph_context": 0.15
}
}
}