Spaces:
Running
Running
File size: 2,479 Bytes
c9955a9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
{
"current_model": {
"name": "Granite-107M-Multilingual",
"repo": "ibm-granite/granite-embedding-107m-multilingual",
"params": "107M",
"pros": [
"Already integrated and working",
"Fast (107M parameters)",
"Proven in production tests",
"Correctly deduplicated Gemma-3 (47.8% dupes)",
"0% false positives with Qwen2.5 1.5B"
],
"cons": [
"Smaller model (107M vs 500M+)",
"May miss nuanced similarities"
],
"test_results": {
"qwen2.5_1.5b_extraction": {
"duplicate_rate": "0%",
"deduplication_accuracy": "100%",
"note": "Extraction already unique per window"
},
"gemma3_1b_extraction": {
"duplicate_rate": "47.8%",
"deduplication_accuracy": "100%",
"note": "Correctly identified all duplicates"
}
}
},
"alternatives": {
"bge_m3": {
"name": "BGE-M3",
"repo": "BAAI/bge-m3",
"gguf_repo": "lm-kit/bge-m3-gguf",
"params": "568M",
"pros": [
"SOTA on MTEB Chinese benchmarks",
"Larger model (568M vs 107M)",
"Better semantic understanding"
],
"cons": [
"5x larger (slower)",
"Requires sentence-transformers (not GGUF)",
"Unknown if GGUF version works with llama-cpp"
],
"recommendation": "Worth testing if accuracy issues arise"
},
"multilingual_e5": {
"name": "Multilingual-E5-Large",
"repo": "intfloat/multilingual-e5-large",
"params": "560M",
"pros": [
"Microsoft-backed, widely tested",
"Excellent for multilingual",
"Good for Chinese text"
],
"cons": [
"5x larger than Granite-107M",
"Requires sentence-transformers",
"No GGUF version readily available"
],
"recommendation": "Consider if switching to sentence-transformers"
}
},
"recommendation": {
"current_status": "KEEP Granite-107M",
"rationale": [
"Working correctly in production",
"Fast enough for real-time use",
"Zero false positives in tests",
"Simple GGUF integration"
],
"when_to_upgrade": [
"If false positives/negatives appear in production",
"If need better semantic matching (not just exact duplicates)",
"If processing very long texts (need better context understanding)"
],
"suggested_thresholds": {
"strict": 0.9,
"default": 0.85,
"lenient": 0.8
}
}
}