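"""
Manual benchmark for the SemanticDeduplicator component.

Compresses a fixed, repetitive sample text at several threshold values and
prints the resulting word counts and compression ratios.
"""
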
import logging
import sys

from efficient_context.compression import SemanticDeduplicator

# Configure the root logger at import time with an INFO threshold.
# basicConfig() is a no-op if the root logger already has handlers.
logging.basicConfig(level=logging.INFO)

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


def main():
    """Run SemanticDeduplicator over a repetitive sample at several thresholds.

    For each threshold value a fresh ``SemanticDeduplicator`` is constructed,
    the sample text is passed to its ``compress`` method, and the following
    are printed to stdout: the compressed word count, the ratio of compressed
    to original word count, and the first 100 characters of the result.

    Returns:
        None. All results are reported via ``print``.
    """
    print("Testing SemanticDeduplicator")

    # Two groups of five sentences each (climate change, renewable energy)
    # that restate similar ideas in different words.
    # NOTE(review): the leading whitespace inside this literal was
    # reconstructed; the word counts below are unaffected because they rely
    # on str.split().
    repetitive_text = """
    Climate change is a significant global challenge.
    Global warming is affecting ecosystems worldwide.
    The Earth's temperature is rising due to human activities.
    Climate change poses a serious threat to our planet.
    Rising global temperatures are causing environmental problems.

    Renewable energy is key to a sustainable future.
    Clean energy sources help reduce carbon emissions.
    Sustainable power generation is vital for fighting climate change.
    Green energy technologies are becoming more affordable.
    Renewable resources provide alternatives to fossil fuels.
    """

    # The input never changes, so count its words once rather than on every
    # loop iteration.
    original_word_count = len(repetitive_text.split())
    print(f"Original text length: {original_word_count} words")

    for threshold in [0.7, 0.8, 0.85, 0.9, 0.95]:
        print(f"\nTesting threshold: {threshold}")

        # A new instance per threshold keeps the runs independent.
        deduplicator = SemanticDeduplicator(threshold=threshold)
        compressed_text = deduplicator.compress(repetitive_text)

        compressed_word_count = len(compressed_text.split())
        # Ratio is compressed/original, so a smaller value means more of the
        # text was removed.
        ratio = compressed_word_count / original_word_count
        print(f"Compressed text length: {compressed_word_count} words")
        print(f"Compression ratio: {ratio:.2f}")

        # Only the first 100 characters are shown to keep the output short.
        print(f"Compressed text (preview): {compressed_text[:100]}...")


# Run the benchmark only when this file is executed as a script; importing
# the module must not trigger it.
if __name__ == "__main__":
    main()