import re
import random
import time

import nltk
import numpy as np
from typing import Dict
from nltk.tokenize import sent_tokenize, word_tokenize
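# The NLTK tokenizers used below need the 'punkt' data package at runtime.
# A hedged bootstrap, assuming a standard NLTK install: 'punkt_tab' is only
# required on newer NLTK releases and is a harmless no-op on older ones.
for _nltk_resource in ("punkt", "punkt_tab"):
    try:
        nltk.download(_nltk_resource, quiet=True)
    except Exception:
        pass  # data may already be present, or we may be offline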
# Advanced imports with fallbacks

def safe_import_with_fallback(module_name, component=None):
    """Import a module (or one component of it), returning (object, available)."""
    try:
        if component:
            module = __import__(module_name, fromlist=[component])
            return getattr(module, component), True
        return __import__(module_name), True
    except Exception:
        # Covers ImportError as well as errors raised while the module loads
        return None, False
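# Illustrative usage of the helper above ('textstat' is a hypothetical name,
# just to show the (object, flag) contract; the real imports follow below):
#   textstat, TEXTSTAT_AVAILABLE = safe_import_with_fallback('textstat')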
# Load advanced models
print("🚀 Loading Universal AI Text Humanizer...")

SentenceTransformer, SENTENCE_TRANSFORMERS_AVAILABLE = safe_import_with_fallback(
    'sentence_transformers', 'SentenceTransformer'
)
pipeline, TRANSFORMERS_AVAILABLE = safe_import_with_fallback('transformers', 'pipeline')

try:
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity as sklearn_cosine_similarity
    SKLEARN_AVAILABLE = True
except ImportError:
    SKLEARN_AVAILABLE = False

try:
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False

class UniversalAITextHumanizer:
    """
    Universal AI Text Humanizer for all business use cases.
    Based on QuillBot and Walter Writes AI research.
    Simplified interface with only Natural/Conversational modes.
    """

    def __init__(self, enable_gpu=True):
        print("🔧 Initializing Universal AI Text Humanizer...")
        print("🎯 Designed for E-commerce, Marketing, SEO & All Business Needs")
        self.enable_gpu = enable_gpu and TORCH_AVAILABLE

        # Initialize models and databases
        self._load_models()
        self._initialize_universal_patterns()

        print("✅ Universal AI Text Humanizer ready for all use cases!")
        self._print_status()
    def _load_models(self):
        """Load AI models with graceful fallbacks."""
        self.similarity_model = None
        self.paraphraser = None

        # Load sentence transformer for quality control
        # (self.enable_gpu already implies TORCH_AVAILABLE, set in __init__)
        if SENTENCE_TRANSFORMERS_AVAILABLE:
            try:
                device = 'cuda' if self.enable_gpu and torch.cuda.is_available() else 'cpu'
                self.similarity_model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
                print("✅ Advanced similarity model loaded")
            except Exception as e:
                print(f"⚠️ Similarity model unavailable: {e}")

        # Load paraphrasing model
        if TRANSFORMERS_AVAILABLE:
            try:
                device = 0 if self.enable_gpu and torch.cuda.is_available() else -1
                self.paraphraser = pipeline(
                    "text2text-generation",
                    model="google/flan-t5-small",
                    device=device,
                    max_length=256
                )
                print("✅ AI paraphrasing model loaded")
            except Exception as e:
                print(f"⚠️ Paraphrasing model unavailable: {e}")

        # Fallback similarity using TF-IDF
        if SKLEARN_AVAILABLE:
            self.tfidf_vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2), max_features=5000)
        else:
            self.tfidf_vectorizer = None
    def _initialize_universal_patterns(self):
        """Initialize patterns based on QuillBot & Walter Writes research."""
        # Universal word replacements (business-friendly)
        self.word_replacements = {
            # Formal business terms -> natural alternatives
            "utilize": "use", "demonstrate": "show", "facilitate": "help", "implement": "set up",
            "consequently": "so", "furthermore": "also", "moreover": "plus", "nevertheless": "but",
            "subsequently": "then", "accordingly": "therefore", "regarding": "about", "concerning": "about",
            "approximately": "about", "endeavor": "try", "commence": "start", "terminate": "end",
            "obtain": "get", "purchase": "buy", "examine": "check", "analyze": "look at",
            "construct": "build", "establish": "create", "methodology": "method", "systematic": "organized",
            "comprehensive": "complete", "significant": "important", "substantial": "large", "optimal": "best",
            "sufficient": "enough", "adequate": "good", "exceptional": "great", "fundamental": "basic",
            "essential": "key", "crucial": "important", "paramount": "very important", "imperative": "must",
            "mandatory": "required", "optimization": "improvement", "enhancement": "upgrade",
            "implementation": "setup", "utilization": "use", "evaluation": "review", "assessment": "check",
            "validation": "proof", "verification": "confirmation", "consolidation": "combining",
            "integration": "merging", "transformation": "change", "modification": "change"
        }

        # AI-specific phrases to replace (QuillBot research)
        self.ai_phrase_replacements = {
            "it is important to note that": "notably", "it should be emphasized that": "importantly",
            "it is worth mentioning that": "by the way", "it is crucial to understand that": "remember",
            "from a practical standpoint": "practically", "in terms of implementation": "when implementing",
            "with respect to the aforementioned": "about this", "as previously mentioned": "as noted",
            "in light of this": "because of this", "it is imperative to understand": "you should know",
            "one must consider": "consider", "it is evident that": "clearly", "it can be observed that": "we can see",
            "upon careful consideration": "after thinking", "in the final analysis": "ultimately"
        }

        # Professional contractions (universal appeal)
        self.contractions = {
            "do not": "don't", "does not": "doesn't", "did not": "didn't", "will not": "won't",
            "would not": "wouldn't", "should not": "shouldn't", "could not": "couldn't", "cannot": "can't",
            "is not": "isn't", "are not": "aren't", "was not": "wasn't", "were not": "weren't",
            "have not": "haven't", "has not": "hasn't", "had not": "hadn't", "I am": "I'm",
            "you are": "you're", "he is": "he's", "she is": "she's", "it is": "it's",
            "we are": "we're", "they are": "they're", "I have": "I've", "you have": "you've",
            "we have": "we've", "they have": "they've", "I will": "I'll", "you will": "you'll",
            "we will": "we'll", "they will": "they'll"
        }

        # Natural transition words (Walter Writes research)
        self.natural_transitions = [
            "Also", "Plus", "And", "Then", "So", "But", "However", "Still", "Now", "Well",
            "Actually", "Besides", "Additionally", "What's more", "On top of that", "Beyond that"
        ]
    def preserve_structure(self, original: str, processed: str) -> str:
        """Preserve the original text structure (paragraph breaks)."""
        # Split by blank lines (paragraphs)
        original_paragraphs = re.split(r'\n\s*\n', original)
        if len(original_paragraphs) <= 1:
            return processed

        # Split processed text into sentences
        processed_sentences = sent_tokenize(processed)

        # Re-assemble sentences to match the original paragraph counts
        result_paragraphs = []
        sentence_idx = 0
        for para in original_paragraphs:
            para_sentence_count = len(sent_tokenize(para))
            if sentence_idx + para_sentence_count <= len(processed_sentences):
                para_processed = ' '.join(processed_sentences[sentence_idx:sentence_idx + para_sentence_count])
                result_paragraphs.append(para_processed)
                sentence_idx += para_sentence_count
            else:
                # Put any remaining sentences into this paragraph
                remaining = ' '.join(processed_sentences[sentence_idx:])
                if remaining:
                    result_paragraphs.append(remaining)
                break
        return '\n\n'.join(result_paragraphs)
    def apply_word_replacements(self, text: str, intensity: float = 0.7) -> str:
        """Apply universal word replacements, preserving case."""
        words = word_tokenize(text)
        modified_words = []
        for word in words:
            word_clean = word.lower().strip('.,!?;:"')
            if word_clean in self.word_replacements and random.random() < intensity:
                replacement = self.word_replacements[word_clean]
                # Preserve case
                if word.isupper():
                    replacement = replacement.upper()
                elif word.istitle():
                    replacement = replacement.title()
                modified_words.append(replacement)
            else:
                modified_words.append(word)

        # Reconstruct with proper spacing. The original membership test only
        # handled single-character tokens, so clitics like "n't" from
        # word_tokenize picked up a stray space; list them explicitly.
        no_space_before = {'.', ',', '!', '?', ';', ':', '"', "'", ')', "''",
                           "n't", "'s", "'re", "'ve", "'ll", "'d", "'m"}
        result = ""
        for i, token in enumerate(modified_words):
            if i > 0 and token not in no_space_before:
                result += " "
            result += token
        return result
    def apply_contractions(self, text: str, style: str, intensity: float = 0.6) -> str:
        """Apply contractions based on style."""
        if style == "natural":
            # Less aggressive for the natural style (the original condition
            # `intensity < 0.5` only dampened already-low intensities)
            intensity *= 0.7
        for formal, contracted in self.contractions.items():
            if random.random() < intensity:
                pattern = r'\b' + re.escape(formal) + r'\b'

                def _keep_case(match, contracted=contracted):
                    # "Do not worry" -> "Don't worry", not "don't worry"
                    return contracted.capitalize() if match.group(0)[0].isupper() else contracted

                text = re.sub(pattern, _keep_case, text, flags=re.IGNORECASE)
        return text
    def replace_ai_phrases(self, text: str, intensity: float = 0.8) -> str:
        """Replace AI-specific boilerplate phrases, preserving sentence case."""
        # The original case logic never fired: the dict keys are lowercase, so
        # `ai_phrase[0].isupper()` was always False and capitalized occurrences
        # were missed. A case-insensitive regex handles both forms.
        for ai_phrase, replacement in self.ai_phrase_replacements.items():
            if ai_phrase in text.lower() and random.random() < intensity:

                def _swap(match, replacement=replacement):
                    # Capitalize the replacement when the phrase opened a sentence
                    return replacement.capitalize() if match.group(0)[0].isupper() else replacement

                text = re.sub(re.escape(ai_phrase), _swap, text, flags=re.IGNORECASE)
        return text
    def vary_sentence_structure(self, text: str, style: str, intensity: float = 0.4) -> str:
        """Add sentence variety based on style."""
        sentences = sent_tokenize(text)
        varied_sentences = []
        for sentence in sentences:
            if len(sentence.split()) > 8 and random.random() < intensity:
                # Add natural transitions occasionally
                if style == "conversational" and random.random() < 0.3:
                    transition = random.choice(self.natural_transitions)
                    # Lowercase only the first character, not the whole sentence
                    sentence = transition + ", " + sentence[0].lower() + sentence[1:]
                # Split long sentences occasionally (Walter Writes technique)
                elif len(sentence.split()) > 15 and random.random() < 0.2:
                    words = sentence.split()
                    mid_point = len(words) // 2
                    split_done = False
                    # Find a natural break point near the middle
                    for i in range(mid_point - 2, mid_point + 3):
                        if 0 < i < len(words) and words[i].lower() in ('and', 'but', 'so', 'because'):
                            first_part = ' '.join(words[:i]) + '.'
                            second_part = ' '.join(words[i + 1:])
                            if second_part:
                                second_part = second_part[0].upper() + second_part[1:]
                                varied_sentences.extend([first_part, second_part])
                                split_done = True
                            break
                    if split_done:
                        # The original `continue` only advanced the inner loop,
                        # so the unsplit sentence was appended again; skip it here
                        continue
            varied_sentences.append(sentence)
        return ' '.join(varied_sentences)
    def apply_advanced_paraphrasing(self, text: str, style: str, intensity: float = 0.3) -> str:
        """Apply AI paraphrasing if the model is available and intensity is high."""
        if not self.paraphraser or intensity < 0.6:
            return text
        sentences = sent_tokenize(text)
        paraphrased_sentences = []
        for sentence in sentences:
            if len(sentence.split()) > 10 and random.random() < intensity * 0.4:
                try:
                    # Style-specific prompts
                    if style == "conversational":
                        prompt = f"Make this more conversational and natural: {sentence}"
                    else:
                        prompt = f"Rewrite this naturally: {sentence}"
                    # Budget generation length in (approximate) tokens from the
                    # word count; the original used character counts, which could
                    # push min_length above max_length on long sentences
                    n_words = len(sentence.split())
                    result = self.paraphraser(
                        prompt,
                        max_length=min(150, n_words + 30),
                        min_length=max(10, n_words // 2),
                        temperature=0.7,
                        do_sample=True
                    )
                    paraphrased = result[0]['generated_text'].replace(prompt, '').strip().strip('"\'')
                    # Quality check
                    if (paraphrased and len(paraphrased) > 5 and
                            len(paraphrased) < len(sentence) * 1.8 and
                            not paraphrased.lower().startswith(('sorry', 'i cannot'))):
                        paraphrased_sentences.append(paraphrased)
                    else:
                        paraphrased_sentences.append(sentence)
                except Exception:
                    paraphrased_sentences.append(sentence)
            else:
                paraphrased_sentences.append(sentence)
        return ' '.join(paraphrased_sentences)
    def calculate_similarity(self, text1: str, text2: str) -> float:
        """Calculate semantic similarity with layered fallbacks."""
        if self.similarity_model:
            try:
                embeddings1 = self.similarity_model.encode([text1])
                embeddings2 = self.similarity_model.encode([text2])
                similarity = np.dot(embeddings1[0], embeddings2[0]) / (
                    np.linalg.norm(embeddings1[0]) * np.linalg.norm(embeddings2[0])
                )
                return float(similarity)
            except Exception:
                pass

        # Fallback to TF-IDF cosine similarity
        if self.tfidf_vectorizer and SKLEARN_AVAILABLE:
            try:
                tfidf_matrix = self.tfidf_vectorizer.fit_transform([text1, text2])
                similarity = sklearn_cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
                return float(similarity)
            except Exception:
                pass

        # Basic Jaccard word-overlap fallback
        words1 = set(word_tokenize(text1.lower()))
        words2 = set(word_tokenize(text2.lower()))
        if not words1 or not words2:
            return 1.0 if text1 == text2 else 0.0
        intersection = words1.intersection(words2)
        union = words1.union(words2)
        return len(intersection) / len(union) if union else 1.0
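    # Worked example for the Jaccard fallback above (illustrative values):
    #   text1 = "the cat sat"      -> {"the", "cat", "sat"}
    #   text2 = "the cat ran off"  -> {"the", "cat", "ran", "off"}
    #   intersection = 2, union = 5, similarity = 2 / 5 = 0.4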
    def humanize_text_universal(self,
                                text: str,
                                style: str = "natural",
                                intensity: float = 0.7) -> Dict:
        """
        Universal text humanization for all business use cases.

        Args:
            text: Input text to humanize
            style: 'natural' or 'conversational'
            intensity: Transformation intensity (0.0 to 1.0)

        Returns:
            Dictionary with results and metrics
        """
        if not text.strip():
            return {
                "original_text": text,
                "humanized_text": text,
                "similarity_score": 1.0,
                "changes_made": [],
                "processing_time_ms": 0.0,
                "style": style,
                "intensity": intensity,
                "structure_preserved": True
            }

        start_time = time.time()
        original_text = text
        humanized_text = text
        changes_made = []

        # Phase 1: Replace AI-specific phrases
        if intensity > 0.2:
            before = humanized_text
            humanized_text = self.replace_ai_phrases(humanized_text, intensity * 0.9)
            if humanized_text != before:
                changes_made.append("Removed AI phrases")

        # Phase 2: Universal word replacements
        if intensity > 0.3:
            before = humanized_text
            humanized_text = self.apply_word_replacements(humanized_text, intensity * 0.8)
            if humanized_text != before:
                changes_made.append("Improved word choice")

        # Phase 3: Add contractions
        if intensity > 0.4:
            before = humanized_text
            humanized_text = self.apply_contractions(humanized_text, style, intensity * 0.7)
            if humanized_text != before:
                changes_made.append("Added natural contractions")

        # Phase 4: Vary sentence structure
        if intensity > 0.5:
            before = humanized_text
            humanized_text = self.vary_sentence_structure(humanized_text, style, intensity * 0.4)
            if humanized_text != before:
                changes_made.append("Improved sentence flow")

        # Phase 5: Advanced paraphrasing (if available and high intensity)
        if intensity > 0.7 and self.paraphraser:
            before = humanized_text
            humanized_text = self.apply_advanced_paraphrasing(humanized_text, style, intensity)
            if humanized_text != before:
                changes_made.append("Enhanced with AI paraphrasing")

        # Phase 6: Preserve structure
        humanized_text = self.preserve_structure(original_text, humanized_text)

        # Calculate quality metrics
        similarity_score = self.calculate_similarity(original_text, humanized_text)
        processing_time = (time.time() - start_time) * 1000

        # Quality control - revert if the rewrite drifted too far in meaning
        if similarity_score < 0.7:
            print(f"⚠️ Similarity too low ({similarity_score:.3f}), reverting changes")
            humanized_text = original_text
            similarity_score = 1.0
            changes_made = ["Reverted - maintained original meaning"]

        return {
            "original_text": original_text,
            "humanized_text": humanized_text,
            "similarity_score": similarity_score,
            "changes_made": changes_made,
            "processing_time_ms": processing_time,
            "style": style,
            "intensity": intensity,
            "structure_preserved": True,
            "word_count_original": len(original_text.split()),
            "word_count_humanized": len(humanized_text.split()),
            "character_count_original": len(original_text),
            "character_count_humanized": len(humanized_text)
        }
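    # Minimal call sketch (hypothetical instance name `h`, for illustration):
    #   h = UniversalAITextHumanizer()
    #   out = h.humanize_text_universal("We utilize optimal methodologies.",
    #                                   style="natural", intensity=0.7)
    #   out["humanized_text"], out["similarity_score"]  # rewritten text + QC score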
    def _print_status(self):
        """Print current capability status."""
        print("\n📊 UNIVERSAL AI TEXT HUMANIZER STATUS:")
        print("-" * 45)
        print(f"🧠 Advanced Similarity: {'✅' if self.similarity_model else '❌'}")
        print(f"🤖 AI Paraphrasing: {'✅' if self.paraphraser else '❌'}")
        print(f"📈 TF-IDF Fallback: {'✅' if self.tfidf_vectorizer else '❌'}")
        print(f"⚡ GPU Acceleration: {'✅' if self.enable_gpu else '❌'}")
        print("📚 Universal Patterns: ✅ LOADED")
        print(f"📝 Word Replacements: ✅ {len(self.word_replacements)} mappings")
        print(f"🔍 AI Phrase Detection: ✅ {len(self.ai_phrase_replacements)} patterns")
        print(f"💬 Contractions: ✅ {len(self.contractions)} patterns")
        print("🏗️ Structure Preservation: ✅ ENABLED")

        # Calculate feature completeness
        features = [
            bool(self.similarity_model),
            bool(self.paraphraser),
            bool(self.tfidf_vectorizer),
            True,  # Universal patterns
            True,  # Structure preservation
            True   # Quality control
        ]
        completeness = (sum(features) / len(features)) * 100
        print(f"🎯 System Completeness: {completeness:.1f}%")
        if completeness >= 80:
            print("🎉 READY FOR ALL BUSINESS USE CASES!")
        elif completeness >= 60:
            print("✅ Core features ready - some advanced features may be limited")
        else:
            print("⚠️ Basic mode - install additional dependencies for full features")

# Test run
if __name__ == "__main__":
    humanizer = UniversalAITextHumanizer()

    # Test cases for different business scenarios
    test_cases = [
        {
            "name": "E-commerce Product Description",
            "text": "Furthermore, this product demonstrates exceptional quality and utilizes advanced materials to ensure optimal performance. Subsequently, customers will experience significant improvements in their daily activities.",
            "style": "natural"
        },
        {
            "name": "Marketing Copy",
            "text": "Moreover, our comprehensive solution facilitates unprecedented optimization of business processes. Therefore, organizations should implement our platform to obtain optimal results.",
            "style": "conversational"
        },
        {
            "name": "SEO Blog Content",
            "text": "It is important to note that search engine optimization requires systematic approaches. Subsequently, websites must utilize comprehensive strategies to enhance their visibility.",
            "style": "natural"
        }
    ]

    print("\n🧪 TESTING UNIVERSAL HUMANIZER")
    print("=" * 40)
    for i, test_case in enumerate(test_cases, 1):
        print(f"\n📋 Test {i}: {test_case['name']}")
        print("-" * 50)
        print(f"📝 Original: {test_case['text']}")
        result = humanizer.humanize_text_universal(
            text=test_case['text'],
            style=test_case['style'],
            intensity=0.7
        )
        print(f"✨ Humanized: {result['humanized_text']}")
        print(f"📊 Similarity: {result['similarity_score']:.3f}")
        print(f"⚡ Processing: {result['processing_time_ms']:.1f}ms")
        print(f"🔧 Changes: {', '.join(result['changes_made'])}")

    print("\n🎉 Universal testing completed!")
    print("🚀 Ready for E-commerce, Marketing, SEO & All Business Use Cases!")