import re
import random
import nltk
import numpy as np
from typing import List, Dict, Optional
import time
from collections import Counter
import statistics

# Robust NLTK data downloader that handles version differences
def ensure_nltk_data():
    """Download required NLTK data with fallbacks for different versions"""
    # Resources to download (try both old and new names)
    resources_to_try = [
        # Punkt tokenizer (try both versions)
        [('punkt', 'tokenizers/punkt'), ('punkt_tab', 'tokenizers/punkt_tab')],
        # WordNet
        [('wordnet', 'corpora/wordnet')],
        # OMW data
        [('omw-1.4', 'corpora/omw-1.4')]
    ]
    for resource_group in resources_to_try:
        downloaded = False
        for resource_name, resource_path in resource_group:
            try:
                nltk.data.find(resource_path)
                print(f"Found {resource_name}")
                downloaded = True
                break
            except LookupError:
                try:
                    print(f"Downloading {resource_name}...")
                    nltk.download(resource_name, quiet=True)
                    print(f"Downloaded {resource_name}")
                    downloaded = True
                    break
                except Exception as e:
                    print(f"Failed to download {resource_name}: {e}")
                    continue
        if not downloaded:
            resource_names = [name for name, _ in resource_group]
            print(f"Could not download any of: {resource_names}")

# Alternative function that tries multiple approaches
def robust_nltk_setup():
    """More robust NLTK setup with multiple fallback strategies"""
    print("Setting up NLTK resources...")
    # Strategy 1: Try standard downloads
    try:
        ensure_nltk_data()
    except Exception as e:
        print(f"Standard setup failed: {e}")
    # Strategy 2: Force download common resources
    common_resources = ['punkt', 'punkt_tab', 'wordnet', 'omw-1.4', 'averaged_perceptron_tagger']
    for resource in common_resources:
        try:
            nltk.download(resource, quiet=True)
            print(f"Force downloaded {resource}")
        except Exception as e:
            print(f"Could not force download {resource}: {e}")
    # Strategy 3: Test if tokenization works
    try:
        from nltk.tokenize import sent_tokenize, word_tokenize
        # Test with a simple sentence
        test_sentences = sent_tokenize("This is a test. This is another test.")
        test_words = word_tokenize("This is a test sentence.")
        print(f"Tokenization test passed: {len(test_sentences)} sentences, {len(test_words)} words")
        return True
    except Exception as e:
        print(f"Tokenization test failed: {e}")
        return False

# Run the robust setup
print("Loading Authentic AI Text Humanizer...")
setup_success = robust_nltk_setup()

# Try importing NLTK functions with fallbacks
try:
    from nltk.tokenize import sent_tokenize, word_tokenize
    from nltk.corpus import wordnet
    print("NLTK imports successful")
    NLTK_AVAILABLE = True
except ImportError as e:
    print(f"NLTK imports failed: {e}")
    print("Trying alternative tokenization methods...")
    NLTK_AVAILABLE = False

    # Fallback tokenization functions
    def sent_tokenize(text):
        """Fallback sentence tokenizer"""
        # Simple sentence splitting on periods, exclamation marks, question marks
        sentences = re.split(r'[.!?]+', text)
        return [s.strip() for s in sentences if s.strip()]

    def word_tokenize(text):
        """Fallback word tokenizer"""
        # Simple word splitting on whitespace and punctuation
        words = re.findall(r'\b\w+\b|[^\w\s]', text)
        return words

    # Mock wordnet for fallback
    class MockWordNet:
        def synsets(self, word):
            return []

    wordnet = MockWordNet()
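
# Illustrative note (added here, not part of the original logic): the regex fallbacks above are
# much cruder than NLTK's tokenizers. For example:
#   sent_tokenize("This works. Mostly!")  -> ["This works", "Mostly"]   (sentence terminators are dropped)
#   word_tokenize("Don't stop.")          -> ["Don", "'", "t", "stop", "."]
# They only kick in when the NLTK imports fail, so downstream code should treat their output as approximate.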

# Advanced imports with fallbacks
def safe_import_with_fallback(module_name, component=None):
    """Safe import with fallback handling"""
    try:
        if component:
            module = __import__(module_name, fromlist=[component])
            return getattr(module, component), True
        else:
            return __import__(module_name), True
    except ImportError:
        return None, False
    except Exception:
        return None, False

# Load advanced models
SentenceTransformer, SENTENCE_TRANSFORMERS_AVAILABLE = safe_import_with_fallback('sentence_transformers', 'SentenceTransformer')
pipeline, TRANSFORMERS_AVAILABLE = safe_import_with_fallback('transformers', 'pipeline')
try:
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity as sklearn_cosine_similarity
    SKLEARN_AVAILABLE = True
except ImportError:
    SKLEARN_AVAILABLE = False
try:
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False

class AuthenticAITextHumanizer:
    """
    Authentic AI Text Humanizer - Makes text truly sound human and natural
    Based on analysis of authentic human writing patterns
    """

    def __init__(self, enable_gpu=True):
        print("Initializing Authentic AI Text Humanizer...")
        print("Designed to write like a real human - authentic & natural")
        self.enable_gpu = enable_gpu and TORCH_AVAILABLE
        self.nltk_available = NLTK_AVAILABLE
        # Initialize models and authentic patterns
        self._load_models()
        self._initialize_authentic_patterns()
        print("Authentic AI Text Humanizer ready!")
        self._print_status()

    def _load_models(self):
        """Load AI models with graceful fallbacks"""
        self.similarity_model = None
        self.paraphraser = None
        # Load sentence transformer for quality control
        if SENTENCE_TRANSFORMERS_AVAILABLE:
            try:
                device = 'cuda' if self.enable_gpu and TORCH_AVAILABLE and torch.cuda.is_available() else 'cpu'
                self.similarity_model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
                print("Advanced similarity model loaded")
            except Exception as e:
                print(f"Similarity model unavailable: {e}")
        # Load paraphrasing model
        if TRANSFORMERS_AVAILABLE:
            try:
                device = 0 if self.enable_gpu and TORCH_AVAILABLE and torch.cuda.is_available() else -1
                self.paraphraser = pipeline(
                    "text2text-generation",
                    model="google/flan-t5-small",
                    device=device,
                    max_length=256
                )
                print("AI paraphrasing model loaded")
            except Exception as e:
                print(f"Paraphrasing model unavailable: {e}")
        # Fallback similarity using TF-IDF
        if SKLEARN_AVAILABLE:
            self.tfidf_vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2), max_features=5000)
        else:
            self.tfidf_vectorizer = None

    def _initialize_authentic_patterns(self):
        """Initialize authentic human writing patterns"""
        # Authentic word replacements - how humans actually write
        self.authentic_replacements = {
            # Business jargon -> Natural language
            "utilize": ["use", "work with", "employ"],
            "facilitate": ["help", "make it easier to", "enable", "allow"],
            "demonstrate": ["show", "prove", "reveal", "display"],
            "implement": ["put in place", "start using", "set up", "roll out"],
            "optimize": ["improve", "make better", "enhance"],
            "leverage": ["use", "take advantage of", "make use of"],
            "comprehensive": ["complete", "thorough", "full", "extensive"],
            "substantial": ["significant", "major", "big", "considerable"],
            "exceptional": ["outstanding", "remarkable", "impressive", "excellent"],
            "systematic": ["structured", "organized", "methodical"],
            "revolutionary": ["groundbreaking", "innovative", "cutting-edge", "game-changing"],
            "unprecedented": ["never-before-seen", "unique", "extraordinary", "first-of-its-kind"],
            "methodology": ["approach", "method", "way", "strategy"],
            "enhancement": ["improvement", "upgrade", "boost"],
            "acquisition": ["purchase", "buying", "getting"],
            "transformation": ["change", "shift", "evolution"],
            "optimization": ["improvement", "fine-tuning", "enhancement"],
            "establishment": ["creation", "setup", "building"],
            "implementation": ["rollout", "launch", "deployment"],
            "operational": ["day-to-day", "working", "running"],
            "capabilities": ["abilities", "features", "what it can do"],
            "specifications": ["specs", "details", "features"],
            "functionality": ["features", "what it does", "capabilities"],
            "performance": ["how well it works", "results", "output"],
            "architecture": ["design", "structure", "framework"],
            "integration": ["bringing together", "combining", "merging"],
            "sustainability": ["long-term viability", "lasting success"],
            "competitive advantages": ["edge over competitors", "what sets us apart"]
        }
        # Remove robotic AI phrases completely
        self.ai_phrase_removals = {
            "furthermore,": ["Also,", "Plus,", "What's more,", "On top of that,", "Additionally,"],
            "moreover,": ["Also,", "Plus,", "What's more,", "Besides,"],
            "subsequently,": ["Then,", "Next,", "After that,", "Later,"],
            "consequently,": ["So,", "As a result,", "Therefore,", "This means"],
            "accordingly,": ["So,", "Therefore,", "As a result,"],
            "nevertheless,": ["However,", "But,", "Still,", "Even so,"],
            "nonetheless,": ["However,", "But,", "Still,", "Even so,"],
            "it is important to note that": ["Worth noting:", "Importantly,", "Keep in mind that", "Remember that"],
            "it is crucial to understand that": ["Here's what's important:", "You should know that", "The key thing is"],
            "it should be emphasized that": ["Importantly,", "Key point:", "Worth highlighting:"],
            "it is worth mentioning that": ["Also worth noting:", "By the way,", "Interestingly,"],
            "from a practical standpoint": ["In practice,", "Realistically,", "In real terms"],
            "in terms of implementation": ["When putting this into practice,", "For implementation,", "To make this work"],
            "with respect to the aforementioned": ["Regarding what I mentioned,", "About that,", "On this point"],
            "as previously mentioned": ["As I said earlier,", "Like I mentioned,", "As noted before"],
            "in light of this": ["Because of this,", "Given this,", "With this in mind"],
            "upon careful consideration": ["After thinking about it,", "Looking at this closely,", "When you consider"],
            "in the final analysis": ["Ultimately,", "When it comes down to it,", "In the end"],
            "one must consider": ["You should think about", "Consider", "Keep in mind"],
            "it is evident that": ["Clearly,", "Obviously,", "You can see that"],
            "it can be observed that": ["You can see", "It's clear that", "Obviously"]
        }
        # Natural sentence starters for conversational flow
        self.natural_starters = [
            "Here's the thing:", "Look,", "The reality is", "What's interesting is", "The truth is",
            "Think about it:", "Consider this:", "Here's what happens:", "What this means is",
            "The bottom line is", "Simply put,", "In other words,", "To put it another way,",
            "What you'll find is", "The key insight is", "What stands out is"
        ]
        # Conversational connectors
        self.conversational_connectors = [
            "And here's why:", "Plus,", "On top of that,", "What's more,", "Beyond that,",
            "Here's another thing:", "But wait, there's more:", "And that's not all:",
            "Speaking of which,", "Along those lines,", "In the same vein,"
        ]
        # Sentence ending variations
        self.authentic_endings = [
            "which is pretty impressive", "and that's significant", "which makes sense",
            "and that matters", "which is key", "and this is important"
        ]
        # Professional contractions
        self.contractions = {
            "do not": "don't", "does not": "doesn't", "did not": "didn't", "will not": "won't",
            "would not": "wouldn't", "should not": "shouldn't", "could not": "couldn't",
            "cannot": "can't", "is not": "isn't", "are not": "aren't", "was not": "wasn't",
            "were not": "weren't", "have not": "haven't", "has not": "hasn't", "had not": "hadn't",
            "I am": "I'm", "you are": "you're", "he is": "he's", "she is": "she's", "it is": "it's",
            "we are": "we're", "they are": "they're", "I have": "I've", "you have": "you've",
            "we have": "we've", "they have": "they've", "I will": "I'll", "you will": "you'll",
            "we will": "we'll", "they will": "they'll", "that is": "that's", "there is": "there's",
            "here is": "here's", "what is": "what's", "where is": "where's", "who is": "who's"
        }

    def preserve_structure(self, original: str, processed: str) -> str:
        """Preserve original text structure (paragraphs, formatting)"""
        # Split by double newlines (paragraphs)
        original_paragraphs = re.split(r'\n\s*\n', original)
        if len(original_paragraphs) <= 1:
            return processed
        # Split processed text into sentences
        try:
            processed_sentences = sent_tokenize(processed)
        except Exception as e:
            print(f"Sentence tokenization failed, using fallback: {e}")
            processed_sentences = re.split(r'[.!?]+', processed)
            processed_sentences = [s.strip() for s in processed_sentences if s.strip()]
        # Try to maintain paragraph structure
        result_paragraphs = []
        sentence_idx = 0
        for para in original_paragraphs:
            try:
                para_sentences = sent_tokenize(para)
            except Exception:
                para_sentences = re.split(r'[.!?]+', para)
                para_sentences = [s.strip() for s in para_sentences if s.strip()]
            para_sentence_count = len(para_sentences)
            if sentence_idx + para_sentence_count <= len(processed_sentences):
                para_processed = ' '.join(processed_sentences[sentence_idx:sentence_idx + para_sentence_count])
                result_paragraphs.append(para_processed)
                sentence_idx += para_sentence_count
            else:
                # Add remaining sentences to this paragraph
                remaining = ' '.join(processed_sentences[sentence_idx:])
                if remaining:
                    result_paragraphs.append(remaining)
                break
        return '\n\n'.join(result_paragraphs)

    def break_long_sentences(self, text: str) -> str:
        """Break overly long sentences into natural, shorter ones"""
        try:
            sentences = sent_tokenize(text)
        except Exception:
            sentences = re.split(r'[.!?]+', text)
            sentences = [s.strip() for s in sentences if s.strip()]
        processed_sentences = []
        for sentence in sentences:
            words = sentence.split()
            # Break sentences longer than 20 words
            if len(words) > 20:
                # Find natural break points
                break_words = ['and', 'but', 'while', 'because', 'since', 'when', 'where', 'which', 'that', 'as']
                for break_word in break_words:
                    break_positions = [i for i, word in enumerate(words) if word.lower() == break_word]
                    for pos in break_positions:
                        # Only break if it creates reasonable sentence lengths
                        if 8 <= pos <= len(words) - 8:
                            first_part = ' '.join(words[:pos]).strip()
                            second_part = ' '.join(words[pos:]).strip()
                            if first_part and second_part:
                                # Ensure proper capitalization
                                if not first_part.endswith('.'):
                                    first_part += '.'
                                second_part = second_part[0].upper() + second_part[1:] if len(second_part) > 1 else second_part.upper()
                                processed_sentences.extend([first_part, second_part])
                                break
                    else:
                        continue
                    break
                else:
                    # No good break point found, keep original
                    processed_sentences.append(sentence)
            else:
                processed_sentences.append(sentence)
        return ' '.join(processed_sentences)
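
    # Worked example (illustrative only): for a 24-word sentence whose 11th word is "and" (index 10),
    # the check 8 <= 10 <= 24 - 8 passes, so the sentence becomes words[:10] + "." followed by
    # words[10:] with its first letter capitalized - two sentences of 10 and 14 words.
    # A sentence of 20 words or fewer is passed through unchanged.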

    def apply_authentic_word_replacements(self, text: str, intensity: float = 0.8) -> str:
        """Replace business jargon with authentic, natural language"""
        try:
            words = word_tokenize(text)
        except Exception:
            words = re.findall(r'\b\w+\b|[^\w\s]', text)
        modified_words = []
        for word in words:
            word_clean = word.lower().strip('.,!?;:"')
            if word_clean in self.authentic_replacements and random.random() < intensity:
                replacements = self.authentic_replacements[word_clean]
                replacement = random.choice(replacements)
                # Preserve case
                if word.isupper():
                    replacement = replacement.upper()
                elif word.istitle():
                    replacement = replacement.title()
                modified_words.append(replacement)
            else:
                modified_words.append(word)
        # Reconstruct with proper spacing
        result = ""
        for i, word in enumerate(modified_words):
            if i > 0 and word not in ".,!?;:\"')":
                result += " "
            result += word
        return result
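
    # Worked example (illustrative only; replacement choice is random): with a high intensity,
    # "Utilize" may become "Use" (title case preserved via str.title()), "UTILIZE" may become
    # "EMPLOY" (upper case preserved), and "utilize" may become "work with".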

    def remove_ai_phrases(self, text: str, intensity: float = 0.9) -> str:
        """Remove robotic AI phrases and replace with natural alternatives"""
        # Sort by length (longest first) to avoid partial replacements
        sorted_phrases = sorted(self.ai_phrase_removals.items(), key=lambda x: len(x[0]), reverse=True)
        for ai_phrase, natural_alternatives in sorted_phrases:
            # Case-insensitive search
            pattern = re.compile(re.escape(ai_phrase), re.IGNORECASE)
            match = pattern.search(text)
            if match and random.random() < intensity:
                replacement = random.choice(natural_alternatives)
                # Preserve the case style of the matched text (the dictionary keys are all lowercase)
                if match.group(0)[0].isupper():
                    replacement = replacement[0].upper() + replacement[1:]
                else:
                    replacement = replacement[0].lower() + replacement[1:]
                text = pattern.sub(replacement, text)
        return text

    def add_conversational_flow(self, text: str, style: str, intensity: float = 0.6) -> str:
        """Add natural, conversational flow to the text"""
        try:
            sentences = sent_tokenize(text)
        except Exception:
            sentences = re.split(r'[.!?]+', text)
            sentences = [s.strip() for s in sentences if s.strip()]
        if len(sentences) < 2:
            return text
        enhanced_sentences = []
        for i, sentence in enumerate(sentences):
            # Add conversational starters occasionally
            if (i == 0 or (i > 0 and random.random() < intensity * 0.3)) and style == "conversational":
                if random.random() < 0.4:
                    starter = random.choice(self.natural_starters)
                    # Lowercase only the first letter so proper nouns stay intact
                    sentence = starter + " " + sentence[:1].lower() + sentence[1:]
            # Add conversational connectors between sentences
            elif i > 0 and random.random() < intensity * 0.2 and style == "conversational":
                connector = random.choice(self.conversational_connectors)
                sentence = connector + " " + sentence[:1].lower() + sentence[1:]
            # Occasionally add authentic endings to sentences
            if random.random() < intensity * 0.1 and len(sentence.split()) > 8:
                if not sentence.endswith(('.', '!', '?')):
                    sentence += '.'
                ending = random.choice(self.authentic_endings)
                sentence = sentence[:-1] + ", " + ending + "."
            enhanced_sentences.append(sentence)
        return ' '.join(enhanced_sentences)

    def apply_natural_contractions(self, text: str, intensity: float = 0.7) -> str:
        """Apply contractions for natural flow"""
        # Sort by length (longest first) to avoid partial replacements
        sorted_contractions = sorted(self.contractions.items(), key=lambda x: len(x[0]), reverse=True)
        for formal, contracted in sorted_contractions:
            if random.random() < intensity:
                pattern = r'\b' + re.escape(formal) + r'\b'
                text = re.sub(pattern, contracted, text, flags=re.IGNORECASE)
        return text

    def add_human_variety(self, text: str, intensity: float = 0.4) -> str:
        """Add natural human writing variety and personality"""
        try:
            sentences = sent_tokenize(text)
        except Exception:
            sentences = re.split(r'[.!?]+', text)
            sentences = [s.strip() for s in sentences if s.strip()]
        varied_sentences = []
        for sentence in sentences:
            # Vary sentence structure
            if len(sentence.split()) > 12 and random.random() < intensity:
                # Sometimes start with a dependent clause
                if random.random() < 0.3:
                    # Move a prepositional phrase to the beginning
                    words = sentence.split()
                    prep_words = ['with', 'through', 'by', 'using', 'for', 'in', 'on', 'at']
                    for j, word in enumerate(words):
                        if word.lower() in prep_words and j > 3:
                            # Find the end of the prepositional phrase
                            end_j = min(j + 4, len(words))
                            prep_phrase = ' '.join(words[j:end_j])
                            remaining = ' '.join(words[:j] + words[end_j:])
                            if remaining:
                                # Capitalize the moved phrase and lowercase only the first letter of the rest
                                sentence = prep_phrase[:1].upper() + prep_phrase[1:] + ', ' + remaining[:1].lower() + remaining[1:]
                            break
                # Sometimes add emphasis with "really", "actually", "definitely"
                elif random.random() < 0.2:
                    emphasis_words = ['really', 'actually', 'definitely', 'truly', 'genuinely']
                    emphasis = random.choice(emphasis_words)
                    words = sentence.split()
                    # Insert emphasis word after first few words
                    insert_pos = random.randint(2, min(5, len(words) - 1))
                    words.insert(insert_pos, emphasis)
                    sentence = ' '.join(words)
            varied_sentences.append(sentence)
        return ' '.join(varied_sentences)

    def calculate_similarity(self, text1: str, text2: str) -> float:
        """Calculate semantic similarity"""
        if self.similarity_model:
            try:
                embeddings1 = self.similarity_model.encode([text1])
                embeddings2 = self.similarity_model.encode([text2])
                similarity = np.dot(embeddings1[0], embeddings2[0]) / (
                    np.linalg.norm(embeddings1[0]) * np.linalg.norm(embeddings2[0])
                )
                return float(similarity)
            except Exception:
                pass
        # Fallback to TF-IDF
        if self.tfidf_vectorizer and SKLEARN_AVAILABLE:
            try:
                tfidf_matrix = self.tfidf_vectorizer.fit_transform([text1, text2])
                similarity = sklearn_cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
                return float(similarity)
            except Exception:
                pass
        # Basic word overlap fallback
        try:
            words1 = set(word_tokenize(text1.lower()))
            words2 = set(word_tokenize(text2.lower()))
        except Exception:
            words1 = set(re.findall(r'\b\w+\b', text1.lower()))
            words2 = set(re.findall(r'\b\w+\b', text2.lower()))
        if not words1 or not words2:
            return 1.0 if text1 == text2 else 0.0
        intersection = words1.intersection(words2)
        union = words1.union(words2)
        return len(intersection) / len(union) if union else 1.0
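
    # Worked example for the word-overlap fallback (illustrative only):
    #   text1 = "the cat sat"   -> {"the", "cat", "sat"}
    #   text2 = "the cat slept" -> {"the", "cat", "slept"}
    #   Jaccard similarity = |{the, cat}| / |{the, cat, sat, slept}| = 2 / 4 = 0.5
    # The sentence-transformer and TF-IDF paths above return cosine similarity instead.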

    def humanize_text_authentic(self,
                                text: str,
                                style: str = "natural",
                                intensity: float = 0.7) -> Dict:
        """
        Authentic text humanization that makes text truly sound human

        Args:
            text: Input text to humanize
            style: 'natural' or 'conversational'
            intensity: Transformation intensity (0.0 to 1.0)

        Returns:
            Dictionary with results and metrics
        """
        if not text.strip():
            return {
                "original_text": text,
                "humanized_text": text,
                "similarity_score": 1.0,
                "changes_made": [],
                "processing_time_ms": 0.0,
                "style": style,
                "intensity": intensity,
                "structure_preserved": True
            }
        start_time = time.time()
        original_text = text
        humanized_text = text
        changes_made = []
        try:
            # Phase 1: Remove AI phrases and replace with natural alternatives
            if intensity > 0.2:
                before = humanized_text
                humanized_text = self.remove_ai_phrases(humanized_text, intensity * 0.95)
                if humanized_text != before:
                    changes_made.append("Replaced robotic phrases with natural language")
            # Phase 2: Break up long, complex sentences
            if intensity > 0.3:
                before = humanized_text
                humanized_text = self.break_long_sentences(humanized_text)
                if humanized_text != before:
                    changes_made.append("Broke up complex sentences for better flow")
            # Phase 3: Replace business jargon with authentic language
            if intensity > 0.4:
                before = humanized_text
                humanized_text = self.apply_authentic_word_replacements(humanized_text, intensity * 0.8)
                if humanized_text != before:
                    changes_made.append("Replaced jargon with natural, everyday language")
            # Phase 4: Add conversational flow and personality
            if intensity > 0.5:
                before = humanized_text
                humanized_text = self.add_conversational_flow(humanized_text, style, intensity * 0.6)
                if humanized_text != before:
                    changes_made.append("Added conversational flow and personality")
            # Phase 5: Apply natural contractions
            if intensity > 0.6:
                before = humanized_text
                humanized_text = self.apply_natural_contractions(humanized_text, intensity * 0.7)
                if humanized_text != before:
                    changes_made.append("Added natural contractions")
            # Phase 6: Add human variety and natural patterns
            if intensity > 0.7:
                before = humanized_text
                humanized_text = self.add_human_variety(humanized_text, intensity * 0.4)
                if humanized_text != before:
                    changes_made.append("Added natural human writing variety")
            # Phase 7: Preserve original structure
            humanized_text = self.preserve_structure(original_text, humanized_text)
            # Calculate quality metrics
            similarity_score = self.calculate_similarity(original_text, humanized_text)
            # Quality control - revert if too different
            if similarity_score < 0.65:
                print(f"Similarity too low ({similarity_score:.3f}), reverting changes")
                humanized_text = original_text
                similarity_score = 1.0
                changes_made = ["Reverted - maintained original meaning"]
        except Exception as e:
            print(f"Error during authentic humanization: {e}")
            humanized_text = original_text
            similarity_score = 1.0
            changes_made = [f"Processing error - returned original: {str(e)[:100]}"]
        return {
            "original_text": original_text,
            "humanized_text": humanized_text,
            "similarity_score": similarity_score,
            "changes_made": changes_made,
            "processing_time_ms": (time.time() - start_time) * 1000,
            "style": style,
            "intensity": intensity,
            "structure_preserved": True,
            "word_count_original": len(original_text.split()),
            "word_count_humanized": len(humanized_text.split()),
            "character_count_original": len(original_text),
            "character_count_humanized": len(humanized_text)
        }
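
    # Typical call (illustrative; it mirrors the __main__ test block at the bottom of this file):
    #   result = AuthenticAITextHumanizer().humanize_text_authentic(text, style="natural", intensity=0.7)
    #   result["humanized_text"]   -> rewritten text (reverted to the original if similarity < 0.65)
    #   result["similarity_score"] -> semantic similarity between input and output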

    def _print_status(self):
        """Print current status"""
        print("\nAUTHENTIC AI TEXT HUMANIZER STATUS:")
        print("-" * 50)
        print(f"Advanced Similarity: {'Yes' if self.similarity_model else 'No'}")
        print(f"AI Paraphrasing: {'Yes' if self.paraphraser else 'No'}")
        print(f"TF-IDF Fallback: {'Yes' if self.tfidf_vectorizer else 'No'}")
        print(f"GPU Acceleration: {'Yes' if self.enable_gpu else 'No'}")
        print(f"NLTK Available: {'Yes' if self.nltk_available else 'No (using fallbacks)'}")
        print("Authentic Patterns: LOADED")
        print(f"Authentic Replacements: {len(self.authentic_replacements)} mappings")
        print(f"AI Phrase Removals: {len(self.ai_phrase_removals)} patterns")
        print(f"Natural Contractions: {len(self.contractions)} patterns")
        print(f"Conversational Elements: {len(self.natural_starters)} starters")
        print("Structure Preservation: ENABLED")
        # Calculate feature completeness
        features = [
            bool(self.similarity_model),
            bool(self.paraphraser),
            bool(self.tfidf_vectorizer),
            True,  # Authentic patterns
            True,  # Sentence breaking
            True,  # Conversational flow
            True,  # Structure preservation
            True   # Quality control
        ]
        completeness = (sum(features) / len(features)) * 100
        print(f"Authentic System Completeness: {completeness:.1f}%")
        if completeness >= 80:
            print("READY FOR AUTHENTIC HUMANIZATION!")
        elif completeness >= 60:
            print("Core features ready - some advanced features may be limited")
        else:
            print("Basic mode - install additional dependencies for full features")

# For backward compatibility, keep the old class name as an alias
UniversalAITextHumanizer = AuthenticAITextHumanizer

# Test run
if __name__ == "__main__":
    humanizer = AuthenticAITextHumanizer()
    # Test with sample texts
    test_cases = [
        {
            "name": "Smartphone Description",
            "text": "Furthermore, this revolutionary smartphone demonstrates exceptional technological capabilities and utilizes advanced processing architecture to ensure optimal performance across all applications. Subsequently, users will experience significant improvements in their daily productivity and entertainment consumption. Moreover, the comprehensive design facilitates seamless integration with existing ecosystems while maintaining superior battery efficiency.",
            "style": "natural"
        },
        {
            "name": "Business Proposal",
            "text": "Our comprehensive proposal demonstrates significant value proposition and utilizes proven methodologies to ensure optimal project outcomes. Furthermore, the systematic implementation of our advanced framework will facilitate substantial improvements in your operational efficiency. It is important to note that our experienced team possesses exceptional expertise and demonstrates remarkable track record in delivering complex solutions.",
            "style": "conversational"
        }
    ]
    print("\nTESTING AUTHENTIC HUMANIZER")
    print("=" * 45)
    for i, test_case in enumerate(test_cases, 1):
        print(f"\nTest {i}: {test_case['name']}")
        print("-" * 50)
        print(f"Original: {test_case['text']}")
        result = humanizer.humanize_text_authentic(
            text=test_case['text'],
            style=test_case['style'],
            intensity=0.8
        )
        print(f"Authentic: {result['humanized_text']}")
        print(f"Similarity: {result['similarity_score']:.3f}")
        print(f"Processing: {result['processing_time_ms']:.1f}ms")
        print(f"Changes: {', '.join(result['changes_made'])}")
    print("\nAuthentic testing completed!")
    print("Ready for truly human-like text transformation!")