import re
import random
import time
from typing import Dict

import nltk
import numpy as np

# Robust NLTK data downloader that handles version differences
def ensure_nltk_data():
    """Download required NLTK data with fallbacks for different versions"""
    
    # Resources to download (try both old and new names)
    resources_to_try = [
        # Punkt tokenizer (try both versions)
        [('punkt', 'tokenizers/punkt'), ('punkt_tab', 'tokenizers/punkt_tab')],
        # Wordnet
        [('wordnet', 'corpora/wordnet')],
        # OMW data
        [('omw-1.4', 'corpora/omw-1.4')]
    ]
    
    for resource_group in resources_to_try:
        downloaded = False
        for resource_name, resource_path in resource_group:
            try:
                nltk.data.find(resource_path)
                print(f"βœ… Found {resource_name}")
                downloaded = True
                break
            except LookupError:
                try:
                    print(f"πŸ”„ Downloading {resource_name}...")
                    nltk.download(resource_name, quiet=True)
                    print(f"βœ… Downloaded {resource_name}")
                    downloaded = True
                    break
                except Exception as e:
                    print(f"⚠️ Failed to download {resource_name}: {e}")
                    continue
        
        if not downloaded:
            resource_names = [name for name, _ in resource_group]
            print(f"❌ Could not download any of: {resource_names}")

# Alternative function that tries multiple approaches
def robust_nltk_setup():
    """More robust NLTK setup with multiple fallback strategies"""
    
    print("πŸ”§ Setting up NLTK resources...")
    
    # Strategy 1: Try standard downloads
    try:
        ensure_nltk_data()
    except Exception as e:
        print(f"⚠️ Standard setup failed: {e}")
    
    # Strategy 2: Force download common resources
    common_resources = ['punkt', 'punkt_tab', 'wordnet', 'omw-1.4', 'averaged_perceptron_tagger']
    for resource in common_resources:
        try:
            nltk.download(resource, quiet=True)
            print(f"βœ… Force downloaded {resource}")
        except Exception as e:
            print(f"⚠️ Could not force download {resource}: {e}")
    
    # Strategy 3: Test if tokenization works
    try:
        from nltk.tokenize import sent_tokenize, word_tokenize
        # Test with a simple sentence
        test_sentences = sent_tokenize("This is a test. This is another test.")
        test_words = word_tokenize("This is a test sentence.")
        print(f"βœ… Tokenization test passed: {len(test_sentences)} sentences, {len(test_words)} words")
        return True
    except Exception as e:
        print(f"❌ Tokenization test failed: {e}")
        return False

# Run the robust setup
print("πŸš€ Loading Authentic AI Text Humanizer...")
setup_success = robust_nltk_setup()

# Try importing NLTK functions with fallbacks
try:
    from nltk.tokenize import sent_tokenize, word_tokenize
    from nltk.corpus import wordnet
    print("βœ… NLTK imports successful")
    NLTK_AVAILABLE = True
except ImportError as e:
    print(f"❌ NLTK imports failed: {e}")
    print("πŸ”„ Trying alternative tokenization methods...")
    NLTK_AVAILABLE = False
    
    # Fallback tokenization functions
    def sent_tokenize(text):
        """Fallback sentence tokenizer: split on periods, exclamation and question marks"""
        sentences = re.split(r'[.!?]+', text)
        return [s.strip() for s in sentences if s.strip()]
    
    def word_tokenize(text):
        """Fallback word tokenizer: words plus standalone punctuation"""
        return re.findall(r'\b\w+\b|[^\w\s]', text)
    
    # Mock wordnet for fallback
    class MockWordNet:
        def synsets(self, word):
            return []
    
    wordnet = MockWordNet()

# Advanced imports with fallbacks
def safe_import_with_fallback(module_name, component=None):
    """Safe import with fallback handling"""
    try:
        if component:
            module = __import__(module_name, fromlist=[component])
            return getattr(module, component), True
        return __import__(module_name), True
    except Exception:
        # ImportError and any other load-time failure both mean "unavailable"
        return None, False

# Load advanced models
SentenceTransformer, SENTENCE_TRANSFORMERS_AVAILABLE = safe_import_with_fallback('sentence_transformers', 'SentenceTransformer')
pipeline, TRANSFORMERS_AVAILABLE = safe_import_with_fallback('transformers', 'pipeline')

try:
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity as sklearn_cosine_similarity
    SKLEARN_AVAILABLE = True
except ImportError:
    SKLEARN_AVAILABLE = False

try:
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False

class AuthenticAITextHumanizer:
    """
    Authentic AI Text Humanizer - Makes text truly sound human and natural
    Based on analysis of authentic human writing patterns
    """
    
    def __init__(self, enable_gpu=True):
        print("🎯 Initializing Authentic AI Text Humanizer...")
        print("✨ Designed to write like a real human - authentic & natural")
        
        self.enable_gpu = enable_gpu and TORCH_AVAILABLE
        self.nltk_available = NLTK_AVAILABLE
        
        # Initialize models and authentic patterns
        self._load_models()
        self._initialize_authentic_patterns()
        
        print("βœ… Authentic AI Text Humanizer ready!")
        self._print_status()
    
    def _load_models(self):
        """Load AI models with graceful fallbacks"""
        self.similarity_model = None
        self.paraphraser = None
        
        # Load sentence transformer for quality control
        if SENTENCE_TRANSFORMERS_AVAILABLE:
            try:
                device = 'cuda' if self.enable_gpu and TORCH_AVAILABLE and torch.cuda.is_available() else 'cpu'
                self.similarity_model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
                print("βœ… Advanced similarity model loaded")
            except Exception as e:
                print(f"⚠️ Similarity model unavailable: {e}")
        
        # Load paraphrasing model
        if TRANSFORMERS_AVAILABLE:
            try:
                device = 0 if self.enable_gpu and TORCH_AVAILABLE and torch.cuda.is_available() else -1
                self.paraphraser = pipeline(
                    "text2text-generation",
                    model="google/flan-t5-small",
                    device=device,
                    max_length=256
                )
                print("βœ… AI paraphrasing model loaded")
            except Exception as e:
                print(f"⚠️ Paraphrasing model unavailable: {e}")
        
        # Fallback similarity using TF-IDF
        if SKLEARN_AVAILABLE:
            self.tfidf_vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2), max_features=5000)
        else:
            self.tfidf_vectorizer = None
    
    def _initialize_authentic_patterns(self):
        """Initialize authentic human writing patterns"""
        
        # Authentic word replacements - how humans actually write
        self.authentic_replacements = {
            # Business jargon -> Natural language
            "utilize": ["use", "work with", "employ"],
            "facilitate": ["help", "make it easier to", "enable", "allow"],
            "demonstrate": ["show", "prove", "reveal", "display"],
            "implement": ["put in place", "start using", "set up", "roll out"],
            "optimize": ["improve", "make better", "enhance"],
            "leverage": ["use", "take advantage of", "make use of"],
            "comprehensive": ["complete", "thorough", "full", "extensive"],
            "substantial": ["significant", "major", "big", "considerable"],
            "exceptional": ["outstanding", "remarkable", "impressive", "excellent"],
            "systematic": ["structured", "organized", "methodical"],
            "revolutionary": ["groundbreaking", "innovative", "cutting-edge", "game-changing"],
            "unprecedented": ["never-before-seen", "unique", "extraordinary", "first-of-its-kind"],
            "methodology": ["approach", "method", "way", "strategy"],
            "enhancement": ["improvement", "upgrade", "boost"],
            "acquisition": ["purchase", "buying", "getting"],
            "transformation": ["change", "shift", "evolution"],
            "optimization": ["improvement", "fine-tuning", "enhancement"],
            "establishment": ["creation", "setup", "building"],
            "implementation": ["rollout", "launch", "deployment"],
            "operational": ["day-to-day", "working", "running"],
            "capabilities": ["abilities", "features", "what it can do"],
            "specifications": ["specs", "details", "features"],
            "functionality": ["features", "what it does", "capabilities"],
            "performance": ["how well it works", "results", "output"],
            "architecture": ["design", "structure", "framework"],
            "integration": ["bringing together", "combining", "merging"],
            "sustainability": ["long-term viability", "lasting success"],
            "competitive advantages": ["edge over competitors", "what sets us apart"]
        }
        
        # Remove robotic AI phrases completely
        self.ai_phrase_removals = {
            "furthermore,": ["Also,", "Plus,", "What's more,", "On top of that,", "Additionally,"],
            "moreover,": ["Also,", "Plus,", "What's more,", "Besides,"],
            "subsequently,": ["Then,", "Next,", "After that,", "Later,"],
            "consequently,": ["So,", "As a result,", "Therefore,", "This means"],
            "accordingly,": ["So,", "Therefore,", "As a result,"],
            "nevertheless,": ["However,", "But,", "Still,", "Even so,"],
            "nonetheless,": ["However,", "But,", "Still,", "Even so,"],
            "it is important to note that": ["Worth noting:", "Importantly,", "Keep in mind that", "Remember that"],
            "it is crucial to understand that": ["Here's what's important:", "You should know that", "The key thing is"],
            "it should be emphasized that": ["Importantly,", "Key point:", "Worth highlighting:"],
            "it is worth mentioning that": ["Also worth noting:", "By the way,", "Interestingly,"],
            "from a practical standpoint": ["In practice,", "Realistically,", "In real terms"],
            "in terms of implementation": ["When putting this into practice,", "For implementation,", "To make this work"],
            "with respect to the aforementioned": ["Regarding what I mentioned,", "About that,", "On this point"],
            "as previously mentioned": ["As I said earlier,", "Like I mentioned,", "As noted before"],
            "in light of this": ["Because of this,", "Given this,", "With this in mind"],
            "upon careful consideration": ["After thinking about it,", "Looking at this closely,", "When you consider"],
            "in the final analysis": ["Ultimately,", "When it comes down to it,", "In the end"],
            "one must consider": ["You should think about", "Consider", "Keep in mind"],
            "it is evident that": ["Clearly,", "Obviously,", "You can see that"],
            "it can be observed that": ["You can see", "It's clear that", "Obviously"]
        }
        
        # Natural sentence starters for conversational flow
        self.natural_starters = [
            "Here's the thing:", "Look,", "The reality is", "What's interesting is", "The truth is",
            "Think about it:", "Consider this:", "Here's what happens:", "What this means is",
            "The bottom line is", "Simply put,", "In other words,", "To put it another way,",
            "What you'll find is", "The key insight is", "What stands out is"
        ]
        
        # Conversational connectors
        self.conversational_connectors = [
            "And here's why:", "Plus,", "On top of that,", "What's more,", "Beyond that,",
            "Here's another thing:", "But wait, there's more:", "And that's not all:",
            "Speaking of which,", "Along those lines,", "In the same vein,"
        ]
        
        # Sentence ending variations
        self.authentic_endings = [
            "which is pretty impressive", "and that's significant", "which makes sense",
            "and that matters", "which is key", "and this is important"
        ]
        
        # Professional contractions
        self.contractions = {
            "do not": "don't", "does not": "doesn't", "did not": "didn't", "will not": "won't",
            "would not": "wouldn't", "should not": "shouldn't", "could not": "couldn't", 
            "cannot": "can't", "is not": "isn't", "are not": "aren't", "was not": "wasn't", 
            "were not": "weren't", "have not": "haven't", "has not": "hasn't", "had not": "hadn't",
            "I am": "I'm", "you are": "you're", "he is": "he's", "she is": "she's", "it is": "it's",
            "we are": "we're", "they are": "they're", "I have": "I've", "you have": "you've",
            "we have": "we've", "they have": "they've", "I will": "I'll", "you will": "you'll",
            "we will": "we'll", "they will": "they'll", "that is": "that's", "there is": "there's",
            "here is": "here's", "what is": "what's", "where is": "where's", "who is": "who's"
        }
    
    def preserve_structure(self, original: str, processed: str) -> str:
        """Preserve original text structure (paragraphs, formatting)"""
        # Split by double newlines (paragraphs)
        original_paragraphs = re.split(r'\n\s*\n', original)
        if len(original_paragraphs) <= 1:
            return processed
        
        # Split processed text into sentences
        try:
            processed_sentences = sent_tokenize(processed)
        except Exception as e:
            print(f"⚠️ Sentence tokenization failed, using fallback: {e}")
            processed_sentences = re.split(r'[.!?]+', processed)
            processed_sentences = [s.strip() for s in processed_sentences if s.strip()]
        
        # Try to maintain paragraph structure
        result_paragraphs = []
        sentence_idx = 0
        
        for para in original_paragraphs:
            try:
                para_sentences = sent_tokenize(para)
            except Exception:
                para_sentences = re.split(r'[.!?]+', para)
                para_sentences = [s.strip() for s in para_sentences if s.strip()]
            
            para_sentence_count = len(para_sentences)
            
            if sentence_idx + para_sentence_count <= len(processed_sentences):
                para_processed = ' '.join(processed_sentences[sentence_idx:sentence_idx + para_sentence_count])
                result_paragraphs.append(para_processed)
                sentence_idx += para_sentence_count
            else:
                # Add remaining sentences to this paragraph
                remaining = ' '.join(processed_sentences[sentence_idx:])
                if remaining:
                    result_paragraphs.append(remaining)
                break
        
        return '\n\n'.join(result_paragraphs)
    
    def break_long_sentences(self, text: str) -> str:
        """Break overly long sentences into natural, shorter ones"""
        try:
            sentences = sent_tokenize(text)
        except Exception:
            sentences = re.split(r'[.!?]+', text)
            sentences = [s.strip() for s in sentences if s.strip()]
        
        processed_sentences = []
        
        for sentence in sentences:
            words = sentence.split()
            
            # Break sentences longer than 20 words
            if len(words) > 20:
                # Find natural break points
                break_words = ['and', 'but', 'while', 'because', 'since', 'when', 'where', 'which', 'that', 'as']
                
                for break_word in break_words:
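                    # for/else control flow: the inner loop breaks only after a
                    # valid split; the bare `break` below then exits this outer
                    # loop, while the outer `else` runs only when no break word
                    # produced a split and the sentence is kept unchanged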
                    break_positions = [i for i, word in enumerate(words) if word.lower() == break_word]
                    
                    for pos in break_positions:
                        # Only break if it creates reasonable sentence lengths
                        if 8 <= pos <= len(words) - 8:
                            first_part = ' '.join(words[:pos]).strip()
                            second_part = ' '.join(words[pos:]).strip()
                            
                            if first_part and second_part:
                                # Ensure proper capitalization
                                if not first_part.endswith('.'):
                                    first_part += '.'
                                second_part = second_part[0].upper() + second_part[1:] if len(second_part) > 1 else second_part.upper()
                                
                                processed_sentences.extend([first_part, second_part])
                                break
                    else:
                        continue
                    break
                else:
                    # No good break point found, keep original
                    processed_sentences.append(sentence)
            else:
                processed_sentences.append(sentence)
        
        return ' '.join(processed_sentences)
    
    def apply_authentic_word_replacements(self, text: str, intensity: float = 0.8) -> str:
        """Replace business jargon with authentic, natural language"""
        try:
            words = word_tokenize(text)
        except Exception:
            words = re.findall(r'\b\w+\b|[^\w\s]', text)
        
        modified_words = []
        
        for word in words:
            word_clean = word.lower().strip('.,!?;:"')
            
            if word_clean in self.authentic_replacements and random.random() < intensity:
                replacements = self.authentic_replacements[word_clean]
                replacement = random.choice(replacements)
                
                # Preserve case
                if word.isupper():
                    replacement = replacement.upper()
                elif word.istitle():
                    replacement = replacement.title()
                
                modified_words.append(replacement)
            else:
                modified_words.append(word)
        
        # Reconstruct with proper spacing: no space before closing punctuation
        # or contraction fragments (NLTK splits "don't" into "do" + "n't")
        result = ""
        for i, word in enumerate(modified_words):
            no_space_before = word in ".,!?;:\"')" or word.startswith("'") or word == "n't"
            if i > 0 and not no_space_before:
                result += " "
            result += word
        
        return result
    
    def remove_ai_phrases(self, text: str, intensity: float = 0.9) -> str:
        """Remove robotic AI phrases and replace with natural alternatives"""
        
        # Sort by length (longest first) to avoid partial replacements
        sorted_phrases = sorted(self.ai_phrase_removals.items(), key=lambda x: len(x[0]), reverse=True)
        
        for ai_phrase, natural_alternatives in sorted_phrases:
            # Case-insensitive search
            pattern = re.compile(re.escape(ai_phrase), re.IGNORECASE)
            
            if pattern.search(text) and random.random() < intensity:
                replacement = random.choice(natural_alternatives)
                
                # Match the case of each occurrence; checking ai_phrase itself
                # would never fire because the dict keys are all lowercase
                def match_case(m, repl=replacement):
                    if m.group(0)[0].isupper():
                        return repl[0].upper() + repl[1:]
                    return repl[0].lower() + repl[1:]
                
                text = pattern.sub(match_case, text)
        
        return text
    
    def add_conversational_flow(self, text: str, style: str, intensity: float = 0.6) -> str:
        """Add natural, conversational flow to the text"""
        try:
            sentences = sent_tokenize(text)
        except Exception:
            sentences = re.split(r'[.!?]+', text)
            sentences = [s.strip() for s in sentences if s.strip()]
        
        if len(sentences) < 2:
            return text
        
        enhanced_sentences = []
        
        for i, sentence in enumerate(sentences):
            # Add conversational starters occasionally
            if (i == 0 or (i > 0 and random.random() < intensity * 0.3)) and style == "conversational":
                if random.random() < 0.4:
                    starter = random.choice(self.natural_starters)
                    # Lowercase only the first letter so proper nouns and "I" survive
                    sentence = starter + " " + sentence[0].lower() + sentence[1:]
            
            # Add conversational connectors between sentences
            elif i > 0 and random.random() < intensity * 0.2 and style == "conversational":
                connector = random.choice(self.conversational_connectors)
                sentence = connector + " " + sentence[0].lower() + sentence[1:]
            
            # Occasionally add authentic endings to sentences
            if random.random() < intensity * 0.1 and len(sentence.split()) > 8:
                if not sentence.endswith(('.', '!', '?')):
                    sentence += '.'
                ending = random.choice(self.authentic_endings)
                sentence = sentence[:-1] + ", " + ending + "."
            
            enhanced_sentences.append(sentence)
        
        return ' '.join(enhanced_sentences)
    
    def apply_natural_contractions(self, text: str, intensity: float = 0.7) -> str:
        """Apply contractions for natural flow"""
        
        # Sort by length (longest first) to avoid partial replacements
        sorted_contractions = sorted(self.contractions.items(), key=lambda x: len(x[0]), reverse=True)
        
        for formal, contracted in sorted_contractions:
            if random.random() < intensity:
                pattern = r'\b' + re.escape(formal) + r'\b'
                text = re.sub(pattern, contracted, text, flags=re.IGNORECASE)
        
        return text
    
    def add_human_variety(self, text: str, intensity: float = 0.4) -> str:
        """Add natural human writing variety and personality"""
        try:
            sentences = sent_tokenize(text)
        except Exception:
            sentences = re.split(r'[.!?]+', text)
            sentences = [s.strip() for s in sentences if s.strip()]
        
        varied_sentences = []
        
        for sentence in sentences:
            # Vary sentence structure
            if len(sentence.split()) > 12 and random.random() < intensity:
                # Sometimes start with a dependent clause
                if random.random() < 0.3:
                    # Move a prepositional phrase to the beginning
                    words = sentence.split()
                    prep_words = ['with', 'through', 'by', 'using', 'for', 'in', 'on', 'at']
                    
                    for j, word in enumerate(words):
                        if word.lower() in prep_words and j > 3:
                            # Find the end of the prepositional phrase
                            end_j = min(j + 4, len(words))
                            prep_phrase = ' '.join(words[j:end_j])
                            remaining = ' '.join(words[:j] + words[end_j:])
                            
                            if remaining:
                                # Uppercase/lowercase only the first letters so
                                # proper nouns inside the phrase are preserved
                                sentence = (prep_phrase[0].upper() + prep_phrase[1:]
                                            + ', ' + remaining[0].lower() + remaining[1:])
                            break
                
                # Sometimes add emphasis with "really", "actually", "definitely"
                elif random.random() < 0.2:
                    emphasis_words = ['really', 'actually', 'definitely', 'truly', 'genuinely']
                    emphasis = random.choice(emphasis_words)
                    words = sentence.split()
                    
                    # Insert emphasis word after first few words
                    insert_pos = random.randint(2, min(5, len(words)-1))
                    words.insert(insert_pos, emphasis)
                    sentence = ' '.join(words)
            
            varied_sentences.append(sentence)
        
        return ' '.join(varied_sentences)
    
    def calculate_similarity(self, text1: str, text2: str) -> float:
        """Calculate semantic similarity"""
        if self.similarity_model:
            try:
                embeddings1 = self.similarity_model.encode([text1])
                embeddings2 = self.similarity_model.encode([text2])
                similarity = np.dot(embeddings1[0], embeddings2[0]) / (
                    np.linalg.norm(embeddings1[0]) * np.linalg.norm(embeddings2[0])
                )
                return float(similarity)
            except Exception:
                pass
        
        # Fallback to TF-IDF
        if self.tfidf_vectorizer and SKLEARN_AVAILABLE:
            try:
                tfidf_matrix = self.tfidf_vectorizer.fit_transform([text1, text2])
                similarity = sklearn_cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
                return float(similarity)
            except Exception:
                pass
        
        # Basic word overlap fallback
        try:
            words1 = set(word_tokenize(text1.lower()))
            words2 = set(word_tokenize(text2.lower()))
        except Exception:
            words1 = set(re.findall(r'\b\w+\b', text1.lower()))
            words2 = set(re.findall(r'\b\w+\b', text2.lower()))
        
        if not words1 or not words2:
            return 1.0 if text1 == text2 else 0.0
        
        intersection = words1.intersection(words2)
        union = words1.union(words2)
        return len(intersection) / len(union) if union else 1.0
    
    def humanize_text_authentic(self, 
                               text: str, 
                               style: str = "natural",
                               intensity: float = 0.7) -> Dict:
        """
        Authentic text humanization that makes text truly sound human
        
        Args:
            text: Input text to humanize
            style: 'natural' or 'conversational'
            intensity: Transformation intensity (0.0 to 1.0)
        
        Returns:
            Dictionary with results and metrics
        """
        if not text.strip():
            return {
                "original_text": text,
                "humanized_text": text,
                "similarity_score": 1.0,
                "changes_made": [],
                "processing_time_ms": 0.0,
                "style": style,
                "intensity": intensity,
                "structure_preserved": True
            }
        
        start_time = time.time()
        original_text = text
        humanized_text = text
        changes_made = []
        
        try:
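            # Each phase is gated by a rising intensity threshold (0.2-0.7),
            # and each pass additionally scales its own probability, so lower
            # intensity means fewer and gentler edits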
            # Phase 1: Remove AI phrases and replace with natural alternatives
            if intensity > 0.2:
                before = humanized_text
                humanized_text = self.remove_ai_phrases(humanized_text, intensity * 0.95)
                if humanized_text != before:
                    changes_made.append("Replaced robotic phrases with natural language")
            
            # Phase 2: Break up long, complex sentences
            if intensity > 0.3:
                before = humanized_text
                humanized_text = self.break_long_sentences(humanized_text)
                if humanized_text != before:
                    changes_made.append("Broke up complex sentences for better flow")
            
            # Phase 3: Replace business jargon with authentic language
            if intensity > 0.4:
                before = humanized_text
                humanized_text = self.apply_authentic_word_replacements(humanized_text, intensity * 0.8)
                if humanized_text != before:
                    changes_made.append("Replaced jargon with natural, everyday language")
            
            # Phase 4: Add conversational flow and personality
            if intensity > 0.5:
                before = humanized_text
                humanized_text = self.add_conversational_flow(humanized_text, style, intensity * 0.6)
                if humanized_text != before:
                    changes_made.append("Added conversational flow and personality")
            
            # Phase 5: Apply natural contractions
            if intensity > 0.6:
                before = humanized_text
                humanized_text = self.apply_natural_contractions(humanized_text, intensity * 0.7)
                if humanized_text != before:
                    changes_made.append("Added natural contractions")
            
            # Phase 6: Add human variety and natural patterns
            if intensity > 0.7:
                before = humanized_text
                humanized_text = self.add_human_variety(humanized_text, intensity * 0.4)
                if humanized_text != before:
                    changes_made.append("Added natural human writing variety")
            
            # Phase 7: Preserve original structure
            humanized_text = self.preserve_structure(original_text, humanized_text)
            
            # Calculate quality metrics (processing time is computed in the return)
            similarity_score = self.calculate_similarity(original_text, humanized_text)
            
            # Quality control - revert if too different
            if similarity_score < 0.65:
                print(f"⚠️ Similarity too low ({similarity_score:.3f}), reverting changes")
                humanized_text = original_text
                similarity_score = 1.0
                changes_made = ["Reverted - maintained original meaning"]
            
        except Exception as e:
            print(f"❌ Error during authentic humanization: {e}")
            humanized_text = original_text
            similarity_score = 1.0
            changes_made = [f"Processing error - returned original: {str(e)[:100]}"]
        
        return {
            "original_text": original_text,
            "humanized_text": humanized_text,
            "similarity_score": similarity_score,
            "changes_made": changes_made,
            "processing_time_ms": (time.time() - start_time) * 1000,
            "style": style,
            "intensity": intensity,
            "structure_preserved": True,
            "word_count_original": len(original_text.split()),
            "word_count_humanized": len(humanized_text.split()),
            "character_count_original": len(original_text),
            "character_count_humanized": len(humanized_text)
        }
    
    def _print_status(self):
        """Print current status"""
        print("\nπŸ“Š AUTHENTIC AI TEXT HUMANIZER STATUS:")
        print("-" * 50)
        print(f"🧠 Advanced Similarity: {'βœ…' if self.similarity_model else '❌'}")
        print(f"πŸ€– AI Paraphrasing: {'βœ…' if self.paraphraser else '❌'}")
        print(f"πŸ“Š TF-IDF Fallback: {'βœ…' if self.tfidf_vectorizer else '❌'}")
        print(f"πŸš€ GPU Acceleration: {'βœ…' if self.enable_gpu else '❌'}")
        print(f"πŸ“š NLTK Available: {'βœ…' if self.nltk_available else '❌ (using fallbacks)'}")
        print(f"✨ Authentic Patterns: βœ… LOADED")
        print(f"πŸ“ Authentic Replacements: βœ… {len(self.authentic_replacements)} mappings")
        print(f"🚫 AI Phrase Removals: βœ… {len(self.ai_phrase_removals)} patterns")
        print(f"πŸ’¬ Natural Contractions: βœ… {len(self.contractions)} patterns")
        print(f"πŸ—£οΈ Conversational Elements: βœ… {len(self.natural_starters)} starters")
        print(f"πŸ—οΈ Structure Preservation: βœ… ENABLED")
        
        # Calculate feature completeness
        features = [
            bool(self.similarity_model),
            bool(self.paraphraser),
            bool(self.tfidf_vectorizer),
            True,  # Authentic patterns
            True,  # Sentence breaking
            True,  # Conversational flow
            True,  # Structure preservation
            True   # Quality control
        ]
        completeness = (sum(features) / len(features)) * 100
        print(f"🎯 Authentic System Completeness: {completeness:.1f}%")
        
        if completeness >= 80:
            print("🎉 READY FOR AUTHENTIC HUMANIZATION!")
        elif completeness >= 60:
            print("✅ Core features ready - some advanced features may be limited")
        else:
            print("⚠️ Basic mode - install additional dependencies for full features")

# For backward compatibility, use the same method name
UniversalAITextHumanizer = AuthenticAITextHumanizer
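
# Minimal usage sketch (the transformer models are optional; without
# sentence-transformers/transformers installed, the class falls back to
# TF-IDF similarity and regex tokenization):
#
#     humanizer = AuthenticAITextHumanizer(enable_gpu=False)
#     result = humanizer.humanize_text_authentic(
#         "Furthermore, this demonstrates exceptional capabilities.",
#         style="conversational",
#         intensity=0.8,
#     )
#     print(result["humanized_text"])
#     print(f"similarity: {result['similarity_score']:.2f}")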

# Test function
if __name__ == "__main__":
    humanizer = AuthenticAITextHumanizer()
    
    # Test with your examples
    test_cases = [
        {
            "name": "Smartphone Description",
            "text": "Furthermore, this revolutionary smartphone demonstrates exceptional technological capabilities and utilizes advanced processing architecture to ensure optimal performance across all applications. Subsequently, users will experience significant improvements in their daily productivity and entertainment consumption. Moreover, the comprehensive design facilitates seamless integration with existing ecosystems while maintaining superior battery efficiency.",
            "style": "natural"
        },
        {
            "name": "Business Proposal",
            "text": "Our comprehensive proposal demonstrates significant value proposition and utilizes proven methodologies to ensure optimal project outcomes. Furthermore, the systematic implementation of our advanced framework will facilitate substantial improvements in your operational efficiency. It is important to note that our experienced team possesses exceptional expertise and demonstrates remarkable track record in delivering complex solutions.",
            "style": "conversational"
        }
    ]
    
    print(f"\nπŸ§ͺ TESTING AUTHENTIC HUMANIZER")
    print("=" * 45)
    
    for i, test_case in enumerate(test_cases, 1):
        print(f"\nπŸ”¬ Test {i}: {test_case['name']}")
        print("-" * 50)
        print(f"πŸ“ Original: {test_case['text']}")
        
        result = humanizer.humanize_text_authentic(
            text=test_case['text'],
            style=test_case['style'],
            intensity=0.8
        )
        
        print(f"✨ Authentic: {result['humanized_text']}")
        print(f"πŸ“Š Similarity: {result['similarity_score']:.3f}")
        print(f"⚑ Processing: {result['processing_time_ms']:.1f}ms")
        print(f"πŸ”§ Changes: {', '.join(result['changes_made'])}")
    
    print(f"\nπŸŽ‰ Authentic testing completed!")
    print(f"✨ Ready for truly human-like text transformation!")