Commit 2c89e89 · Parent: 7826103

app.py CHANGED
@@ -12,19 +12,10 @@ os.environ['NLTK_DATA'] = '/tmp/nltk_data'
 def download_nltk_data():
     """Download required NLTK data with proper error handling"""
     try:
-        # Create the directory if it doesn't exist
         os.makedirs('/tmp/nltk_data', exist_ok=True)
-
-        # Add the path to NLTK's data path
         nltk.data.path.append('/tmp/nltk_data')
 
-
-        required_data = [
-            'punkt_tab',  # For newer NLTK versions (3.9+)
-            'punkt',  # Fallback for older versions
-            'averaged_perceptron_tagger',
-            'stopwords'
-        ]
+        required_data = ['punkt_tab', 'punkt', 'averaged_perceptron_tagger', 'stopwords']
 
         for data in required_data:
             try:
@@ -34,12 +25,10 @@ def download_nltk_data():
                 print(f"Failed to download {data}: {e}")
 
         print("NLTK data download completed")
-        print(f"NLTK data paths: {nltk.data.path}")
 
     except Exception as e:
         print(f"NLTK setup error: {e}")
 
-# Download NLTK data at startup
 download_nltk_data()
 
 class AIContentHumanizer:
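The body of the download loop (old lines 31-33) sits between these two hunks and is unchanged. For context, it presumably looks something like the sketch below; the exact `nltk.download` call is an assumption inferred from the `required_data` list and the error print shown above:

    for data in required_data:
        try:
            # assumed call: fetch each package into the writable /tmp directory set up earlier
            nltk.download(data, download_dir='/tmp/nltk_data', quiet=True)
        except Exception as e:
            print(f"Failed to download {data}: {e}")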
@@ -47,14 +36,146 @@ class AIContentHumanizer:
         self.setup_humanization_patterns()
 
     def setup_humanization_patterns(self):
-
+        """Setup patterns for AI phrase replacement"""
         self.ai_replacements = {
             r'\bit is important to note that\b': ["worth mentioning that", "keep in mind that", "note that"],
-
+            r'\bit is worth noting that\b': ["interestingly", "notably", "it's worth mentioning"],
+            r'\bin conclusion\b': ["to wrap up", "all in all", "bottom line"],
+            r'\bfurthermore\b': ["plus", "also", "on top of that"],
+            r'\bmoreover\b': ["what's more", "besides", "additionally"],
+            r'\bhowever\b': ["but", "though", "on the flip side"],
+            r'\bnevertheless\b': ["still", "even so", "that said"],
+            r'\btherefore\b': ["so", "thus", "as a result"],
+            r'\bconsequently\b': ["as a result", "so", "because of this"],
+            r'\bin order to\b': ["to", "so we can", "for"],
+            r'\bdue to the fact that\b': ["because", "since", "given that"],
+            r'\bwith regard to\b': ["about", "regarding", "when it comes to"],
+            r'\bit should be noted that\b': ["note that", "remember", "keep in mind"],
+            r'\bit is essential to\b': ["you need to", "it's crucial to", "make sure to"],
+            r'\bsubsequently\b': ["then", "next", "after that"],
+            r'\bultimately\b': ["in the end", "finally", "when all is said and done"]
+        }
+
+        self.contractions = {
+            r'\bdo not\b': "don't",
+            r'\bdoes not\b': "doesn't",
+            r'\bdid not\b': "didn't",
+            r'\bwill not\b': "won't",
+            r'\bwould not\b': "wouldn't",
+            r'\bcould not\b': "couldn't",
+            r'\bshould not\b': "shouldn't",
+            r'\bcannot\b': "can't",
+            r'\bis not\b': "isn't",
+            r'\bare not\b': "aren't",
+            r'\bwas not\b': "wasn't",
+            r'\bwere not\b': "weren't",
+            r'\bhave not\b': "haven't",
+            r'\bhas not\b': "hasn't",
+            r'\bhad not\b': "hadn't",
+            r'\bI will\b': "I'll",
+            r'\byou will\b': "you'll",
+            r'\bhe will\b': "he'll",
+            r'\bshe will\b': "she'll",
+            r'\bwe will\b': "we'll",
+            r'\bthey will\b': "they'll",
+            r'\bI would\b': "I'd",
+            r'\byou would\b': "you'd",
+            r'\bI have\b': "I've",
+            r'\byou have\b': "you've",
+            r'\bwe have\b': "we've",
+            r'\bthey have\b': "they've"
         }
-
+
+        self.casual_fillers = [
+            "you know", "I mean", "like", "actually", "basically",
+            "honestly", "literally", "obviously", "clearly", "definitely"
+        ]
+
+        self.personal_touches = [
+            "I think", "in my opinion", "from what I've seen", "personally",
+            "if you ask me", "the way I see it", "from my experience"
+        ]
+
+    def replace_ai_phrases(self, text):
+        """Replace formal AI phrases with more casual alternatives"""
+        for pattern, replacements in self.ai_replacements.items():
+            if re.search(pattern, text, re.IGNORECASE):
+                replacement = random.choice(replacements)
+                text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
+        return text
+
+    def add_contractions(self, text):
+        """Add contractions to make text more casual"""
+        for pattern, contraction in self.contractions.items():
+            text = re.sub(pattern, contraction, text, flags=re.IGNORECASE)
+        return text
+
+    def vary_sentence_structure(self, text):
+        """Add variety to sentence structure"""
+        try:
+            sentences = sent_tokenize(text)
+            varied_sentences = []
+
+            for sentence in sentences:
+                # Randomly add sentence starters
+                if random.random() < 0.3 and len(sentence.split()) > 8:
+                    starters = ["Well,", "So,", "Now,", "Look,", "Here's the thing -"]
+                    sentence = f"{random.choice(starters)} {sentence.lower()}"
+
+                varied_sentences.append(sentence)
+
+            return " ".join(varied_sentences)
+        except Exception:
+            return text
+
+    def add_personal_touches(self, text):
+        """Add personal opinions and touches"""
+        sentences = sent_tokenize(text)
+        if len(sentences) > 2 and random.random() < 0.4:
+            insert_pos = random.randint(1, len(sentences) - 1)
+            personal_touch = random.choice(self.personal_touches)
+            sentences[insert_pos] = f"{personal_touch}, {sentences[insert_pos].lower()}"
+
+        return " ".join(sentences)
+
+    def add_casual_punctuation(self, text):
+        """Add casual punctuation like dashes and ellipses"""
+        # Replace some periods with dashes for emphasis
+        text = re.sub(r'(\w+)\. ([A-Z])', r'\1 - \2', text)
+
+        # Add occasional ellipses
+        if random.random() < 0.3:
+            text = re.sub(r'(\w+)\.', r'\1...', text, count=1)
+
+        return text
+
+    def add_natural_fillers(self, text):
+        """Add natural conversation fillers"""
+        sentences = sent_tokenize(text)
+        if len(sentences) > 1 and random.random() < 0.5:
+            filler_pos = random.randint(0, len(sentences) - 1)
+            filler = random.choice(self.casual_fillers)
+            sentences[filler_pos] = f"{filler}, {sentences[filler_pos].lower()}"
+
+        return " ".join(sentences)
+
+    def clean_text(self, text):
+        """Clean up the text formatting"""
+        # Fix spacing issues
+        text = re.sub(r'\s+', ' ', text)
+        text = re.sub(r'\s+([,.!?])', r'\1', text)
+
+        # Fix capitalization after sentence starters
+        text = re.sub(r'([.!?]\s+)([a-z])', lambda m: m.group(1) + m.group(2).upper(), text)
+
+        # Ensure first letter is capitalized
+        if text and text[0].islower():
+            text = text[0].upper() + text[1:]
+
+        return text.strip()
 
     def get_readability_score(self, text):
+        """Calculate readability score"""
         try:
             score = flesch_reading_ease(text)
             grade = flesch_kincaid_grade(text)
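A note on the replacement mechanics added above: both lookup tables are applied via `re.sub` with `re.IGNORECASE`, so a sentence-initial match like "However," is rewritten as a lowercase "but,", and the first-letter fix in `clean_text` (added above) is what would restore the capital. A standalone sketch, not part of the diff, illustrating one table entry:

    import random
    import re

    # One entry from ai_replacements, applied the same way replace_ai_phrases does.
    pattern, options = r'\bhowever\b', ["but", "though", "on the flip side"]
    text = "However, the results were mixed."
    if re.search(pattern, text, re.IGNORECASE):
        text = re.sub(pattern, random.choice(options), text, flags=re.IGNORECASE)
    print(text)  # e.g. 'but, the results were mixed.' - lowercase until clean_text runs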
@@ -67,22 +188,22 @@
             return f"Could not calculate readability: {str(e)}"
 
     def humanize_text(self, text, intensity="medium"):
+        """Main method to humanize AI-generated text"""
         if not text or not text.strip():
             return "Please provide text to humanize."
 
         try:
             text = text.strip()
 
-            # Test NLTK functionality
+            # Test NLTK functionality
             try:
-                # Try to tokenize a simple sentence to verify NLTK is working
                 test_tokens = sent_tokenize("This is a test sentence.")
                 if not test_tokens:
                     raise Exception("NLTK tokenization failed")
             except Exception as nltk_error:
                 return f"NLTK Error: {str(nltk_error)}. Please try again or contact support."
 
-            #
+            # Apply humanization techniques based on intensity
             text = self.replace_ai_phrases(text)
             text = self.add_contractions(text)
 
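`flesch_reading_ease` and `flesch_kincaid_grade` are called unqualified in `get_readability_score`; they presumably come from the textstat package, whose import sits outside this diff. A minimal sketch of what the method wraps, assuming that dependency:

    import textstat

    sample = "The quick brown fox jumps over the lazy dog."
    print(textstat.flesch_reading_ease(sample))   # 0-100 scale; higher reads easier
    print(textstat.flesch_kincaid_grade(sample))  # approximate US school grade level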
@@ -99,9 +220,8 @@
         except Exception as e:
             return f"Error processing text: {str(e)}\n\nOriginal text: {text}"
 
-    # ... rest of your existing methods
-
 def create_interface():
+    """Create the Gradio interface"""
     humanizer = AIContentHumanizer()
 
     def process_text(input_text, intensity):
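The body of `process_text` (old lines 108-117) falls between these hunks and is unchanged. Judging from the two outputs wired up in the next hunk, it presumably does something along these lines (a sketch, not the actual code):

    def process_text(input_text, intensity):
        humanized = humanizer.humanize_text(input_text, intensity)
        readability = humanizer.get_readability_score(humanized)
        return humanized, readability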
@@ -118,12 +238,12 @@ def create_interface():
         gr.Markdown("""# 🤖➡️👤 AI Content Humanizer
         Transform AI-generated content into human-sounding, casual, and readable text!""")
 
-        input_text = gr.Textbox(label="AI-generated Text", lines=8)
+        input_text = gr.Textbox(label="AI-generated Text", lines=8, placeholder="Paste your AI-generated text here...")
         intensity = gr.Radio(["light", "medium", "heavy"], value="medium", label="Humanization Level")
         output_text = gr.Textbox(label="Humanized Text", lines=8, show_copy_button=True)
         readability = gr.Textbox(label="Readability Score", lines=2)
 
-        btn = gr.Button("Humanize Text")
+        btn = gr.Button("Humanize Text", variant="primary")
         btn.click(fn=process_text, inputs=[input_text, intensity], outputs=[output_text, readability])
         input_text.submit(fn=process_text, inputs=[input_text, intensity], outputs=[output_text, readability])
 
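The diff ends before the bottom of app.py. A Hugging Face Space typically finishes a Gradio app by building and launching the interface; assuming this file follows that convention, the tail presumably reads:

    if __name__ == "__main__":
        demo = create_interface()  # assumes create_interface returns the gr.Blocks app
        demo.launch()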