Chungulus's picture
Upload folder using huggingface_hub
fea8d44 verified
# =============================================
# HuForm AI Mini - Gradio UI
# AI-generated text detection + humanisation
# Clean version – generation warnings removed
# Last updated for transformers 2025–2026
# =============================================
# ── 1. Install dependencies ───────────────────────────────────────
# !pip install -q gradio transformers torch accelerate
# ── 2. Imports ─────────────────────────────────────────────────────
import html
import re

import gradio as gr
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    GenerationConfig,
    pipeline,
)
# ── 3. Configuration ───────────────────────────────────────────────
# Run on the GPU when torch reports CUDA support, otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(f"Using device: {DEVICE.upper()}")

# Hub id of the classifier used for AI-text detection.
DETECTION_MODEL = "Hello-SimpleAI/chatgpt-detector-roberta"

# Hub id of the instruction-tuned model used for rewriting.
HUMANISATION_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
# ── 4. Lazy model loading ──────────────────────────────────────────
# Cached detection pipeline; stays None until get_detection() is first called.
_detection_pipe = None


def get_detection():
    """Return the text-classification pipeline, creating it on first use.

    The pipeline is built for DETECTION_MODEL and stored in the module-level
    ``_detection_pipe`` so later calls reuse the same object. On CUDA it is
    placed on device 0 with float16 weights; otherwise it runs on the CPU
    (device -1) with the default dtype.
    """
    global _detection_pipe

    # Fast path: already loaded.
    if _detection_pipe is not None:
        return _detection_pipe

    print(f"Loading detector: {DETECTION_MODEL}")
    on_gpu = DEVICE == "cuda"
    _detection_pipe = pipeline(
        "text-classification",
        model=DETECTION_MODEL,
        device=0 if on_gpu else -1,
        torch_dtype=torch.float16 if on_gpu else None,
    )
    return _detection_pipe
# Cached rewriting pipeline; stays None until get_humaniser() is first called.
_humanisation_pipe = None


def get_humaniser():
    """Return the text-generation pipeline, creating it on first use.

    Loads the tokenizer and causal LM for HUMANISATION_MODEL, wraps them in a
    ``text-generation`` pipeline and stores the result in the module-level
    ``_humanisation_pipe`` so later calls reuse it. On CUDA the model is
    loaded in float16 with ``device_map="auto"``; otherwise in float32 with
    no device map.
    """
    global _humanisation_pipe

    # Fast path: already loaded.
    if _humanisation_pipe is not None:
        return _humanisation_pipe

    print(f"Loading humaniser: {HUMANISATION_MODEL}")

    if DEVICE == "cuda":
        weights_dtype, placement = torch.float16, "auto"
    else:
        weights_dtype, placement = torch.float32, None

    tokenizer = AutoTokenizer.from_pretrained(HUMANISATION_MODEL)
    model = AutoModelForCausalLM.from_pretrained(
        HUMANISATION_MODEL,
        torch_dtype=weights_dtype,
        device_map=placement,
    )
    _humanisation_pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )
    return _humanisation_pipe
# ── 5. Helper functions ────────────────────────────────────────────
def split_sentences(text):
    """Split *text* into a list of trimmed, non-empty sentences.

    A boundary is any run of whitespace that directly follows ``.``, ``!``
    or ``?``; the punctuation stays attached to the preceding sentence.
    Blank or whitespace-only input yields an empty list.
    """
    trimmed = text.strip()
    if not trimmed:
        return []
    fragments = re.split(r'(?<=[.!?])\s+', trimmed)
    # filter(None, ...) drops fragments that are empty after trimming.
    return list(filter(None, map(str.strip, fragments)))
def _ai_probability(label, score):
    """Convert one classifier prediction into an AI probability in percent.

    ``score`` is the confidence of the predicted ``label``. If the label
    names the AI class, the score is used directly; otherwise it is treated
    as confidence in the human class and inverted.
    """
    label = label.lower()
    # "gpt" is included so a "ChatGPT" label is recognised as the AI class.
    # NOTE(review): the configured detector is expected to emit the labels
    # "Human" / "ChatGPT" - confirm against the model's config.
    if any(marker in label for marker in ("fake", "ai", "generated", "gpt")):
        return score * 100
    return (1 - score) * 100


def detect_ai(text):
    """Score each sentence of *text* for AI likelihood and render the result.

    Args:
        text: Raw text to analyse.

    Returns:
        A ``(html_report, summary)`` tuple. ``html_report`` contains an
        overall heading followed by one colour-coded ``<div>`` per sentence;
        ``summary`` is a one-line plain-text overall score. For blank input
        the tuple is ``("No text provided.", "")``.
    """
    if not text.strip():
        return "No text provided.", ""

    sentences = split_sentences(text)
    pipe = get_detection()
    # One batched call; each sentence is truncated to at most 512 tokens.
    preds = pipe(sentences, truncation=True, max_length=512)

    results = []
    total_ai = 0.0
    for sent, pred in zip(sentences, preds):
        ai_prob = _ai_probability(pred["label"], pred["score"])
        total_ai += ai_prob

        if ai_prob > 85:
            tag, color = "Very likely AI", "#dc2626"
        elif ai_prob > 60:
            tag, color = "Likely AI", "#d97706"
        else:
            tag, color = "Likely Human", "#16a34a"

        # The sentence is user input rendered as HTML, so it must be escaped
        # to display literally instead of being interpreted as markup.
        results.append(
            f"<div style='padding:8px; margin:4px 0; border-left:4px solid {color};'>"
            f"<strong>{tag} ({ai_prob:.1f}%)</strong><br>{html.escape(sent)}</div>"
        )

    avg = total_ai / len(sentences) if sentences else 0
    summary = f"<h3>Overall AI probability: {avg:.1f}%</h3>"
    return summary + "".join(results), f"Overall: {avg:.1f}% AI"
def humanise(text, style="Natural", intensity=0.7):
    """Rewrite *text* with the humaniser model in the requested style.

    Args:
        text: Text to rewrite.
        style: One of "Natural", "Casual", "Academic" or "Professional";
            any other value falls back to "Natural".
        intensity: Number mapped linearly onto the sampling temperature
            (``0.4 + intensity * 0.5``).

    Returns:
        The rewritten text, ``"Please enter some text."`` for blank input,
        or an ``"Error during generation: ..."`` message if generation fails.
    """
    if not text.strip():
        return "Please enter some text."

    pipe = get_humaniser()

    style_prompts = {
        "Natural": "Rewrite this to sound completely natural, human-written — vary sentence length, use contractions, slight imperfections.",
        "Casual": "Rewrite this in a relaxed, friendly, conversational tone like a real person chatting.",
        "Academic": "Rewrite this in clear, formal academic style with precise and sophisticated language.",
        "Professional": "Rewrite this in a crisp, professional business tone — confident and authoritative.",
    }
    tone = style_prompts.get(style, style_prompts["Natural"])

    # Chat-formatted prompt written out by hand with <|im_start|>/<|im_end|>
    # role markers; it ends with an open assistant turn for the model to fill.
    prompt = (
        "<|im_start|>system\n"
        "You are an expert editor that removes AI stiffness and makes text feel authentically human.\n"
        "Keep original meaning 100%. Improve flow, rhythm, vocabulary variety. Output ONLY the rewritten text.<|im_end|>\n"
        "<|im_start|>user\n"
        f"{tone}\n"
        "Text:\n"
        f"{text}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )

    try:
        # Explicit generation settings; sampling temperature scales with intensity.
        gen_config = GenerationConfig(
            max_new_tokens=600,
            temperature=0.4 + float(intensity) * 0.5,
            top_p=0.92,
            repetition_penalty=1.08,
            do_sample=True,
            pad_token_id=pipe.tokenizer.eos_token_id,
            eos_token_id=pipe.tokenizer.eos_token_id,
        )
        # Clear max_length so only max_new_tokens bounds the output length.
        gen_config.max_length = None

        # return_full_text=False asks the pipeline for the continuation only.
        # Splitting the full output on the word "assistant" would cut at the
        # wrong place whenever the user's own text contains that word.
        output = pipe(
            prompt,
            generation_config=gen_config,
            num_return_sequences=1,
            return_full_text=False,
        )[0]["generated_text"]
        return output.strip()
    except Exception as e:
        # UI boundary: show the failure in the output box instead of raising.
        return f"Error during generation: {str(e)}"
# ── 6. Gradio Interface ────────────────────────────────────────────
# Two-column page: text input and controls on the left, results on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# HuForm AI Mini\n**Sentence-level AI detection + style-controlled humanisation**")

    with gr.Row():
        # Left column: the text to process plus rewrite options and actions.
        with gr.Column(scale=1):
            input_text = gr.Textbox(
                lines=8,
                max_lines=20,
                label="Input Text (paragraph)",
                placeholder="Paste or type text here...",
            )
            style_dropdown = gr.Dropdown(
                label="Humanisation Style",
                value="Natural",
                choices=["Natural", "Casual", "Academic", "Professional"],
            )
            intensity_slider = gr.Slider(
                label="Rewrite Intensity (higher = more creative change)",
                minimum=0.1,
                maximum=1.0,
                step=0.05,
                value=0.7,
            )
            with gr.Row():
                detect_btn = gr.Button("Analyze (Detect AI)")
                humanise_btn = gr.Button("Rewrite / Humanise")

        # Right column: detection report and rewritten text.
        with gr.Column(scale=1):
            detection_output = gr.HTML(label="Detection Result")
            humanised_output = gr.Textbox(label="Rewritten Text", lines=10)

    # detect_ai returns two values; the second goes to an invisible textbox.
    hidden_summary = gr.Textbox(visible=False)
    detect_btn.click(
        fn=detect_ai,
        inputs=input_text,
        outputs=[detection_output, hidden_summary],
    )

    humanise_btn.click(
        fn=humanise,
        inputs=[input_text, style_dropdown, intensity_slider],
        outputs=humanised_output,
    )

    # Clickable sample inputs that fill the input textbox.
    gr.Examples(
        label="Quick examples",
        inputs=input_text,
        examples=[
            ["The rapid advancement of artificial intelligence technologies has significantly transformed numerous industries and daily life."],
            ["Yo this new AI stuff is actually kinda wild, like it's everywhere now lol."],
            ["Machine learning algorithms demonstrate superior performance in pattern recognition tasks across diverse datasets."],
        ],
    )

# ── Launch ─────────────────────────────────────────────────────────
demo.launch(debug=False, share=True)