Spaces:

Akash4911
/

fakeshield-api

Running

App Files Files Community

fakeshield-api / debug_s4.py

Akash4911

Production Deploy: Improved robustness and logging

66b6851 9 days ago

raw

history blame contribute delete

1.67 kB

	# debug_s4.py
	import sys, os, math, torch
	import torch.nn.functional as F
	sys.path.append(os.path.join(os.getcwd(), "backend"))

	from app.models.text_detector import GPT2_TOK, GPT2_MDL

	# Fixed sample passage that gets scored. This is a plain triple-quoted
	# literal, so the line breaks (and any leading whitespace on the continuation
	# lines) are part of the string handed to the tokenizer.
	text = """Machine Learning is a subfield of Artificial Intelligence that focuses
	on enabling computers to learn from data and improve their performance without
	being explicitly programmed. Instead of relying on fixed rules, machine learning
	systems adapt by identifying patterns and making decisions based on past experiences.
	This makes them highly flexible and capable of handling complex tasks."""

	# Tokenize into PyTorch tensors, truncating to at most 256 tokens.
	enc = GPT2_TOK(
	    text, return_tensors="pt", truncation=True, max_length=256
	)

	# The suite below has to be indented under the `with` header; when it sits at
	# the header's own level the script does not parse. Nothing here needs
	# gradients, so the whole scoring step runs with autograd disabled.
	with torch.no_grad():
	    logits = GPT2_MDL(**enc).logits
	    # Normalize over the last axis so each entry is a log-probability.
	    log_p = F.log_softmax(logits, dim=-1)
	    # First (and only) sequence in the batch.
	    ids = enc["input_ids"][0]
	    # Pair position i of the output with token i + 1 of the input: for every
	    # position except the last, pick the log-prob given to the token that
	    # actually follows. Presumably GPT2_MDL is a causal LM whose logits at
	    # position i predict token i + 1 -- confirm against app.models.text_detector.
	    # The result has len(ids) - 1 entries (the first token is never scored).
	    tok_lp = log_p[0, range(len(ids)-1), ids[1:]]

	# Summary statistics over the per-token log-probabilities.
	mean_lp = tok_lp.mean().item()
	var_lp = tok_lp.var().item()
	med_lp = tok_lp.median().item()
	# Fraction of scored tokens whose log-prob exceeds each threshold.
	hc = (tok_lp > -2.0).float().mean().item()
	vhc = (tok_lp > -1.0).float().mean().item()

	print(f"mean_lp = {round(mean_lp, 6)}")
	print(f"median_lp = {round(med_lp, 6)}")
	print(f"var_lp = {round(var_lp, 6)}")
	print(f"high_conf (>-2.0) = {round(hc, 4)}")
	print(f"very_high (>-1.0) = {round(vhc, 4)}")

	# Show distribution: extremes plus raw token counts per threshold, working on
	# a plain Python list of floats.
	vals = tok_lp.tolist()
	print(f"\nMin: {round(min(vals),3)}")
	print(f"Max: {round(max(vals),3)}")
	print(f"Tokens > -1.0: {sum(1 for v in vals if v > -1.0)}")
	print(f"Tokens > -2.0: {sum(1 for v in vals if v > -2.0)}")
	print(f"Tokens > -3.0: {sum(1 for v in vals if v > -3.0)}")
	print(f"Tokens < -5.0: {sum(1 for v in vals if v < -5.0)}")
	print(f"Total tokens: {len(vals)}")