eval mode, fixed, full binary mode

7b33404 9 months ago

10.8 kB

	"""
	Define loss functions needed for training the model — padding safe (-1 sentinel)
	"""

	import torch
	import torch.nn.functional as F
	from torchmetrics.functional.classification import (
	auroc, average_precision, roc, precision_recall_curve
	)
	import rootutils
	from dpacman.utils import pylogger

	root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
	logger = pylogger.RankedLogger(__name__, rank_zero_only=True)


	def _expand_like(mask: torch.Tensor, like: torch.Tensor):
	    """
	    Broadcast ``mask`` to the exact shape of ``like``.

	    Size-1 axes are appended on the right of ``mask`` until it has as many
	    dimensions as ``like`` (e.g. a (B, L) mask against (B, L, 1) values),
	    then the result is expanded to ``like``'s shape.
	    """
	    missing_dims = like.dim() - mask.dim()
	    if missing_dims > 0:
	        # Indexing with None appends trailing singleton axes, exactly like
	        # calling unsqueeze(-1) once per missing dimension.
	        mask = mask[(...,) + (None,) * missing_dims]
	    return mask.expand_as(like)

	def bce_loss_masked(logits, targets, mask, pos_weight=None, eps=1e-8):
	    """
	    Mean binary cross-entropy (with logits) over the positions selected by ``mask``.

	    ``mask`` may have fewer trailing dimensions than ``logits``; it is expanded
	    to the per-element loss shape before use. ``pos_weight`` is forwarded
	    unchanged to ``F.binary_cross_entropy_with_logits``. ``eps`` is the lower
	    bound of the denominator, so an all-False mask yields 0 rather than NaN.
	    """
	    # Targets are squeezed into [0, 1] so a stray pad sentinel cannot reach BCE.
	    clamped_targets = targets.clamp(0.0, 1.0)
	    elementwise = F.binary_cross_entropy_with_logits(
	        logits, clamped_targets, pos_weight=pos_weight, reduction="none"
	    )
	    weights = _expand_like(mask, elementwise).to(elementwise.dtype)
	    masked_total = (elementwise * weights).sum()
	    n_selected = weights.sum().clamp_min(eps)
	    return masked_total / n_selected

	def mse_peaks_only(logits, targets, peak_mask, eps=1e-8):
	    """
	    Mean squared error between sigmoid(logits) and targets, over peak positions only.

	    Positions where ``peak_mask`` is False contribute nothing. ``eps`` is the
	    lower bound of the denominator, so an empty mask yields 0 rather than NaN.
	    """
	    squared_err = F.mse_loss(torch.sigmoid(logits), targets, reduction="none")
	    weights = _expand_like(peak_mask, squared_err).to(squared_err.dtype)
	    n_selected = weights.sum().clamp_min(eps)
	    return (squared_err * weights).sum() / n_selected

	def calculate_loss(
	    logits,
	    targets,
	    binder_kpm=None,
	    glm_kpm=None,
	    eps: float = 1e-8,
	    alpha: float = 1.0,
	    gamma: float = 1.0,
	    pos_weight=None,
	    pad_value: float = -1.0,
	    loss_type="mixed",
	):
	    """
	    Combine masked-BCE (non-peak) + masked-MSE on probs (peak), ignoring padding.
	    Assumes targets == pad_value (-1 by default) are pads; non-peak = 0; peak > 0.

	    Args:
	        logits: raw (pre-sigmoid) scores.
	        targets: labels; pad positions hold ``pad_value``.
	        binder_kpm: 1 at PAD positions, 0 elsewhere. Currently unused by this
	            function; optional.
	        glm_kpm: 1 at PAD positions, 0 elsewhere. Optional. When given it is
	            only used to cross-check the number of valid positions. It is
	            inverted with ``~``, so it is expected to be a bool tensor.
	        eps: lower bound on the mask-count denominators.
	        alpha: weight of the BCE term.
	        gamma: weight of the MSE term (mixed mode only).
	        pos_weight: forwarded to the BCE term.
	        pad_value: sentinel marking padded targets.
	        loss_type: "mixed" -> BCE off the peaks and MSE on the peaks.
	            Any other value takes the binary path: BCE over every valid
	            position, because the labels are expected to be binary.

	    Returns:
	        Scalar loss tensor.
	    """
	    # Targets are padded to pad_value where there is no real DNA sequence.
	    valid = targets != pad_value
	    if glm_kpm is not None:
	        # Validity computed two ways; these should be the same.
	        nvalid = torch.sum(valid).item()
	        nvalid_2 = torch.sum(~glm_kpm).item()
	        assert nvalid == nvalid_2, (
	            f"valid-position count from targets ({nvalid}) != "
	            f"count from glm_kpm ({nvalid_2})"
	        )

	    # Peak / non-peak masks that exclude pads
	    nonpeak_mask = valid & (targets == 0)
	    peak_mask = valid & (targets > 0)

	    # For safety, zero-out targets at pad positions so they never feed into BCE/MSE
	    targets_safe = torch.where(valid, targets, torch.zeros_like(targets))

	    if loss_type == "mixed":
	        bce_nonpeak = bce_loss_masked(
	            logits, targets_safe, nonpeak_mask, pos_weight=pos_weight, eps=eps
	        )
	        mse_peak = mse_peaks_only(logits, targets_safe, peak_mask, eps=eps)
	        return alpha * bce_nonpeak + gamma * mse_peak

	    # Binary path: we're expecting all binary labels. Make sure.
	    all_binary = ((targets_safe == 1) | (targets_safe == 0)).all().item()
	    if not all_binary:
	        logger.info(f"WARNING: expecting all binary labels for loss_type={loss_type}. Did not get all binary labels.")
	    # BCE over all valid positions
	    bce_all = bce_loss_masked(logits, targets_safe, valid, pos_weight=pos_weight, eps=eps)
	    return alpha * bce_all

	@torch.no_grad()
	def auroc_zeros_vs_ones_from_logits(
	logits: torch.Tensor, # (B, L)
	labels: torch.Tensor, # (B, L)
	glm_kpm: torch.Tensor \| None = None, # (B, L) True=PAD
	pos_thresh: float = 0.99,
	):
	"""
	Returns:
	auc: scalar tensor (AUROC)
	n_pos, n_neg: ints
	tpr, fpr: tensors of shape (T,)
	thresholds: tensor of shape (T,)
	tp, fp: integer counts per threshold (shape (T,))
	"""
	device = logits.device
	# glm_kpm is 1 where there's a pad, so ~glm_kpm is valid positions
	valid = ~glm_kpm if glm_kpm is not None else torch.ones_like(labels, dtype=torch.bool, device=device)
	keep = valid & ((labels > pos_thresh) \| (labels == 0.0))
	if keep.sum() == 0:
	return (torch.tensor(float('nan'), device=device), 0, 0,
	torch.empty(0, device=device), torch.empty(0, device=device),
	torch.empty(0, device=device), torch.empty(0, device=device), torch.empty(0, device=device))

	y = (labels[keep] > pos_thresh).to(torch.int)
	s = logits[keep]

	n_pos = int(y.sum().item())
	n_neg = y.numel() - n_pos
	if n_pos == 0 or n_neg == 0:
	return (torch.tensor(float('nan'), device=device), n_pos, n_neg,
	torch.empty(0, device=device), torch.empty(0, device=device),
	torch.empty(0, device=device), torch.empty(0, device=device), torch.empty(0, device=device))

	# Full ROC curve
	fpr, tpr, thresholds = roc(s, y, task="binary")
	# AUROC (TM handles logits)
	auc = auroc(s, y, task="binary")

	# Convert rates to counts (round to nearest to avoid float off-by-one)
	tp = (tpr * n_pos).round().to(torch.long)
	fp = (fpr * n_neg).round().to(torch.long)

	return auc.to(device), n_pos, n_neg, tpr.to(device), fpr.to(device), thresholds.to(device), tp.to(device), fp.to(device)


	@torch.no_grad()
	def auprc_zeros_vs_ones_from_logits(
	logits: torch.Tensor, # (B, L)
	labels: torch.Tensor, # (B, L)
	glm_kpm: torch.Tensor \| None = None, # (B, L) True=PAD
	pos_thresh: float = 0.99,
	):
	"""
	Returns:
	ap: scalar tensor (Average Precision / AUPRC)
	n_pos, n_neg: ints
	precision: (T,)
	recall: (T,)
	thresholds: (T,)
	"""
	device = logits.device
	# glm_kpm is 1 where there's a pad, so ~glm_kpm is valid
	valid = ~glm_kpm if glm_kpm is not None else torch.ones_like(labels, dtype=torch.bool, device=device)
	keep = valid & ((labels > pos_thresh) \| (labels == 0.0))
	if keep.sum() == 0:
	return (torch.tensor(float('nan'), device=device), 0, 0,
	torch.empty(0, device=device), torch.empty(0, device=device), torch.empty(0, device=device))

	y = (labels[keep] > pos_thresh).to(torch.int)
	s = logits[keep]

	n_pos = int(y.sum().item())
	n_neg = y.numel() - n_pos
	if n_pos == 0:
	# By convention, AP=0 when there are no positives
	return (torch.tensor(0.0, device=device), 0, n_neg,
	torch.empty(0, device=device), torch.empty(0, device=device), torch.empty(0, device=device))

	# Full PR curve
	precision, recall, thresholds = precision_recall_curve(s, y, task="binary")
	# Average Precision / AUPRC
	ap = average_precision(s, y, task="binary")

	return ap.to(device), n_pos, n_neg, precision.to(device), recall.to(device), thresholds.to(device)

	def accuracy_percentage(
	    logits,
	    targets,
	    peak_thresh: float = 0.5,
	    eps: float = 1e-8,
	    pad_value: float = -1.0,
	):
	    """
	    Compute accuracy for predicting high-confidence peaks (prob >= 0.5), ignoring padding.

	    Args:
	        logits: raw (pre-sigmoid) scores.
	        targets: labels; positions equal to ``pad_value`` are excluded.
	            May have fewer trailing dimensions than ``logits``
	            (e.g. (B, L) vs (B, L, 1)).
	        peak_thresh: a target counts as a positive label when it is >= this value.
	        eps: lower bound on the number of valid positions (avoids 0/0).
	        pad_value: sentinel marking padded targets.

	    Returns:
	        Accuracy over valid positions as a Python float in [0, 100].
	    """
	    valid = targets != pad_value
	    preds_bin = torch.sigmoid(logits) >= 0.5

	    # Expand labels and mask to the prediction shape together. Comparing
	    # (B, L, 1) predictions with un-expanded (B, L) labels would broadcast
	    # along the wrong axes.
	    labels = _expand_like(targets >= peak_thresh, preds_bin)
	    v = _expand_like(valid, preds_bin)

	    correct = ((preds_bin == labels) & v).to(torch.float32).sum()
	    total = v.to(torch.float32).sum().clamp_min(eps)
	    return (correct / total).item() * 100.0

	if __name__ == "__main__":
	    # Smoke-test demo: builds small random inputs, prints mask statistics,
	    # the combined loss and the accuracy for (B, L) and (B, L, C) shapes.
	    torch.manual_seed(0)
	    PAD = -1.0

	    def make_targets_BL(B=2, L=8, pad_positions=(6, 7)):
	        """Create (B,L) targets: 0=non-peak, >0=peak, -1=pad."""
	        t = torch.zeros(B, L)
	        # sprinkle a few peaks (values in [0.6, 1.0])
	        t[:, 1] = torch.rand(B) * 0.4 + 0.6
	        t[:, 3] = torch.rand(B) * 0.4 + 0.6
	        # pads
	        for p in pad_positions:
	            t[:, p] = PAD
	        return t

	    def make_targets_BLC(B=2, L=8, C=3, pad_positions=(6, 7)):
	        """
	        Create (B,L,C) targets by broadcasting a (B,L) base across channels
	        (so masking needs to expand correctly).
	        """
	        base = make_targets_BL(B, L, pad_positions)  # (B,L)
	        t = base.unsqueeze(-1).expand(-1, -1, C).clone()
	        # Make channel 1 slightly different to show per-channel variety
	        t[..., 1] = torch.where(t[..., 1] > 0, (t[..., 1] * 0.85).clamp(0, 1), t[..., 1])
	        return t

	    def mask_stats(name, logits, targets, pad_value=PAD):
	        """Print shapes, a target preview and the peak / non-peak element counts."""
	        valid = (targets != pad_value)
	        nonpeak_mask = valid & (targets == 0)
	        peak_mask = valid & (targets > 0)

	        m_nonpeak = _expand_like(nonpeak_mask, logits)
	        m_peak = _expand_like(peak_mask, logits)

	        print(f"\n[{name}]")
	        print(f" logits.shape = {tuple(logits.shape)}")
	        print(f" targets.shape = {tuple(targets.shape)}")
	        # Previews (first batch)
	        if targets.dim() == 2:  # (B,L)
	            print(f" targets[0,:] preview: {targets[0]}")
	        else:  # (B,L,C)
	            print(f" targets[0,:,0] ch0 preview: {targets[0,:,0]}")
	            print(f" targets[0,:,1] ch1 preview: {targets[0,:,1]}")
	        # Mask counts after EXPANSION (these define denominators)
	        print(f" #non-peak elems used = {m_nonpeak.sum().item():.0f}")
	        print(f" #peak elems used = {m_peak.sum().item():.0f}")

	    # =========================
	    # Case A: (B, L)
	    # =========================
	    B, L = 2, 8
	    logits_BL = torch.randn(B, L)  # raw scores
	    targets_BL = make_targets_BL(B, L)  # 0, >0, and -1 pads

	    mask_stats("BL", logits_BL, targets_BL, pad_value=PAD)

	    # The demo has no key-padding masks, so pass None explicitly;
	    # padding is still detected from the -1 sentinel in the targets.
	    loss_BL = calculate_loss(
	        logits_BL, targets_BL, binder_kpm=None, glm_kpm=None,
	        pad_value=PAD, alpha=1.0, gamma=1.0
	    )
	    acc_BL = accuracy_percentage(
	        logits_BL, targets_BL, pad_value=PAD, peak_thresh=0.5
	    )

	    print(f" loss_BL = {loss_BL.item():.6f}")
	    print(f" acc_BL = {acc_BL:.2f}%")

	    # =========================
	    # Case B: (B, L, C)
	    # =========================
	    B, L, C = 2, 8, 3
	    logits_BLC = torch.randn(B, L, C)  # raw scores with channels
	    targets_BLC = make_targets_BLC(B, L, C)  # broadcasted targets + tweaks

	    mask_stats("BLC", logits_BLC, targets_BLC, pad_value=PAD)

	    loss_BLC = calculate_loss(
	        logits_BLC, targets_BLC, binder_kpm=None, glm_kpm=None,
	        pad_value=PAD, alpha=1.0, gamma=1.0
	    )
	    acc_BLC = accuracy_percentage(
	        logits_BLC, targets_BLC, pad_value=PAD, peak_thresh=0.5
	    )

	    print(f" loss_BLC = {loss_BLC.item():.6f}")
	    print(f" acc_BLC = {acc_BLC:.2f}%")