Spaces:

can-org
/

Testing-AI-Contain

Sleeping

Pujan-Dev committed 8 days ago

Commit

c8f4e3f

1 Parent(s): ddbc845

Add document forgery detection feature and refactor model loading

- Introduced the `documentForger` class for detecting document forgery using an ELA-trained EfficientNet model.
- Updated `ModelLoader` to support loading the document forgery model from a local path.
- Added a new API endpoint (`POST /isforged`) to check whether an uploaded document is forged.
- Refactored imports in various modules for consistency and clarity.

Files changed (6) hide show

features/real_forged_classifier/__init__.py +9 -0
features/real_forged_classifier/controller.py +79 -2
features/real_forged_classifier/inferencer.py +5 -1
features/real_forged_classifier/model_loader.py +142 -36
features/real_forged_classifier/preprocessor.py +1 -1
features/real_forged_classifier/routes.py +19 -2

features/real_forged_classifier/__init__.py ADDED Viewed

	@@ -0,0 +1,9 @@

+"""Package marker for the ``real_forged_classifier`` feature.
+Having this module makes ``features.real_forged_classifier`` importable as a
+package, so its submodules can use package-relative imports.
+"""
+__all__ = [
+    "controller",
+    "routes",
+    "preprocessor",
+    "inferencer",
+    "model_loader",
+    "model",
+]

features/real_forged_classifier/controller.py CHANGED Viewed

@@ -1,6 +1,15 @@
 from typing import IO
-from preprocessor import preprocessor
-from inferencer import interferencer
 class ClassificationController:
     """
@@ -34,3 +43,71 @@ class ClassificationController:
 # Create a single instance of the controller
 controller = ClassificationController()

 from typing import IO
+import io
+import numpy as np
+from PIL import Image
+import torch
+from torchvision import transforms
+from .preprocessor import preprocessor
+from .inferencer import interferencer
+from .model_loader import models
+from config import Config
 class ClassificationController:
     """
 # Create a single instance of the controller
 controller = ClassificationController()
+class documentForger:
+    """Detect forged documents with Error Level Analysis (ELA) plus a CNN.
+    The uploaded image is re-saved as JPEG, the per-pixel difference to the
+    original is stretched into an ELA map, and that map is classified by
+    ``models.doc_model``. Problems with the model being absent, the upload
+    being unreadable or the ELA step failing are returned as
+    ``{"error": message}`` rather than raised.
+    """
+    # JPEG quality of the recompression step used to build the ELA map.
+    _ELA_JPEG_QUALITY = 90
+    # Verdict thresholds used when Config does not define them (values in 0..1).
+    _DEFAULT_POSSIBLE_LOW = 0.40
+    _DEFAULT_FORGED_LOW = 0.55
+    # The pipeline holds no per-request state, so build it once instead of on
+    # every call.
+    _transform = transforms.Compose([
+        transforms.Resize((224, 224)),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
+    ])
+    @classmethod
+    def _compute_ela(cls, img: Image.Image) -> Image.Image:
+        """Return the ELA map of an RGB image as a PIL image."""
+        buf = io.BytesIO()
+        img.save(buf, format='JPEG', quality=cls._ELA_JPEG_QUALITY)
+        buf.seek(0)
+        recompressed = Image.open(buf).convert('RGB')
+        diff = np.abs(np.array(img, dtype=np.float32) - np.array(recompressed, dtype=np.float32))
+        # Stretch so the 99th percentile maps to 255; when that percentile is
+        # zero there is nothing to scale by and the raw differences are kept.
+        p99 = np.percentile(diff, 99)
+        if p99 > 0:
+            diff = np.clip(diff * (255.0 / p99), 0, 255)
+        # A (H, W, 3) uint8 array is read as RGB without the ``mode`` argument,
+        # which recent Pillow releases deprecate.
+        return Image.fromarray(diff.astype(np.uint8))
+    def is_forged(self, document_file: IO) -> dict:
+        """Classify an uploaded document image.
+        Args:
+            document_file: Binary file-like object whose ``read()`` yields the
+                bytes of an image.
+        Returns:
+            On success a dict with ``verdict`` (one of 'LIKELY AUTHENTIC',
+            'POSSIBLY FORGED', 'LIKELY FORGED'), ``confidence`` (softmax
+            probability of class index 1) and ``confidence_pct`` (the same
+            value as a percentage rounded to two decimals). Otherwise
+            ``{"error": message}``.
+        """
+        # Ensure a document model is loaded
+        doc_model = getattr(models, 'doc_model', None)
+        if doc_model is None:
+            return {"error": "Document forgery model not available."}
+        # Read file bytes
+        try:
+            data = document_file.read()
+            img = Image.open(io.BytesIO(data)).convert('RGB')
+        except Exception as e:
+            return {"error": f"Could not open document image: {e}"}
+        # Compute ELA map (same approach as the notebook)
+        try:
+            ela_pil = self._compute_ela(img)
+        except Exception as e:
+            return {"error": f"Failed to compute ELA: {e}"}
+        # Transform and run through model
+        tensor = self._transform(ela_pil).unsqueeze(0).to(models.device)
+        with torch.no_grad():
+            logits = doc_model(tensor)
+            # assumes class index 1 is the "forged" class — TODO confirm
+            # against the training notebook
+            forged_prob = torch.softmax(logits, dim=1)[0, 1].item()
+        # Interpret confidence using configurable thresholds (values in 0..1)
+        low = getattr(Config, 'DOCUMENT_FORGERY_POSSIBLE_LOW', self._DEFAULT_POSSIBLE_LOW)
+        high = getattr(Config, 'DOCUMENT_FORGERY_FORGED_LOW', self._DEFAULT_FORGED_LOW)
+        if forged_prob < low:
+            verdict = 'LIKELY AUTHENTIC'
+        elif forged_prob < high:
+            verdict = 'POSSIBLY FORGED'
+        else:
+            verdict = 'LIKELY FORGED'
+        return {
+            "verdict": verdict,
+            "confidence": float(forged_prob),
+            "confidence_pct": round(float(forged_prob) * 100, 2),
+        }
+# Create a single instance of the document forger
+document_forger = documentForger()

features/real_forged_classifier/inferencer.py CHANGED Viewed

@@ -3,7 +3,7 @@ import torch.nn.functional as F
 import numpy as np
 # Import the globally loaded models instance
-from model_loader import models
 class Interferencer:
     """
@@ -26,6 +26,10 @@ class Interferencer:
         Returns:
             dict: A dictionary containing the classification label and confidence score.
         """
         # 1. Get model outputs (logits)
         outputs = self.fft_model(image_tensor)

 import numpy as np
 # Import the globally loaded models instance
+from .model_loader import models
 class Interferencer:
     """
         Returns:
             dict: A dictionary containing the classification label and confidence score.
         """
+        # 0. Ensure model is loaded
+        if self.fft_model is None:
+            return {"error": "FFT model not loaded."}
         # 1. Get model outputs (logits)
         outputs = self.fft_model(image_tensor)

features/real_forged_classifier/model_loader.py CHANGED Viewed

@@ -1,61 +1,167 @@
-import torch
 from pathlib import Path
-from huggingface_hub import hf_hub_download
-from model import FFTCNN # Import the model architecture
 from config import Config
 class ModelLoader:
     """
-    A class to load and hold the PyTorch CNN model.
-    """
-    def __init__(self, model_repo_id: str, model_filename: str):
-        """
-        Initializes the ModelLoader and loads the model.
-        Args:
-            model_repo_id (str): The repository ID on Hugging Face.
-            model_filename (str): The name of the model file (.pth) in the repository.
-        """
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        print(f"Using device: {self.device}")
-        self.fft_model = self._load_fft_model(repo_id=model_repo_id, filename=model_filename)
-        print("FFT CNN model loaded successfully.")
-    def _load_fft_model(self, repo_id: str, filename: str):
-        """
-        Downloads and loads the FFT CNN model from a Hugging Face Hub repository.
-        Args:
-            repo_id (str): The repository ID on Hugging Face.
-            filename (str): The name of the model file (.pth) in the repository.
-        Returns:
-            The loaded PyTorch model object.
-        """
-        print(f"Downloading FFT CNN model from Hugging Face repo: {repo_id}")
         try:
-            # Download the model file from the Hub. It returns the cached path.
             model_path = hf_hub_download(repo_id=repo_id, filename=filename, token=Config.HF_TOKEN)
             print(f"Model downloaded to: {model_path}")
-            # Initialize the model architecture
             model = FFTCNN()
-            # Load the saved weights (state_dict) into the model
             model.load_state_dict(torch.load(model_path, map_location=torch.device(self.device)))
-            # Set the model to evaluation mode
             model.to(self.device)
             model.eval()
             return model
         except Exception as e:
-            print(f"Error downloading or loading model from Hugging Face: {e}")
             raise
 # --- Global Model Instance ---
 MODEL_REPO_ID = Config.REAL_FORGED_MODEL_REPO_ID
 MODEL_FILENAME = Config.REAL_FORGED_MODEL_FILENAME
-models = ModelLoader(model_repo_id=MODEL_REPO_ID, model_filename=MODEL_FILENAME)

 from pathlib import Path
+from typing import Any
+from .model import FFTCNN # Import the FFT CNN architecture (package-relative)
 from config import Config
+# NOTE: EfficientNet/nn imports are done lazily when torch is available.
+# NOTE(review): ELAForgeryNet is never reassigned in the code visible here;
+# confirm it is still needed.
+ELAForgeryNet = None  # will be constructed dynamically when needed
+# Placeholders that ModelLoader.__init__ rebinds (via ``global``) after it has
+# tried to import torch.
+torch = None
+TORCH_AVAILABLE = False
 class ModelLoader:
+    """Load and hold the PyTorch models used by this feature.
+    It loads:
+      - an FFT-based CNN (downloaded from Hugging Face Hub, with an optional
+        local fallback read from ``Config.REAL_FORGED_MODEL_LOCAL_PATH``)
+      - an ELA-based document forgery detector (local .pth checkpoint)
+    Loading is best-effort: a failure is printed and leaves the matching
+    attribute (``fft_model`` / ``doc_model``) set to ``None``.
     """
+    def __init__(self, model_repo_id: str, model_filename: str, doc_model_path: str = None):
+        """Detect torch, choose a device and try to load both models.
+        Args:
+            model_repo_id: Hugging Face Hub repository holding the FFT CNN weights.
+            model_filename: Name of the weights file inside that repository.
+            doc_model_path: Path of the ELA checkpoint; falls back to
+                ``Config.DOCUMENT_FORGERY_MODEL_PATH`` when ``None``.
+        """
+        # Import torch once and publish it at module level: the loader methods
+        # below refer to the module-global ``torch``.
+        global torch, TORCH_AVAILABLE
+        try:
+            import torch as _torch
+            torch = _torch
+            TORCH_AVAILABLE = True
+        except Exception:
+            torch = None
+            TORCH_AVAILABLE = False
+            print("[WARN] PyTorch not available; model loading will be skipped until torch is installed.")
+        self.device = "cuda" if TORCH_AVAILABLE and torch.cuda.is_available() else "cpu"
+        print(f"Using device: {self.device} (torch available: {TORCH_AVAILABLE})")
+        # Load FFT CNN from HF Hub, then fall back to the optional local file
+        self.fft_model = None
+        if TORCH_AVAILABLE:
+            try:
+                self.fft_model = self._load_fft_model(repo_id=model_repo_id, filename=model_filename)
+                print("FFT CNN model loaded successfully from Hub.")
+            except Exception as e:
+                # Report the cause: a missing huggingface_hub package is raised
+                # before _load_fft_model prints anything.
+                print(f"FFT model could not be loaded from Hub: {e}")
+                self.fft_model = self._load_fft_model_from_local()
+        else:
+            print("Skipping FFT model load because PyTorch is not installed.")
+        # Load document forgery model (ELA CNN) from local path if present
+        self.doc_model = None
+        if doc_model_path is None:
+            doc_model_path = Config.DOCUMENT_FORGERY_MODEL_PATH
+        if TORCH_AVAILABLE:
+            try:
+                self.doc_model = self._load_document_forgery_model(Path(doc_model_path))
+                if self.doc_model is not None:
+                    print("Document forgery (ELA) model loaded successfully.")
+            except Exception as e:
+                print(f"Warning: failed to load document forgery model: {e}")
+        else:
+            print("Skipping document forgery model load because PyTorch is not installed.")
+    def _load_fft_model_from_local(self):
+        """Load the FFT CNN from ``Config.REAL_FORGED_MODEL_LOCAL_PATH``.
+        Returns the model, or None when no usable file is configured or the
+        checkpoint cannot be loaded.
+        """
+        configured = getattr(Config, 'REAL_FORGED_MODEL_LOCAL_PATH', '')
+        # Test the raw value before building a Path: Path('') is the current
+        # directory, which is truthy and always exists.
+        local_path = Path(configured) if configured else None
+        if local_path is None or not local_path.is_file():
+            print("No local FFT model path configured or file missing; FFT model not loaded.")
+            return None
+        try:
+            print(f"Attempting to load FFT model from local path: {local_path}")
+            model = FFTCNN()
+            # NOTE(security): torch.load unpickles the file; only load trusted checkpoints.
+            state = torch.load(str(local_path), map_location=torch.device(self.device))
+            state_dict = state.get('state_dict', state) if isinstance(state, dict) else state
+            model.load_state_dict(state_dict, strict=False)
+            model.to(self.device)
+            model.eval()
+            print("FFT CNN model loaded successfully from local path.")
+            return model
+        except Exception as e:
+            print(f"Failed to load local FFT model: {e}")
+            return None
+    def _load_fft_model(self, repo_id: str, filename: str):
+        """Download and load the FFT CNN model from a Hugging Face Hub repository.
+        Raises:
+            RuntimeError: if ``huggingface_hub`` cannot be imported.
+            Exception: any download or load error, re-raised after being printed.
+        """
+        print(f"Attempting to download FFT CNN model from Hugging Face repo: {repo_id}")
+        try:
+            from huggingface_hub import hf_hub_download
+        except Exception as e:
+            raise RuntimeError(f"huggingface_hub not available: {e}") from e
         try:
             model_path = hf_hub_download(repo_id=repo_id, filename=filename, token=Config.HF_TOKEN)
             print(f"Model downloaded to: {model_path}")
             model = FFTCNN()
             model.load_state_dict(torch.load(model_path, map_location=torch.device(self.device)))
             model.to(self.device)
             model.eval()
             return model
         except Exception as e:
+            print(f"Error downloading or loading FFT model from Hugging Face: {e}")
             raise
+    def _load_document_forgery_model(self, path: Path):
+        """Load the ELA-based document forgery model from a local .pth checkpoint.
+        Returns the model instance or None if no checkpoint file can be found.
+        Raises:
+            RuntimeError: if torchvision / torch.nn cannot be imported.
+        """
+        # If the configured path is not a file, try sensible fallbacks in the repo.
+        if not path.is_file():
+            print(f"Document forgery model file not found at configured path: {path}")
+            # 1) Try features/Model/document_forgery/ela_cnn_model.pth relative to repo root
+            repo_root = Path(__file__).resolve().parents[2]
+            candidate = repo_root / 'features' / 'Model' / 'document_forgery' / 'ela_cnn_model.pth'
+            if candidate.is_file():
+                path = candidate
+                print(f"Found document forgery model at fallback path: {path}")
+            else:
+                # 2) Search the repo for any file named ela_cnn_model.pth and
+                # stop at the first hit instead of walking the whole tree.
+                print("Searching repository for 'ela_cnn_model.pth'...")
+                match = next(repo_root.rglob('ela_cnn_model.pth'), None)
+                if match is None:
+                    print("Document forgery model not found in repository; skipping load.")
+                    return None
+                path = match
+                print(f"Found document forgery model at: {path}")
+        print(f"Loading document forgery model from: {path}")
+        # Build the ELA model architecture lazily (requires torchvision & torch.nn)
+        try:
+            import torchvision.models as tv_models
+            import torch.nn as nn
+        except Exception as e:
+            raise RuntimeError(f"Required packages for ELA model not available: {e}") from e
+        # NOTE(review): requesting ImageNet weights can trigger a download even
+        # though the checkpoint below is loaded on top of them; confirm whether
+        # weights=None would be enough.
+        backbone = tv_models.efficientnet_b0(weights='IMAGENET1K_V1')
+        in_features = backbone.classifier[1].in_features
+        backbone.classifier = nn.Sequential(
+            nn.Dropout(p=0.4),
+            nn.Linear(in_features, 256),
+            nn.ReLU(inplace=True),
+            nn.Dropout(p=0.2),
+            nn.Linear(256, 2),
+        )
+        model = backbone
+        # NOTE(security): torch.load unpickles the file; only load trusted checkpoints.
+        state = torch.load(str(path), map_location=torch.device(self.device))
+        # The checkpoint might be either a state_dict or a full checkpoint dict
+        if isinstance(state, dict) and 'state_dict' in state:
+            state_dict = state['state_dict']
+        else:
+            state_dict = state
+        # strict=False tolerates key-name differences, so report them: a silent
+        # mismatch would leave layers without the checkpoint's weights.
+        result = model.load_state_dict(state_dict, strict=False)
+        if result.missing_keys or result.unexpected_keys:
+            print(
+                "Warning: document forgery checkpoint keys did not fully match "
+                f"(missing: {len(result.missing_keys)}, unexpected: {len(result.unexpected_keys)})."
+            )
+        model.to(self.device)
+        model.eval()
+        return model
 # --- Global Model Instance ---
+# Built at import time: importing this module runs ModelLoader.__init__, which
+# attempts to load both models.
 MODEL_REPO_ID = Config.REAL_FORGED_MODEL_REPO_ID
 MODEL_FILENAME = Config.REAL_FORGED_MODEL_FILENAME
+DOC_MODEL_PATH = Config.DOCUMENT_FORGERY_MODEL_PATH
+models = ModelLoader(model_repo_id=MODEL_REPO_ID, model_filename=MODEL_FILENAME, doc_model_path=DOC_MODEL_PATH)

features/real_forged_classifier/preprocessor.py CHANGED Viewed

@@ -6,7 +6,7 @@ import cv2
 from torchvision import transforms
 # Import the globally loaded models instance
-from model_loader import models
 class ImagePreprocessor:
     """

 from torchvision import transforms
 # Import the globally loaded models instance
+from .model_loader import models
 class ImagePreprocessor:
     """

features/real_forged_classifier/routes.py CHANGED Viewed

@@ -1,8 +1,8 @@
 from fastapi import APIRouter, File, UploadFile, HTTPException, status
 from fastapi.responses import JSONResponse
-# Import the controller instance
-from controller import controller
 # Create an API router
 router = APIRouter()
@@ -35,3 +35,20 @@ async def classify_image_endpoint(image: UploadFile = File(...)):
     return JSONResponse(content=result, status_code=status.HTTP_200_OK)

 from fastapi import APIRouter, File, UploadFile, HTTPException, status
 from fastapi.responses import JSONResponse
+# Import the controller instance and document forger
+from .controller import controller, document_forger
 # Create an API router
 router = APIRouter()
     return JSONResponse(content=result, status_code=status.HTTP_200_OK)
+@router.post("/isforged", summary="Check if the document is forged")
+async def is_forged_endpoint(file: UploadFile = File(...)):
+    """Run the document forgery detector on an uploaded image file.
+    Accepts image uploads (multipart/form-data) and returns a JSON verdict with confidence.
+    Raises:
+        HTTPException: 415 when the upload is not declared as an image, 400
+            when the detector returns an error.
+    """
+    # Imported here so the endpoint does not rely on the file's import list.
+    from fastapi.concurrency import run_in_threadpool
+    # content_type is None when the client sends no Content-Type for the part;
+    # treat that as "not an image" instead of failing on None.startswith.
+    content_type = file.content_type or ""
+    if not content_type.startswith("image/"):
+        raise HTTPException(
+            status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
+            detail="Unsupported file type. Please upload an image (e.g., JPEG, PNG)."
+        )
+    # Image decoding and model inference block; run them in a worker thread so
+    # the event loop can keep serving other requests.
+    result = await run_in_threadpool(document_forger.is_forged, file.file)
+    if isinstance(result, dict) and result.get("error"):
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=result.get("error"))
+    return JSONResponse(content=result, status_code=status.HTTP_200_OK)