diff --git a/.env-example b/.env-example old mode 100644 new mode 100755 diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 index cb7bd884aa5300758115514465844b58fd4035b7..aa6107a74728dc90fb68502e0e1d2634ae9867dd --- a/.gitignore +++ b/.gitignore @@ -60,3 +60,9 @@ models/.gitattributes #<-- This line can stay if you only want to ignore that f todo.md np_text_model +IMG_Models +notebooks +# Ignore model and tokenizer files +np_text_model/classifier/sentencepiece.bpe.model +np_text_model/classifier/tokenizer.json + diff --git a/Dockerfile b/Dockerfile old mode 100644 new mode 100755 index b60a0eac3ad94b5bf5e6f0db14fcb3d3f4c78c54..97fdacd23f849777907e5762064f820c55ac879c --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,19 @@ # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker # you will also find guides on how best to write your Dockerfile -FROM python:3.9 +FROM python:3.10 +# Create user first RUN useradd -m -u 1000 user + +# Install system dependencies (requires root) +RUN apt-get update && apt-get install -y libgl1 + +# Switch to non-root user USER user ENV PATH="/home/user/.local/bin:$PATH" +# Add TensorFlow environment variables to reduce logging noise WORKDIR /app COPY --chown=user ./requirements.txt requirements.txt @@ -14,4 +21,6 @@ RUN pip install --no-cache-dir --upgrade -r requirements.txt RUN python -m spacy download en_core_web_sm || echo "Failed to download model" COPY --chown=user . /app + CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"] + diff --git a/Procfile b/Procfile old mode 100644 new mode 100755 diff --git a/README.md b/README.md index 7321a534904ad04ba0f2159d35fac3df754a85ca..b036cf52798c9188a302e93567fedb728c9e1d66 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,152 @@ +# AI-Contain-Checker + +A modular AI content detection system with support for **image classification**, **image edit detection**, **Nepali text classification**, and **general text classification**. 
Built for performance and extensibility, it is ideal for detecting AI-generated content in both visual and textual forms. + + +## 🌟 Features + +### πŸ–ΌοΈ Image Classifier + +* **Purpose**: Classifies whether an image is AI-generated or a real-life photo. +* **Model**: Fine-tuned **InceptionV3** CNN. +* **Dataset**: Custom curated dataset with **\~79,950 images** for binary classification. +* **Location**: [`features/image_classifier`](features/image_classifier) +* **Docs**: [`docs/features/image_classifier.md`](docs/features/image_classifier.md) + +### πŸ–ŒοΈ Image Edit Detector + +* **Purpose**: Detects image tampering or post-processing. +* **Techniques Used**: + + * **Error Level Analysis (ELA)**: Visualizes compression artifacts. + * **Fast Fourier Transform (FFT)**: Detects unnatural frequency patterns. +* **Location**: [`features/image_edit_detector`](features/image_edit_detector) +* **Docs**: + + * [ELA](docs/detector/ELA.md) + * [FFT](docs/detector/fft.md ) + * [Metadata Analysis](docs/detector/meta.md) + * [Backend Notes](docs/detector/note-for-backend.md) + +### πŸ“ Nepali Text Classifier + +* **Purpose**: Determines if Nepali text content is AI-generated or written by a human. +* **Model**: Based on `XLMRClassifier` fine-tuned on Nepali language data. +* **Dataset**: Scraped dataset of **\~18,000** Nepali texts. +* **Location**: [`features/nepali_text_classifier`](features/nepali_text_classifier) +* **Docs**: [`docs/features/nepali_text_classifier.md`](docs/features/nepali_text_classifier.md) + +### 🌐 English Text Classifier + +* **Purpose**: Detects if English text is AI-generated or human-written. +* **Pipeline**: + + * Uses **GPT2 tokenizer** for input preprocessing. + * Custom binary classifier to differentiate between AI and human-written content. 
+* **Location**: [`features/text_classifier`](features/text_classifier) +* **Docs**: [`docs/features/text_classifier.md`](docs/features/text_classifier.md) + --- -title: Ai-Checker -emoji: πŸš€ -colorFrom: yellow -colorTo: blue -sdk: docker -pinned: false + +## πŸ—‚οΈ Project Structure + +```bash +AI-Checker/ +β”‚ +β”œβ”€β”€ app.py # Main FastAPI entry point +β”œβ”€β”€ config.py # Configuration settings +β”œβ”€β”€ Dockerfile # Docker build script +β”œβ”€β”€ Procfile # Deployment file for Heroku or similar +β”œβ”€β”€ requirements.txt # Python dependencies +β”œβ”€β”€ README.md # You are here πŸ“˜ +β”‚ +β”œβ”€β”€ features/ # Core detection modules +β”‚ β”œβ”€β”€ image_classifier/ +β”‚ β”œβ”€β”€ image_edit_detector/ +β”‚ β”œβ”€β”€ nepali_text_classifier/ +β”‚ └── text_classifier/ +β”‚ +β”œβ”€β”€ docs/ # Internal and API documentation +β”‚ β”œβ”€β”€ api_endpoints.md +β”‚ β”œβ”€β”€ deployment.md +β”‚ β”œβ”€β”€ detector/ +β”‚ β”‚ β”œβ”€β”€ ELA.md +β”‚ β”‚ β”œβ”€β”€ fft.md +β”‚ β”‚ β”œβ”€β”€ meta.md +β”‚ β”‚ └── note-for-backend.md +β”‚ β”œβ”€β”€ functions.md +β”‚ β”œβ”€β”€ nestjs_integration.md +β”‚ β”œβ”€β”€ security.md +β”‚ β”œβ”€β”€ setup.md +β”‚ └── structure.md +β”‚ +β”œβ”€β”€ IMG_Models/ # Saved image classifier model(s) +β”‚ └── latest-my_cnn_model.h5 +β”‚ +β”œβ”€β”€ notebooks/ # Experimental and debug notebooks +β”œβ”€β”€ static/ # Static assets if needed +└── test.md # Test notes +```` + --- +## πŸ“š Documentation Links + +* [API Endpoints](docs/api_endpoints.md) +* [Deployment Guide](docs/deployment.md) +* [Detector Documentation](docs/detector/) + + * [Error Level Analysis (ELA)](docs/detector/ELA.md) + * [Fast Fourier Transform (FFT)](docs/detector/fft.md) + * [Metadata Analysis](docs/detector/meta.md) + * [Backend Notes](docs/detector/note-for-backend.md) +* [Functions Overview](docs/functions.md) +* [NestJS Integration Guide](docs/nestjs_integration.md) +* [Security Details](docs/security.md) +* [Setup Instructions](docs/setup.md) +* [Project 
Structure](docs/structure.md) + +--- + +## 🚀 Usage + +1. **Install dependencies** + + ```bash + pip install -r requirements.txt + ``` + +2. **Run the API** + + ```bash + uvicorn app:app --reload + ``` + +3. **Build Docker (optional)** + + ```bash + docker build -t ai-contain-checker . + docker run -p 8000:7860 ai-contain-checker + ``` + +--- + +## 🔐 Security & Integration + +* **Token Authentication** and **IP Whitelisting** supported. +* NestJS integration guide: [`docs/nestjs_integration.md`](docs/nestjs_integration.md) +* Rate limiting handled using `slowapi`. + +--- + +## 🛡️ Future Plans + +* Add **video classifier** module. +* Expand dataset for **multilingual** AI content detection. +* Add **fine-tuning UI** for models. + +--- + +## 📄 License + +See full license terms here: [`LICENSE.md`](license.md) diff --git a/__init__.py b/__init__.py old mode 100644 new mode 100755 diff --git a/app.py b/app.py old mode 100644 new mode 100755 index 49e4bd6b2528f0ffc453a41de564bc42d8ec0134..2215bd7ffb2318dadcb44fe16f19493f5afce664 --- a/app.py +++ b/app.py @@ -1,37 +1,62 @@ from fastapi import FastAPI, Request from slowapi import Limiter, _rate_limit_exceeded_handler +from fastapi.responses import FileResponse from slowapi.middleware import SlowAPIMiddleware from slowapi.errors import RateLimitExceeded from slowapi.util import get_remote_address from fastapi.responses import JSONResponse from features.text_classifier.routes import router as text_classifier_router -from features.nepali_text_classifier.routes import router as nepali_text_classifier_router +from features.nepali_text_classifier.routes import ( + router as nepali_text_classifier_router, +) +from features.image_classifier.routes import router as image_classifier_router +from features.image_edit_detector.routes import router as image_edit_detector_router +from fastapi.staticfiles import StaticFiles + from config import ACCESS_RATE + import requests + limiter = Limiter(key_func=get_remote_address,
default_limits=[ACCESS_RATE]) app = FastAPI() - +# added the robots.txt # Set up SlowAPI app.state.limiter = limiter -app.add_exception_handler(RateLimitExceeded, lambda request, exc: JSONResponse( - status_code=429, - content={ - "status_code": 429, - "error": "Rate limit exceeded", - "message": "Too many requests. Chill for a bit and try again" - } -)) +app.add_exception_handler( + RateLimitExceeded, + lambda request, exc: JSONResponse( + status_code=429, + content={ + "status_code": 429, + "error": "Rate limit exceeded", + "message": "Too many requests. Chill for a bit and try again", + }, + ), +) app.add_middleware(SlowAPIMiddleware) # Include your routes app.include_router(text_classifier_router, prefix="/text") -app.include_router(nepali_text_classifier_router,prefix="/NP") +app.include_router(nepali_text_classifier_router, prefix="/NP") +app.include_router(image_classifier_router, prefix="/AI-image") +app.include_router(image_edit_detector_router, prefix="/detect") + + @app.get("/") @limiter.limit(ACCESS_RATE) async def root(request: Request): return { "message": "API is working", - "endpoints": ["/text/analyse", "/text/upload", "/text/analyse-sentences", "/text/analyse-sentance-file"] + "endpoints": [ + "/text/analyse", + "/text/upload", + "/text/analyse-sentences", + "/text/analyse-sentance-file", + "/NP/analyse", + "/NP/upload", + "/NP/analyse-sentences", + "/NP/file-sentences-analyse", + "/AI-image/analyse", + ], } - diff --git a/config.py b/config.py old mode 100644 new mode 100755 diff --git a/docs/api_endpoints.md b/docs/api_endpoints.md old mode 100644 new mode 100755 index 944863a792054aef38b563c1c0d3d59145f06302..82190d6be1266ad707b474ae6a8bbf6440ff0246 --- a/docs/api_endpoints.md +++ b/docs/api_endpoints.md @@ -2,13 +2,13 @@ ### English (GPT-2) - `/text/` -| Endpoint | Method | Description | -| --------------------------------- | ------ | ----------------------------------------- | -| `/text/analyse` | POST | Classify raw English text | -| 
`/text/analyse-sentences` | POST | Sentence-by-sentence breakdown | -| `/text/analyse-sentance-file` | POST | Upload file, per-sentence breakdown | -| `/text/upload` | POST | Upload file for overall classification | -| `/text/health` | GET | Health check | +| Endpoint | Method | Description | +| ----------------------------- | ------ | -------------------------------------- | +| `/text/analyse` | POST | Classify raw English text | +| `/text/analyse-sentences` | POST | Sentence-by-sentence breakdown | +| `/text/analyse-sentance-file` | POST | Upload file, per-sentence breakdown | +| `/text/upload` | POST | Upload file for overall classification | +| `/text/health` | GET | Health check | #### Example: Classify English text @@ -20,6 +20,7 @@ curl -X POST http://localhost:8000/text/analyse \ ``` **Response:** + ```json { "result": "AI-generated", @@ -40,13 +41,13 @@ curl -X POST http://localhost:8000/text/upload \ ### Nepali (SentencePiece) - `/NP/` -| Endpoint | Method | Description | -| --------------------------------- | ------ | ----------------------------------------- | -| `/NP/analyse` | POST | Classify Nepali text | -| `/NP/analyse-sentences` | POST | Sentence-by-sentence breakdown | -| `/NP/upload` | POST | Upload Nepali PDF for classification | -| `/NP/file-sentences-analyse` | POST | PDF upload, per-sentence breakdown | -| `/NP/health` | GET | Health check | +| Endpoint | Method | Description | +| ---------------------------- | ------ | ------------------------------------ | +| `/NP/analyse` | POST | Classify Nepali text | +| `/NP/analyse-sentences` | POST | Sentence-by-sentence breakdown | +| `/NP/upload` | POST | Upload Nepali PDF for classification | +| `/NP/file-sentences-analyse` | POST | PDF upload, per-sentence breakdown | +| `/NP/health` | GET | Health check | #### Example: Nepali text classification @@ -58,6 +59,7 @@ curl -X POST http://localhost:8000/NP/analyse \ ``` **Response:** + ```json { "label": "Human", @@ -73,3 +75,18 @@ curl -X POST 
http://localhost:8000/NP/upload \ -F 'file=@NepaliText.pdf;type=application/pdf' ``` +### Image-Classification - `/AI-image/` + +| Endpoint | Method | Description | +| ------------------- | ------ | ----------------------- | +| `/AI-image/analyse` | POST | Classify Image using ML | + +#### Example: Image-Classification + +```bash +curl -X POST http://localhost:8000/AI-image/analyse \ + -H "Authorization: Bearer " \ + -F 'file=@test1.png' +``` + +[🔙 Back to Main README](../README.md) diff --git a/docs/deployment.md b/docs/deployment.md old mode 100644 new mode 100755 index 88945291e2919d491df884f754b729e819dfd13e..1dce95d2afedbfad11db29cbf231fd9692e02835 --- a/docs/deployment.md +++ b/docs/deployment.md @@ -103,3 +103,6 @@ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"] Happy deploying! **P.S.** Try not to break stuff. 😅 + + +[🔙 Back to Main README](../README.md) diff --git a/docs/detector/ELA.md b/docs/detector/ELA.md new file mode 100644 index 0000000000000000000000000000000000000000..0ff39750147cedd592afe51175af3ec2fa3686b0 --- /dev/null +++ b/docs/detector/ELA.md @@ -0,0 +1,65 @@ +# Error Level Analysis (ELA) Detector + +This module provides a function to perform Error Level Analysis (ELA) on images to detect potential manipulations or edits. + +## Function: `run_ela` + +```python +def run_ela(image: Image.Image, quality: int = 90, threshold: int = 15) -> bool: +``` + +### Description + +Error Level Analysis (ELA) works by recompressing an image at a specified JPEG quality level and comparing it to the original image. Differences between the two images reveal areas with inconsistent compression artifacts — often indicating image manipulation. + +The function computes the maximum pixel difference across all color channels and uses a threshold to determine if the image is likely edited.
+ +### Parameters + +| Parameter | Type | Default | Description | +| ----------- | ----------- | ------- | ------------------------------------------------------------------------------------------- | +| `image` | `PIL.Image` | N/A | Input image in RGB mode to analyze. | +| `quality` | `int` | 90 | JPEG compression quality used for recompression during analysis (lower = more compression). | +| `threshold` | `int` | 15 | Pixel difference threshold to flag the image as edited. | + +### Returns + +`bool` + +- `True` if the image is likely edited (max pixel difference > threshold). +- `False` if the image appears unedited. + +### Usage Example + +```python +from PIL import Image +from detectors.ela import run_ela + +# Open and convert image to RGB +img = Image.open("example.jpg").convert("RGB") + +# Run ELA detection +is_edited = run_ela(img, quality=90, threshold=15) + +print("Image edited:", is_edited) +``` + +### Notes + +- The input image **must** be in RGB mode for accurate analysis. +- ELA is a heuristic technique; combining it with other detection methods increases reliability. +- Visualizing the enhanced difference image can help identify edited regions (not returned by this function but possible to add). + +### Installation + +Make sure you have Pillow installed: + +```bash +pip install pillow +``` + +### Running Locally + +Just put the function in a notebook or script file and run it with your image. It works well for basic images. 
+ +[🔙 Back to Main README](../../README.md) diff --git a/docs/detector/fft.md b/docs/detector/fft.md new file mode 100644 index 0000000000000000000000000000000000000000..e2184551d9d2e2bddc972e787ff4291ec3fa7c57 --- /dev/null +++ b/docs/detector/fft.md @@ -0,0 +1,136 @@ + +# Fast Fourier Transform (FFT) Detector + +```python +def run_fft(image: Image.Image, threshold: float = 0.92) -> bool: +``` + +## **Overview** + +The `run_fft` function performs a frequency domain analysis on an image using the **Fast Fourier Transform (FFT)** to detect possible **AI generation or digital manipulation**. It leverages the fact that artificially generated or heavily edited images often exhibit a distinct high-frequency pattern. + +--- + +## **Parameters** + +| Parameter | Type | Description | +| ----------- | ----------------- | --------------------------------------------------------------------------------------- | +| `image` | `PIL.Image.Image` | Input image to analyze. It will be converted to grayscale and resized. | +| `threshold` | `float` | Proportion threshold of high-frequency components to flag the image. Default is `0.92`. | + +--- + +## **Returns** + +| Type | Description | +| ------ | ---------------------------------------------------------------------- | +| `bool` | `True` if image is likely AI-generated/manipulated; otherwise `False`. | + +--- + +## **Step-by-Step Explanation** + +### 1. **Grayscale Conversion** + +All images are converted to grayscale: + +```python +gray_image = image.convert("L") +``` + +### 2. **Resize** + +The image is resized to a fixed $512 \times 512$ for uniformity: + +```python +resized_image = gray_image.resize((512, 512)) +``` + +### 3. **FFT Calculation** + +Compute the 2D Discrete Fourier Transform: + +$$ +F(u, v) = \sum_{x=0}^{M-1} \sum_{y=0}^{N-1} f(x, y) \cdot e^{-2\pi i \left( \frac{ux}{M} + \frac{vy}{N} \right)} +$$ + +```python +fft_result = fft2(image_array) +``` + +### 4.
**Shift Zero Frequency to Center** + +Use `fftshift` to center the zero-frequency component: + +```python +fft_shifted = fftshift(fft_result) +``` + +### 5. **Magnitude Spectrum** + +$$ +|F(u, v)| = \sqrt{\Re(F(u, v))^2 + \Im(F(u, v))^2} +$$ + +```python +magnitude_spectrum = np.abs(fft_shifted) +``` + +### 6. **Normalization** + +Normalize the spectrum to avoid scale issues: + +$$ +\text{Normalized}(u,v) = \frac{|F(u,v)|}{\max(|F(u,v)|)} +$$ + +```python +normalized_spectrum = magnitude_spectrum / max_magnitude +``` + +### 7. **High-Frequency Detection** + +High-frequency components are defined as: + +$$ +\text{Mask}(u,v) = +\begin{cases} +1 & \text{if } \text{Normalized}(u,v) > 0.5 \\ +0 & \text{otherwise} +\end{cases} +$$ + +```python +high_freq_mask = normalized_spectrum > 0.5 +``` + +### 8. **Proportion Calculation** + +$$ +\text{Ratio} = \frac{\sum \text{Mask}}{\text{Total pixels}} +$$ + +```python +high_freq_ratio = np.sum(high_freq_mask) / normalized_spectrum.size +``` + +### 9. **Threshold Decision** + +If the ratio exceeds the threshold: + +$$ +\text{is\_fake} = (\text{Ratio} > \text{Threshold}) +$$ + +```python +is_fake = high_freq_ratio > threshold +``` + +This detector is exposed through the API. + +### Running Locally + +Just put the function in a notebook or script file and run it with your image. It works well for basic images. + + +[🔙 Back to Main README](../../README.md) diff --git a/docs/detector/meta.md b/docs/detector/meta.md new file mode 100644 index 0000000000000000000000000000000000000000..89f72d65c2722e6f2c6c52a8c83ad4d822dbfdcf --- /dev/null +++ b/docs/detector/meta.md @@ -0,0 +1,20 @@ +# Metadata Analysis for Image Edit Detection + +This module inspects image metadata to detect possible signs of AI-generation or post-processing edits. + +## Overview + +- Many AI-generated images and edited images leave identifiable traces in their metadata.
+- This detector scans image EXIF metadata and raw bytes for known AI generation indicators and common photo editing software signatures. +- It classifies images as `"ai_generated"`, `"edited"`, or `"undetermined"` based on detected markers. +- Handles invalid image formats gracefully by reporting errors. + +## How It Works + +- Opens the image from raw bytes using the Python Pillow library (`PIL`). +- Reads EXIF metadata and specifically looks for the "Software" tag that often contains the editing app name. +- Checks for common image editors such as Photoshop, GIMP, Snapseed, etc. +- Scans the entire raw byte content of the image for embedded AI generation identifiers like "midjourney", "stable-diffusion", "openai", etc. +- Returns a status string indicating the metadata classification. + +[🔙 Back to Main README](../../README.md) diff --git a/docs/detector/note-for-backend.md b/docs/detector/note-for-backend.md new file mode 100644 index 0000000000000000000000000000000000000000..460f2150b39893903dba44139e97704cd46ac307 --- /dev/null +++ b/docs/detector/note-for-backend.md @@ -0,0 +1,94 @@ + +# 📦 API Integration Note + +## Overview + +This system integrates **three image forensics methods**—**ELA**, **FFT**, and **Metadata analysis**—into a single detection pipeline to determine whether an image is AI-generated, manipulated, or authentic. + +--- + +## 🔍 Detection Modules + +### 1. **ELA (Error Level Analysis)** + +* **Purpose:** Detects tampering or editing by analyzing compression error levels. +* **Accuracy:** ✅ *Most accurate method* +* **Performance:** ❗ *Slowest method* +* **Output:** `True` (edited) or `False` (authentic) + +### 2. **FFT (Fast Fourier Transform)** + +* **Purpose:** Identifies high-frequency patterns typical of AI-generated images. +* **Accuracy:** ⚠️ *Moderately accurate* +* **Performance:** ❗ *Moderate to slow* +* **Output:** `True` (likely AI-generated) or `False` (authentic) + +### 3.
**Metadata Analysis** + +* **Purpose:** Detects traces of AI tools or editors in image metadata or binary content. +* **Accuracy:** ⚠️ *Fast but weaker signal* +* **Performance:** 🚀 *Fastest method* +* **Output:** One of: + + * `"ai_generated"` – AI tool or generator identified + * `"edited"` – Edited using known software + * `"undetermined"` – No signature found + +--- + +## 🧩 Integration Plan + +### ➕ Combine all three APIs into one unified endpoint: + +```bash +POST /api/detect-image +``` + +### Input: + +* `image`: Image file (binary, any format supported by Pillow) + +### Output: + +```json +{ + "ela_result": true, + "fft_result": false, + "metadata_result": "ai_generated", + "final_decision": "ai_generated" +} +``` +> NOTE: Optionally, apply a default precedence when the signals disagree (e.g., trust ELA > FFT > Metadata). + +## Result implementation +| `ela_result` | `fft_result` | `metadata_result` | Suggested Final Decision | Notes | +| ------------ | ------------ | ----------------- | ------------------------ | ----------------------------------------------------------------------- | +| `true` | `true` | `"ai_generated"` | `ai_generated` | Strong evidence from all three modules | +| `true` | `false` | `"edited"` | `edited` | ELA confirms editing, no AI signals | +| `true` | `false` | `"undetermined"` | `edited` | ELA indicates manipulation | +| `false` | `true` | `"ai_generated"` | `ai_generated` | No edits, but strong AI frequency & metadata signature | +| `false` | `true` | `"undetermined"` | `possibly_ai_generated` | Weak metadata, but FFT indicates possible AI generation | +| `false` | `false` | `"ai_generated"` | `ai_generated` | Metadata alone shows AI use | +| `false` | `false` | `"edited"` | `possibly_edited` | Weak signal—metadata shows editing but no structural or frequency signs | +| `false` | `false` | `"undetermined"` | `authentic` | No detectable manipulation or AI indicators | + + +### Decision Logic: + +* Use **ELA** as the **primary indicator** for
manipulation. +* Supplement with **FFT** and **Metadata** to improve reliability. +* Combine using a simple rule-based or voting system. + +--- + +## ⚙️ Performance Consideration + +| Method | Speed | Strength | +| -------- | ----------- | -------------------- | +| ELA | ❗ Slow | ✅ Highly accurate | +| FFT | ⚠️ Moderate | ⚠️ Somewhat reliable | +| Metadata | 🚀 Fast | ⚠️ Low confidence | + +> For high-throughput systems, consider running Metadata first and conditionally applying ELA/FFT if suspicious. + +[🔙 Back to Main README](../../README.md) diff --git a/docs/features/image_classifier.md b/docs/features/image_classifier.md new file mode 100644 index 0000000000000000000000000000000000000000..0de01a9be15416977748910a151bb0dccdbab3ff --- /dev/null +++ b/docs/features/image_classifier.md @@ -0,0 +1,31 @@ +# Image Classifier + +## Overview + +This module classifies whether an input image is AI-generated or a real-life photograph. + +## Model + +- Architecture: InceptionV3 +- Type: Binary Classifier (AI vs Real) +- Format: H5 model (`latest-my_cnn_model.h5`) + +## Dataset + +- Total images: ~79,950 +- Balanced between real and generated images +- Preprocessing: Resizing, normalization + +## Code Location + +- Controller: `features/image_classifier/controller.py` +- Model Loader: `features/image_classifier/model_loader.py` +- Preprocessor: `features/image_classifier/preprocess.py` + +## API + +- Endpoint: [ENDPOINTS](../api_endpoints.md) +- Input: Image file (PNG/JPG) +- Output: JSON response with classification result and confidence + +[🔙 Back to Main README](../../README.md) diff --git a/docs/features/nepali_text_classifier.md b/docs/features/nepali_text_classifier.md new file mode 100644 index 0000000000000000000000000000000000000000..0940b2291be3633bcefc85001f668e3d77707c4c --- /dev/null +++ b/docs/features/nepali_text_classifier.md @@ -0,0 +1,30 @@ +# Nepali Text Classifier + +## Overview + +This classifier identifies whether Nepali-language text content
is written by a human or AI. + +## Model + +- Base Model: XLM-RoBERTa (XLMRClassifier) +- Language: Nepali (Multilingual model) +- Fine-tuned with scraped web content (~18,000 samples) + +## Dataset + +- Custom scraped dataset with manual labeling +- Includes news, blogs, and synthetic content from various LLMs + +## Code Location + +- Controller: `features/nepali_text_classifier/controller.py` +- Inference: `features/nepali_text_classifier/inferencer.py` +- Model Loader: `features/nepali_text_classifier/model_loader.py` + +## API + +- Endpoint: [ENDPOINTS](../api_endpoints.md) +- Input: Raw text +- Output: JSON classification with label and confidence score + +[🔙 Back to Main README](../../README.md) diff --git a/docs/features/text_classifier.md b/docs/features/text_classifier.md new file mode 100644 index 0000000000000000000000000000000000000000..a678d5fb7010c4408049859491cc5346515adbda --- /dev/null +++ b/docs/features/text_classifier.md @@ -0,0 +1,30 @@ +# English Text Classifier + +## Overview + +Detects whether English-language text is AI-generated or human-written. + +## Model Pipeline + +- Tokenizer: GPT-2 Tokenizer +- Model: Custom trained binary classifier + +## Dataset + +- Balanced dataset: Human vs AI-generated (ChatGPT, Claude, etc.)
+- Tokenized and fed into the model using PyTorch/TensorFlow + +## Code Location + +- Controller: `features/text_classifier/controller.py` +- Inference: `features/text_classifier/inferencer.py` +- Model Loader: `features/text_classifier/model_loader.py` +- Preprocessor: `features/text_classifier/preprocess.py` + +## API + +- Endpoint: [ENDPOINTS](../api_endpoints.md) +- Input: Raw English text +- Output: Prediction result with probability/confidence + +[🔙 Back to Main README](../../README.md) diff --git a/docs/functions.md b/docs/functions.md old mode 100644 new mode 100755 index 4a31d623ba09356e92a94cc57c72c0c879f76500..43934658fcd9135fb3788dff4cc346d17706ec07 --- a/docs/functions.md +++ b/docs/functions.md @@ -49,5 +49,14 @@ - **`analyze_sentence_file()`** Like `handle_file_sentence()`—analyzes sentences in uploaded files. - +--- ## for image_classifier + +- **`Classify_Image_router()`** – Handles image classification requests by routing and coordinating preprocessing and inference. +- **`classify_image()`** – Performs AI vs human image classification using the loaded model. +- **`load_model()`** – Loads the pretrained model from Hugging Face at server startup. +- **`preprocess_image()`** – Applies all required preprocessing steps to the input image. + +> Note: While many functions mirror those in the text classifier, the image classifier primarily uses TensorFlow rather than PyTorch.
+ +[πŸ”™ Back to Main README](../README.md) diff --git a/docs/nestjs_integration.md b/docs/nestjs_integration.md old mode 100644 new mode 100755 index a11e374a9d841ea4fc1967daa320f2ba5712e14a..36337367c685703dc0af57263b1dd79cb6052a17 --- a/docs/nestjs_integration.md +++ b/docs/nestjs_integration.md @@ -80,3 +80,4 @@ export class AppController { } } ``` +[πŸ”™ Back to Main README](../README.md) diff --git a/docs/security.md b/docs/security.md old mode 100644 new mode 100755 index 125082c56f29cb336f35eecf08b04c4eae8ba387..2310fe2998bf2837264d8737a24178c59f71b2e1 --- a/docs/security.md +++ b/docs/security.md @@ -7,3 +7,4 @@ All endpoints require authentication via Bearer token: Unauthorized requests receive `403 Forbidden`. +[πŸ”™ Back to Main README](../README.md) diff --git a/docs/setup.md b/docs/setup.md old mode 100644 new mode 100755 index 8e0596e6d606907480654b39e935c67895d1729b..13468666a89b93c246db89a09943ff2b379cd99c --- a/docs/setup.md +++ b/docs/setup.md @@ -21,3 +21,4 @@ SECRET_TOKEN=your_secret_token_here ```bash uvicorn app:app --host 0.0.0.0 --port 8000 ``` +[πŸ”™ Back to Main README](../README.md) diff --git a/docs/status_code.md b/docs/status_code.md new file mode 100644 index 0000000000000000000000000000000000000000..d9a5b0ba05048aeb6932087a8fa85731a59c1582 --- /dev/null +++ b/docs/status_code.md @@ -0,0 +1,68 @@ +# Error Codes Reference + +## πŸ”Ή Summary Table + +| Code | Message | Description | +| ---- | ----------------------------------------------------- | ------------------------------------------ | +| 400 | Text must contain at least two words | Input text too short | +| 400 | Text should be less than 10,000 characters | Input text too long | +| 404 | The file is empty or only contains whitespace | File has no usable content | +| 404 | Invalid file type. 
Only .docx, .pdf, and .txt allowed | Unsupported file format | +| 403 | Invalid or expired token | Authentication token is invalid or expired | +| 413 | Text must contain at least two words | Text too short (alternative condition) | +| 413 | Text must be less than 10,000 characters | Text too long (alternative condition) | +| 413 | The image error (preprocessing) | Image size/content issue | +| 500 | Error processing the file | Internal server error while processing | + +--- + +## πŸ” Error Details + +### `400` - Bad Request + +- **Text must contain at least two words** + The input text field is too short. Submit at least two words to proceed. + +- **Text should be less than 10,000 characters** + Input text exceeds the maximum allowed character limit. Consider truncating or summarizing the content. + +--- + +### `404` - Not Found + +- **The file is empty or only contains whitespace** + The uploaded file is invalid due to lack of meaningful content. Ensure the file has readable, non-empty text. + +- **Invalid file type. Only .docx, .pdf, and .txt are allowed** + The file format is not supported. Convert the file to one of the allowed formats before uploading. + +--- + +### `403` - Forbidden + +- **Invalid or expired token** + Your access token is either expired or incorrect. Try logging in again or refreshing the token. + +--- + +### `413` - Payload Too Large + +- **Text must contain at least two words** + The text payload is too small or malformed under a large upload context. Add more content. + +- **Text must be less than 10,000 characters** + The payload exceeds the allowed character limit for a single request. Break it into smaller chunks if needed. + +- **The image error** + The uploaded image is too large or corrupted. Try resizing or compressing it before retrying. + +--- + +### `500` - Internal Server Error + +- **Error processing the file** + An unexpected server-side failure occurred during file analysis. Retry later or contact support if persistent. 
+ +--- + +> πŸ“Œ **Note:** Always validate inputs, check token status, and follow file guidelines before making requests. diff --git a/docs/structure.md b/docs/structure.md old mode 100644 new mode 100755 index d6bdff464403373701d1331c175c3718d1ff88f3..2e3f59b5ea9965ee307cc32b604eeb99a722212e --- a/docs/structure.md +++ b/docs/structure.md @@ -1,36 +1,58 @@ ## πŸ—οΈ Project Structure -``` -β”œβ”€β”€ app.py # Main FastAPI app entrypoint -β”œβ”€β”€ config.py # Configuration loader (.env, settings) -β”œβ”€β”€ features/ -β”‚ β”œβ”€β”€ text_classifier/ # English (GPT-2) classifier +```bash +AI-Checker/ +β”‚ +β”œβ”€β”€ app.py # Main FastAPI entry point +β”œβ”€β”€ config.py # Configuration settings +β”œβ”€β”€ Dockerfile # Docker build script +β”œβ”€β”€ Procfile # Deployment entry for platforms like Heroku/Railway +β”œβ”€β”€ requirements.txt # Python dependency list +β”œβ”€β”€ README.md # Main project overview πŸ“˜ +β”‚ +β”œβ”€β”€ features/ # Core AI content detection modules +β”‚ β”œβ”€β”€ image_classifier/ # Classifies AI vs Real images +β”‚ β”‚ β”œβ”€β”€ controller.py +β”‚ β”‚ β”œβ”€β”€ model_loader.py +β”‚ β”‚ └── preprocess.py +β”‚ β”œβ”€β”€ image_edit_detector/ # Detects tampered or edited images +β”‚ β”œβ”€β”€ nepali_text_classifier/ # Classifies Nepali text as AI or Human β”‚ β”‚ β”œβ”€β”€ controller.py β”‚ β”‚ β”œβ”€β”€ inferencer.py β”‚ β”‚ β”œβ”€β”€ model_loader.py -β”‚ β”‚ β”œβ”€β”€ preprocess.py -β”‚ β”‚ └── routes.py -β”‚ └── nepali_text_classifier/ # Nepali (sentencepiece) classifier +β”‚ β”‚ └── preprocess.py +β”‚ └── text_classifier/ # Classifies English text as AI or Human β”‚ β”œβ”€β”€ controller.py β”‚ β”œβ”€β”€ inferencer.py β”‚ β”œβ”€β”€ model_loader.py -β”‚ β”œβ”€β”€ preprocess.py -β”‚ └── routes.py -β”œβ”€β”€ np_text_model/ # Nepali model artifacts (auto-downloaded) -β”‚ β”œβ”€β”€ classifier/ -β”‚ β”‚ └── sentencepiece.bpe.model -β”‚ └── model_95_acc.pth -β”œβ”€β”€ models/ # English GPT-2 model/tokenizer (auto-downloaded) -β”‚ β”œβ”€β”€ merges.txt 
-β”‚ β”œβ”€β”€ tokenizer.json -β”‚ └── model_weights.pth -β”œβ”€β”€ Dockerfile # Container build config -β”œβ”€β”€ Procfile # Deployment entrypoint (for PaaS) -β”œβ”€β”€ requirements.txt # Python dependencies -β”œβ”€β”€ README.md -β”œβ”€β”€ Docs # documents -└── .env # Secret token(s), environment config +β”‚ └── preprocess.py +β”‚ +β”œβ”€β”€ docs/ # Internal documentation and API references +β”‚ β”œβ”€β”€ api_endpoints.md +β”‚ β”œβ”€β”€ deployment.md +β”‚ β”œβ”€β”€ detector/ +β”‚ β”‚ β”œβ”€β”€ ELA.md +β”‚ β”‚ β”œβ”€β”€ fft.md +β”‚ β”‚ β”œβ”€β”€ meta.md +β”‚ β”‚ └── note-for-backend.md +β”‚ β”œβ”€β”€ features/ +β”‚ β”‚ β”œβ”€β”€ image_classifier.md +β”‚ β”‚ β”œβ”€β”€ nepali_text_classifier.md +β”‚ β”‚ └── text_classifier.md +β”‚ β”œβ”€β”€ functions.md +β”‚ β”œβ”€β”€ nestjs_integration.md +β”‚ β”œβ”€β”€ security.md +β”‚ β”œβ”€β”€ setup.md +β”‚ └── structure.md +β”‚ +β”œβ”€β”€ IMG_Models/ # Stored model weights +β”‚ └── latest-my_cnn_model.h5 +β”‚ +β”œβ”€β”€ notebooks/ # Experimental/debug Jupyter notebooks +β”œβ”€β”€ static/ # Static files (e.g., UI assets, test inputs) +└── test.md # Test usage notes ``` + ### 🌟 Key Files and Their Roles - **`app.py`**: Entry point initializing FastAPI app and routes. @@ -39,16 +61,14 @@ - **`__init__.py`**: Package initializer for the root module and submodules. - **`features/text_classifier/`** - **`controller.py`**: Handles logic between routes and the model. - - **`inferencer.py`**: Runs inference and returns predictions as well as file system - utilities. + - **`inferencer.py`**: Runs inference and returns predictions as well as file system + utilities. - **`features/NP/`** - **`controller.py`**: Handles logic between routes and the model. - - **`inferencer.py`**: Runs inference and returns predictions as well as file system - utilities. + - **`inferencer.py`**: Runs inference and returns predictions as well as file system + utilities. - **`model_loader.py`**: Loads the ML model and tokenizer. 
- **`preprocess.py`**: Prepares input text for the model. - **`routes.py`**: Defines API routes for text classification. - - --[Main](../README.md) \ No newline at end of file +[πŸ”™ Back to Main README](../README.md) diff --git a/features/image_classifier/__init__.py b/features/image_classifier/__init__.py new file mode 100755 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/features/image_classifier/controller.py b/features/image_classifier/controller.py new file mode 100755 index 0000000000000000000000000000000000000000..3f59e7458adae343be1ca26e8970b47dadbd5950 --- /dev/null +++ b/features/image_classifier/controller.py @@ -0,0 +1,16 @@ +from fastapi import HTTPException, File, UploadFile +from .preprocess import preprocess_image +from .inferencer import classify_image + + +async def Classify_Image_router(file: UploadFile = File(...)): + try: + image_array = preprocess_image(file) + try: + result = classify_image(image_array) + return result + except: + raise HTTPException(status_code=423, detail="something went wrong") + + except Exception as e: + raise HTTPException(status_code=413, detail=str(e)) diff --git a/features/image_classifier/inferencer.py b/features/image_classifier/inferencer.py new file mode 100755 index 0000000000000000000000000000000000000000..844a62e6700822a11fb477237cafcbb7cc38022d --- /dev/null +++ b/features/image_classifier/inferencer.py @@ -0,0 +1,42 @@ +import numpy as np +from .model_loader import get_model + +# Thresholds +AI_THRESHOLD = 0.55 +HUMAN_THRESHOLD = 0.45 + + +def classify_image(image_array: np.ndarray) -> dict: + try: + model = get_model() + predictions = model.predict(image_array) + + if predictions.ndim != 2 or predictions.shape[1] != 1: + raise ValueError( + "Model output shape is invalid. 
Expected shape: (batch, 1)" + ) + + ai_conf = float(np.clip(predictions[0][0], 0.0, 1.0)) + human_conf = 1.0 - ai_conf + + # Classification logic + if ai_conf > AI_THRESHOLD: + label = "AI Generated" + elif ai_conf < HUMAN_THRESHOLD: + label = "Human Generated" + else: + label = "Uncertain (Maybe AI)" + + return { + "label": label, + "ai_confidence": round(ai_conf * 100, 2), + "human_confidence": round(human_conf * 100, 2), + } + + except Exception as e: + return { + "error": str(e), + "label": "Classification Failed", + "ai_confidence": None, + "human_confidence": None, + } diff --git a/features/image_classifier/model_loader.py b/features/image_classifier/model_loader.py new file mode 100755 index 0000000000000000000000000000000000000000..e419e7dc32eb0bb75b7597b5421a25b186cfa2a4 --- /dev/null +++ b/features/image_classifier/model_loader.py @@ -0,0 +1,58 @@ +import os +import shutil +import logging +import tensorflow as tf +from tensorflow.keras.layers import Layer +from huggingface_hub import snapshot_download + +# Model config +REPO_ID = "can-org/AI-VS-HUMAN-IMAGE-classifier" +MODEL_DIR = "./IMG_Models" +WEIGHTS_PATH = os.path.join(MODEL_DIR, "latest-my_cnn_model.h5") + +# Device info (for logging) +gpus = tf.config.list_physical_devices("GPU") +device = "cuda" if gpus else "cpu" + +# Global model reference +_model_img = None + +# Custom layer used in the model +class Cast(Layer): + def call(self, inputs): + return tf.cast(inputs, tf.float32) + +def warmup(): + global _model_img + download_model_repo() + _model_img = load_model() + logging.info("Image model is ready.") + +def download_model_repo(): + if os.path.exists(MODEL_DIR) and os.path.isdir(MODEL_DIR): + logging.info("Image model already exists, skipping download.") + return + snapshot_path = snapshot_download(repo_id=REPO_ID) + os.makedirs(MODEL_DIR, exist_ok=True) + shutil.copytree(snapshot_path, MODEL_DIR, dirs_exist_ok=True) + +def load_model(): + global _model_img + if _model_img is not None: + return 
_model_img + + print(f"{'GPU detected' if device == 'cuda' else 'No GPU detected'}, loading model on {device.upper()}.") + + _model_img = tf.keras.models.load_model( + WEIGHTS_PATH, custom_objects={"Cast": Cast} + ) + print("Model input shape:", _model_img.input_shape) + return _model_img + +def get_model(): + global _model_img + if _model_img is None: + download_model_repo() + _model_img = load_model() + return _model_img + diff --git a/features/image_classifier/preprocess.py b/features/image_classifier/preprocess.py new file mode 100755 index 0000000000000000000000000000000000000000..9ecf3f8ada44910df1cf4d0ce064b4f78df0314c --- /dev/null +++ b/features/image_classifier/preprocess.py @@ -0,0 +1,26 @@ +import numpy as np +import cv2 +from fastapi import HTTPException + + +def preprocess_image(file): + try: + file.file.seek(0) + image_bytes = file.file.read() + nparr = np.frombuffer(image_bytes, np.uint8) + img = cv2.imdecode(nparr, cv2.IMREAD_COLOR) + if img is None: + raise HTTPException(status_code=500, detail="Could not decode image.") + + img = cv2.resize(img, (299, 299)) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = img / 255.0 + img = np.expand_dims(img, axis=0).astype(np.float32) + return img + + except HTTPException: + raise # Re-raise already defined HTTP errors + except Exception as e: + raise HTTPException( + status_code=500, detail=f"Image preprocessing failed: {str(e)}" + ) diff --git a/features/image_classifier/routes.py b/features/image_classifier/routes.py new file mode 100755 index 0000000000000000000000000000000000000000..e64983c8a4298bc9abf8ceedf7ca3b517d3595de --- /dev/null +++ b/features/image_classifier/routes.py @@ -0,0 +1,26 @@ +from slowapi import Limiter +from config import ACCESS_RATE +from fastapi import APIRouter, File, Request, Depends, HTTPException, UploadFile +from fastapi.security import HTTPBearer +from slowapi import Limiter +from slowapi.util import get_remote_address +from .controller import Classify_Image_router +router 
= APIRouter() +limiter = Limiter(key_func=get_remote_address) +security = HTTPBearer() + +@router.post("/analyse") +@limiter.limit(ACCESS_RATE) +async def analyse( + request: Request, + file: UploadFile = File(...), + token: str = Depends(security) +): + result = await Classify_Image_router(file) # await the async function + return result + +@router.get("/health") +@limiter.limit(ACCESS_RATE) +def health(request: Request): + return {"status": "ok"} + diff --git a/features/image_edit_detector/controller.py b/features/image_edit_detector/controller.py new file mode 100755 index 0000000000000000000000000000000000000000..cd65df1b01288a251304eadd83c7db255f8e803d --- /dev/null +++ b/features/image_edit_detector/controller.py @@ -0,0 +1,49 @@ +from PIL import Image +import io +from io import BytesIO +from .detectors.fft import run_fft +from .detectors.metadata import run_metadata +from .detectors.ela import run_ela +from .preprocess import preprocess_image +from fastapi import HTTPException,status,Depends +from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials +security=HTTPBearer() +import os +async def process_image_ela(image_bytes: bytes, quality: int=90): + image = Image.open(io.BytesIO(image_bytes)) + + if image.mode != "RGB": + image = image.convert("RGB") + + compressed_image = preprocess_image(image, quality) + ela_result = run_ela(compressed_image, quality) + + return { + "is_edited": ela_result, + "ela_score": ela_result + } + +async def process_fft_image(image_bytes: bytes,threshold:float=0.95) -> dict: + image = Image.open(BytesIO(image_bytes)).convert("RGB") + result = run_fft(image,threshold) + return {"edited": bool(result)} + + +async def process_meta_image(image_bytes: bytes) -> dict: + try: + result = run_metadata(image_bytes) + return {"source": result} # e.g. 
"edited", "phone_capture", "unknown" + except Exception as e: + # Handle errors gracefully, return useful message or raise HTTPException if preferred + return {"error": str(e)} + + +async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)): + token = credentials.credentials + expected_token = os.getenv("MY_SECRET_TOKEN") + if token != expected_token: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Invalid or expired token" + ) + return token diff --git a/features/image_edit_detector/detectors/ela.py b/features/image_edit_detector/detectors/ela.py new file mode 100644 index 0000000000000000000000000000000000000000..3f9f1697037e1d70d30c48f2ed91234b9793e6ef --- /dev/null +++ b/features/image_edit_detector/detectors/ela.py @@ -0,0 +1,32 @@ +from PIL import Image, ImageChops, ImageEnhance +import io + + +def run_ela(image: Image.Image, quality: int = 90, threshold: int = 15) -> bool: + """ + Perform Error Level Analysis to detect image manipulation. + + Parameters: + image (PIL.Image): Input image (should be RGB). + quality (int): JPEG compression quality for ELA. + threshold (int): Maximum pixel difference threshold to classify as edited. + + Returns: + bool: True if image appears edited, False otherwise. 
+ """ + + # Recompress the image into JPEG format in memory + buffer = io.BytesIO() + image.save(buffer, format="JPEG", quality=quality) + buffer.seek(0) + recompressed = Image.open(buffer) + + # Compute the pixel-wise difference + diff = ImageChops.difference(image, recompressed) + extrema = diff.getextrema() + max_diff = max([ex[1] for ex in extrema]) + + # Enhance difference image for debug (not returned) + _ = ImageEnhance.Brightness(diff).enhance(10) + + return max_diff > threshold diff --git a/features/image_edit_detector/detectors/fft.py b/features/image_edit_detector/detectors/fft.py new file mode 100644 index 0000000000000000000000000000000000000000..7cee524053b83154b9de8ae554584694cd7c8b7a --- /dev/null +++ b/features/image_edit_detector/detectors/fft.py @@ -0,0 +1,40 @@ +import numpy as np +from PIL import Image +from scipy.fft import fft2, fftshift + + +def run_fft(image: Image.Image, threshold: float = 0.92) -> bool: + """ + Detects potential image manipulation or generation using FFT-based high-frequency analysis. + + Parameters: + image (PIL.Image.Image): The input image. + threshold (float): Proportion of high-frequency components above which the image is flagged. + + Returns: + bool: True if the image is likely AI-generated or manipulated, False otherwise. 
+ """ + gray_image = image.convert("L") + + resized_image = gray_image.resize((512, 512)) + + + image_array = np.array(resized_image) + + fft_result = fft2(image_array) + + fft_shifted = fftshift(fft_result) + + magnitude_spectrum = np.abs(fft_shifted) + max_magnitude = np.max(magnitude_spectrum) + if max_magnitude == 0: + return False # Avoid division by zero if image is blank + normalized_spectrum = magnitude_spectrum / max_magnitude + + high_freq_mask = normalized_spectrum > 0.5 + + high_freq_ratio = np.sum(high_freq_mask) / normalized_spectrum.size + + is_fake = high_freq_ratio > threshold + return is_fake + diff --git a/features/image_edit_detector/detectors/metadata.py b/features/image_edit_detector/detectors/metadata.py new file mode 100644 index 0000000000000000000000000000000000000000..aa0ac19cd2e4025a0b14c2d8b6784060af2a40e9 --- /dev/null +++ b/features/image_edit_detector/detectors/metadata.py @@ -0,0 +1,82 @@ +from PIL import Image, UnidentifiedImageError +import io + +# Common AI metadata identifiers in image files. 
+AI_INDICATORS = [ + b'c2pa', b'claim_generator', b'claim_generator_info', + b'created_software_agent', b'actions.v2', b'assertions', + b'urn:c2pa', b'jumd', b'jumb', b'jumdcbor', b'jumdc2ma', + b'jumdc2as', b'jumdc2cl', b'cbor', b'convertedsfwareagent',b'c2pa.version', + b'c2pa.assertions', b'c2pa.actions', + b'c2pa.thumbnail', b'c2pa.signature', b'c2pa.manifest', + b'c2pa.manifest_store', b'c2pa.ingredient', b'c2pa.parent', + b'c2pa.provenance', b'c2pa.claim', b'c2pa.hash', b'c2pa.authority', + b'jumdc2pn', b'jumdrefs', b'jumdver', b'jumdmeta', + + + 'midjourney'.encode('utf-8'), + 'stable-diffusion'.encode('utf-8'), + 'stable diffusion'.encode('utf-8'), + 'stable_diffusion'.encode('utf-8'), + 'artbreeder'.encode('utf-8'), + 'runwayml'.encode('utf-8'), + 'remix.ai'.encode('utf-8'), + 'firefly'.encode('utf-8'), + 'adobe_firefly'.encode('utf-8'), + + # OpenAI / DALLΒ·E indicators (all encoded to bytes) + 'openai'.encode('utf-8'), + 'dalle'.encode('utf-8'), + 'dalle2'.encode('utf-8'), + 'DALL-E'.encode('utf-8'), + 'DALLΒ·E'.encode('utf-8'), + 'created_by: openai'.encode('utf-8'), + 'tool: dalle'.encode('utf-8'), + 'tool: dalle2'.encode('utf-8'), + 'creator: openai'.encode('utf-8'), + 'creator: dalle'.encode('utf-8'), + 'openai.com'.encode('utf-8'), + 'api.openai.com'.encode('utf-8'), + 'openai_model'.encode('utf-8'), + 'openai_gpt'.encode('utf-8'), + + #Further possible AI-Generation Indicators + 'generated_by'.encode('utf-8'), + 'model_id'.encode('utf-8'), + 'model_version'.encode('utf-8'), + 'model_info'.encode('utf-8'), + 'tool_name'.encode('utf-8'), + 'tool_creator'.encode('utf-8'), + 'tool_version'.encode('utf-8'), + 'model_signature'.encode('utf-8'), + 'ai_model'.encode('utf-8'), + 'ai_tool'.encode('utf-8'), + 'generator'.encode('utf-8'), + 'generated_by_ai'.encode('utf-8'), + 'ai_generated'.encode('utf-8'), + 'ai_art'.encode('utf-8') + ] + + +def run_metadata(image_bytes: bytes) -> str: + try: + img = Image.open(io.BytesIO(image_bytes)) + img.load() + + exif 
= img.getexif() + software = str(exif.get(305, "")).strip() + + suspicious_editors = ["Photoshop", "GIMP", "Snapseed", "Pixlr", "VSCO", "Editor", "Adobe", "Luminar"] + + if any(editor.lower() in software.lower() for editor in suspicious_editors): + return "edited" + + if any(indicator in image_bytes for indicator in AI_INDICATORS): + return "ai_generated" + + return "undetermined" + + except UnidentifiedImageError: + return "error: invalid image format" + except Exception as e: + return f"error: {str(e)}" diff --git a/features/image_edit_detector/preprocess.py b/features/image_edit_detector/preprocess.py new file mode 100755 index 0000000000000000000000000000000000000000..55c70308cb0feb3e058b1e7d1b74bbc23d08e31b --- /dev/null +++ b/features/image_edit_detector/preprocess.py @@ -0,0 +1,9 @@ +from PIL import Image +import io + +def preprocess_image(img: Image.Image, quality: int) -> Image.Image: + buffer = io.BytesIO() + img.save(buffer, format="JPEG", quality=quality) + buffer.seek(0) + return Image.open(buffer) + diff --git a/features/image_edit_detector/routes.py b/features/image_edit_detector/routes.py new file mode 100755 index 0000000000000000000000000000000000000000..26b328d743a723e96f4cb2efceaa150888eed45d --- /dev/null +++ b/features/image_edit_detector/routes.py @@ -0,0 +1,53 @@ +from slowapi import Limiter +from config import ACCESS_RATE +from fastapi import APIRouter, File, Request, Depends, HTTPException, UploadFile +from fastapi.security import HTTPBearer +from slowapi import Limiter +from slowapi.util import get_remote_address +from io import BytesIO +from .controller import process_image_ela , verify_token,process_fft_image, process_meta_image +import requests +router = APIRouter() +limiter = Limiter(key_func=get_remote_address) +security = HTTPBearer() + + + +@router.post("/ela") +@limiter.limit(ACCESS_RATE) +async def detect_ela(request:Request,file: UploadFile = File(...), quality: int = 90 ,token: str = Depends(verify_token)): + # Check file 
extension + allowed_types = ["image/jpeg", "image/png"] + + if file.content_type not in allowed_types: + raise HTTPException( + status_code=400, + detail="Unsupported file type. Only JPEG and PNG images are allowed." + ) + + content = await file.read() + result = await process_image_ela(content, quality) + return result + +@router.post("/fft") +@limiter.limit(ACCESS_RATE) +async def detect_fft(request:Request,file:UploadFile =File(...),threshold:float=0.95,token:str=Depends(verify_token)): + if file.content_type not in ["image/jpeg", "image/png"]: + raise HTTPException(status_code=400, detail="Unsupported image type.") + + content = await file.read() + result = await process_fft_image(content,threshold) + return result + +@router.post("/meta") +@limiter.limit(ACCESS_RATE) +async def detect_meta(request:Request,file:UploadFile=File(...),token:str=Depends(verify_token)): + if file.content_type not in ["image/jpeg", "image/png"]: + raise HTTPException(status_code=400, detail="Unsupported image type.") + content = await file.read() + result = await process_meta_image(content) + return result +@router.post("/health") +@limiter.limit(ACCESS_RATE) +def heath(request:Request): + return {"status":"ok"} diff --git a/features/nepali_text_classifier/__init__.py b/features/nepali_text_classifier/__init__.py old mode 100644 new mode 100755 diff --git a/features/nepali_text_classifier/controller.py b/features/nepali_text_classifier/controller.py old mode 100644 new mode 100755 index 26424acb88051b0a4726a3a0bc1772a931edcb03..09a816fa49adb4ced016715629fed5b2b45d14e1 --- a/features/nepali_text_classifier/controller.py +++ b/features/nepali_text_classifier/controller.py @@ -3,7 +3,6 @@ from io import BytesIO from fastapi import HTTPException, UploadFile, status, Depends from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials import os - from features.nepali_text_classifier.inferencer import classify_text from features.nepali_text_classifier.preprocess import * import re 
diff --git a/features/nepali_text_classifier/inferencer.py b/features/nepali_text_classifier/inferencer.py old mode 100644 new mode 100755 diff --git a/features/nepali_text_classifier/model_loader.py b/features/nepali_text_classifier/model_loader.py old mode 100644 new mode 100755 index 7784ce099ee217fccbb197dadc0847407fa2b182..2e2137a7c9d339e39619bb9760f72fb1caf652bc --- a/features/nepali_text_classifier/model_loader.py +++ b/features/nepali_text_classifier/model_loader.py @@ -8,7 +8,7 @@ from huggingface_hub import snapshot_download from transformers import AutoTokenizer, AutoModel # Configs -REPO_ID = "Pujan-Dev/Nepali-AI-VS-HUMAN" +REPO_ID = "can-org/Nepali-AI-VS-HUMAN" BASE_DIR = "./np_text_model" TOKENIZER_DIR = os.path.join(BASE_DIR, "classifier") # <- update this to match your uploaded folder WEIGHTS_PATH = os.path.join(BASE_DIR, "model_95_acc.pth") # <- change to match actual uploaded weight diff --git a/features/nepali_text_classifier/preprocess.py b/features/nepali_text_classifier/preprocess.py old mode 100644 new mode 100755 index 74e487c776a83ef6d36c2d6404a0d5254c31a840..b07de3af1de5404d0eb1348c9570287180684b73 --- a/features/nepali_text_classifier/preprocess.py +++ b/features/nepali_text_classifier/preprocess.py @@ -20,19 +20,17 @@ def parse_pdf(file: BytesIO): for page_num in range(doc.page_count): page = doc.load_page(page_num) text += page.get_text() - return text + return text except Exception as e: logging.error(f"Error while processing PDF: {str(e)}") raise HTTPException( status_code=500, detail="Error processing PDF file") - def parse_txt(file: BytesIO): return file.read().decode("utf-8") - -def end_symbol_for_NP_text(text): - if not text.endswith("ΰ₯€"): - text += "ΰ₯€" - - +def end_symbol_for_NP_text(text: str) -> str: + text = text.strip() + if not text.endswith("ΰ₯€"): + text += "ΰ₯€" + return text diff --git a/features/nepali_text_classifier/routes.py b/features/nepali_text_classifier/routes.py old mode 100644 new mode 100755 diff --git 
a/features/text_classifier/__init__.py b/features/text_classifier/__init__.py old mode 100644 new mode 100755 diff --git a/features/text_classifier/controller.py b/features/text_classifier/controller.py old mode 100644 new mode 100755 diff --git a/features/text_classifier/inferencer.py b/features/text_classifier/inferencer.py old mode 100644 new mode 100755 diff --git a/features/text_classifier/model_loader.py b/features/text_classifier/model_loader.py old mode 100644 new mode 100755 index 50050f9e24f1580ebfe3864979fa37f850944b75..890d1ea972816c8e69f44a619ef08e225fe4e09c --- a/features/text_classifier/model_loader.py +++ b/features/text_classifier/model_loader.py @@ -6,7 +6,7 @@ from huggingface_hub import snapshot_download import torch from dotenv import load_dotenv load_dotenv() -REPO_ID = "Pujan-Dev/AI-Text-Detector" +REPO_ID = "can-org/AI-Content-Checker" MODEL_DIR = "./models" TOKENIZER_DIR = os.path.join(MODEL_DIR, "model") WEIGHTS_PATH = os.path.join(MODEL_DIR, "model_weights.pth") diff --git a/features/text_classifier/preprocess.py b/features/text_classifier/preprocess.py old mode 100644 new mode 100755 diff --git a/features/text_classifier/routes.py b/features/text_classifier/routes.py old mode 100644 new mode 100755 diff --git a/license.md b/license.md new file mode 100644 index 0000000000000000000000000000000000000000..c6eac5dbb11da919ae0622dfa5e37e0909f40180 --- /dev/null +++ b/license.md @@ -0,0 +1,20 @@ +# License - All Rights Reserved + +Copyright (c) 2025 CyberAlertNepal + +This software and all associated materials are **not open source** and are protected under a custom license. + +## Strict Usage Terms + +Unless explicit written permission is granted by **CyberAlertNepal**, **no individual or entity** is allowed to: + +- Use this codebase or its models in any capacity β€” personal, educational, or commercial. +- Modify, copy, distribute, or sublicense any part of this project. +- Deploy, mirror, or host this project, either publicly or privately. 
+- Incorporate any component of this project into derivative works or other applications. + +This project is intended for **private, internal use by the author(s) only**. + +Any unauthorized usage, reproduction, or distribution is strictly prohibited and may result in legal action. + +**All rights reserved.** diff --git a/readme.md b/readme.md deleted file mode 100644 index af3c04445f0adbd68945042f1268e44e981c632f..0000000000000000000000000000000000000000 --- a/readme.md +++ /dev/null @@ -1,35 +0,0 @@ -# πŸš€ FastAPI AI Detector - -A production-ready FastAPI app for detecting AI vs. human-written text in English and Nepali. It uses GPT-2 and SentencePiece-based models, with Bearer token security. - -## πŸ“‚ Documentation - -- [Project Structure](docs/structure.md) -- [API Endpoints](docs/api_endpoints.md) -- [Setup & Installation](docs/setup.md) -- [Deployment](docs/deployment.md) -- [Security](docs/security.md) -- [NestJS Integration](docs/nestjs_integration.md) -- [Core Functions](docs/functions.md) - -## ⚑ Quick Start -```bash -uvicorn app:app --host 0.0.0.0 --port 8000 -``` -## πŸš€ Deployment - -- **Local**: Use `uvicorn` as above. -- **Railway/Heroku**: Use the provided `Procfile`. -- **Hugging Face Spaces**: Use the `Dockerfile` for container deployment. - ---- - -## πŸ’‘ Tips - -- **Model files auto-download at first start** if not found. -- **Keep `requirements.txt` up-to-date** after adding dependencies. -- **All endpoints require the correct `Authorization` header**. -- **For security**: Avoid committing `.env` to public repos. 
- ---- - diff --git a/requirements.txt b/requirements.txt old mode 100644 new mode 100755 index 43ab64496920e14a603feec9d2fda8656c5ec747..834aeb83468452b3ca9721e13e5f87572cfb7aaa --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,10 @@ python-multipart slowapi spacy nltk +tensorflow +opencv-python +pillow +scipy +fitz +frontend +tools diff --git a/static/robots.txt b/static/robots.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391