# Bernadetta14's picture
# Update app.py
# b933169 verified
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Hub repository of the fine-tuned checkpoint served by this demo.
MODEL_ID = "Bernadetta14/qwen2.5-coder-0.5b-vulnerability-detector"

# Tokenizer and weights are loaded once at import time; the weights are
# requested as float32 tensors mapped to the CPU.
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float32, device_map="cpu")
model.eval()
print("Model loaded!")

# System message placed at the start of every prompt sent to the model.
SYSTEM_PROMPT = (
    "You are a code security expert. "
    "Analyze the given code for security vulnerabilities "
    "and provide a structured security report."
)
def analyze_code(code, lang):
    """Generate a security report for a code snippet with the loaded model.

    Args:
        code: Source text to analyze. ``None`` and whitespace-only input are
            both treated as "nothing entered".
        lang: Language name; interpolated into the prompt text and used as the
            code-fence tag.

    Returns:
        The generated report, always starting with ``"Vulnerability :"``, or a
        short hint asking for input when ``code`` is empty.
    """
    # The editor component may hand over None rather than "" when it is empty;
    # calling .strip() on None would raise AttributeError.
    if code is None or not code.strip():
        return "Please enter some code to analyze."

    max_code_chars = 800   # only this many leading characters reach the model
    max_new_tokens = 300   # upper bound on the generated report length

    snippet = code[:max_code_chars]
    # ChatML-style prompt. The assistant turn is pre-seeded with
    # "Vulnerability :" so the model continues the report from that label.
    prompt = f"""<|im_start|>system
{SYSTEM_PROMPT}<|im_end|>
<|im_start|>user
Analyze this {lang} code for security vulnerabilities:
```{lang}
{snippet}
```
Provide a structured security report:<|im_end|>
<|im_start|>assistant
Vulnerability :"""

    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,  # greedy decoding
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_length = inputs["input_ids"].shape[1]
    new_tokens = outputs[0][prompt_length:]
    result = tokenizer.decode(new_tokens, skip_special_tokens=True)

    # Re-attach the label that was part of the prompt and therefore sliced off.
    return "Vulnerability :" + result
# Examples of vulnerable code offered in the "Load Example" dropdown.
# Keys follow the pattern "<Language> - <Vulnerability>".
EXAMPLES = {
    # The function body must be indented, otherwise the sample is not valid
    # Python and the editor shows (and the model receives) broken code.
    "Python - SQL Injection": """\
def login(username, password):
    query = "SELECT * FROM users WHERE username='" + username + "' AND password='" + password + "'"
    result = db.execute(query)
    return result
""",
    "JavaScript - XSS": """\
function displayMessage(userInput) {
document.getElementById('output').innerHTML = userInput;
}
""",
    "PHP - Command Injection": """\
<?php
$filename = $_GET['file'];
$output = shell_exec('cat ' . $filename);
echo $output;
?>
""",
}
# Languages offered in the dropdown; the selected value is passed to
# analyze_code as its `lang` argument.
_LANGUAGE_CHOICES = ["python", "javascript", "java", "c++", "c#", "php", "ruby", "swift", "go", "kotlin"]

# Dropdown names whose editor highlighting identifier differs from the label.
# NOTE(review): "cpp" is the identifier Gradio lists for C++ — confirm against
# the installed Gradio version.
_HIGHLIGHT_ALIASES = {"c++": "cpp"}

# Page header (user-facing text, kept in its original language).
_INTRO_MARKDOWN = """
# Code Vulnerability Detector
Deteksi celah keamanan dalam kode secara otomatis menggunakan AI.
Model akan menganalisis kode dan memberikan laporan terstruktur berisi
jenis vulnerability, tingkat bahaya, penjelasan, dan saran perbaikan.
**Model:** Qwen2.5-Coder-0.5B (fine-tuned on CyberNative Security Dataset)
"""

# Page footer. The blank line after the table is required: without it a
# GFM renderer treats the following text lines as additional table rows.
_PERFORMANCE_MARKDOWN = """
---
### Model Performance
| Metric | Value |
|--------|-------|
| Accuracy | 62% |
| Training Samples | 4,187 |
| Supported Languages | 11 |
| Model Size | 0.5B params |

**Dataset:** CyberNative/Code_Vulnerability_Security_DPO
**Method:** LoRA fine-tuning via MLX-LM (Apple Silicon)
"""


def _highlight_language(lang):
    """Return an editor highlighting identifier for a dropdown language.

    Falls back to "python" for an empty selection and for languages the editor
    cannot highlight, so the update handed to gr.Code never names an
    unsupported language.
    """
    if not lang:
        return "python"
    candidate = _HIGHLIGHT_ALIASES.get(lang, lang)
    # Prefer the component's own list of accepted languages when it exposes
    # one; otherwise keep the conservative whitelist this app used before.
    supported = getattr(gr.Code, "languages", None)
    if supported is None:
        supported = ("python", "javascript", "java")
    return candidate if candidate in supported else "python"


def _example_language(example_name):
    """Return the dropdown language named in an example label, or None.

    Labels look like "PHP - Command Injection"; the part before " - " is
    lower-cased and accepted only if it is one of the dropdown choices.
    """
    label_prefix = (example_name or "").partition(" - ")[0].lower()
    return label_prefix if label_prefix in _LANGUAGE_CHOICES else None


with gr.Blocks(title="Code Vulnerability Detector", theme=gr.themes.Soft()) as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        # Left column: inputs.
        with gr.Column():
            lang_input = gr.Dropdown(
                choices=_LANGUAGE_CHOICES,
                value="python",
                label="Programming Language",
            )
            code_input = gr.Code(
                label="Code to Analyze",
                language="python",
                lines=15,
                value=EXAMPLES["Python - SQL Injection"],
            )
            with gr.Row():
                example_btn = gr.Dropdown(
                    choices=list(EXAMPLES.keys()),
                    label="Load Example",
                    value="Python - SQL Injection",
                )
                analyze_btn = gr.Button("Analyze Code", variant="primary")

        # Right column: read-only report.
        with gr.Column():
            output = gr.Textbox(
                label="Security Report",
                lines=15,
                interactive=False,
            )

    gr.Markdown(_PERFORMANCE_MARKDOWN)

    def load_example(example_name, current_lang="python"):
        """Load an example into the editor and select its language.

        Returns an editor update (new code plus matching highlighting) and the
        language to select in the dropdown. Keeping the dropdown in sync
        matters because its value is what the prompt tells the model the code
        is written in. If the label names no known language, the current
        selection is kept.
        """
        code = EXAMPLES.get(example_name, "")
        lang = _example_language(example_name) or current_lang
        return gr.Code(value=code, language=_highlight_language(lang)), lang

    example_btn.change(
        fn=load_example,
        inputs=[example_btn, lang_input],
        outputs=[code_input, lang_input],
    )

    # Update the editor's syntax highlighting when the language changes.
    lang_input.change(
        fn=lambda l: gr.Code(language=_highlight_language(l)),
        inputs=lang_input,
        outputs=code_input,
    )

    analyze_btn.click(
        fn=analyze_code,
        inputs=[code_input, lang_input],
        outputs=output,
    )

demo.launch()