| import gradio as gr |
| from transformers import AutoTokenizer, AutoModelForCausalLM |
| import torch |
|
|
| |
# Hugging Face Hub id of the fine-tuned checkpoint this app serves.
MODEL_ID = "Bernadetta14/qwen2.5-coder-0.5b-vulnerability-detector"

# The tokenizer and model are loaded once, at import time, and shared by
# every request handled afterwards.
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Full-precision weights on CPU; eval() puts the module in inference mode
# and returns the module itself, so the call can be chained.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID, torch_dtype=torch.float32, device_map="cpu"
).eval()
print("Model loaded!")
|
|
# System message placed in the system turn of every prompt sent to the model.
SYSTEM_PROMPT = (
    "You are a code security expert. "
    "Analyze the given code for security vulnerabilities "
    "and provide a structured security report."
)
|
|
def analyze_code(code, lang):
    """Generate a security report for a code snippet with the loaded model.

    Builds a chat prompt using the ``<|im_start|>`` / ``<|im_end|>`` turn
    markers (system, user, assistant), primes the assistant turn with
    ``"Vulnerability :"`` and decodes the continuation deterministically
    (``do_sample=False``).

    Args:
        code: Source code to analyze. ``None``, empty, or whitespace-only
            input returns a hint message without calling the model. Only
            the first 800 characters are placed in the prompt.
        lang: Language label interpolated into the instruction and into the
            Markdown code fence around the snippet.

    Returns:
        The report text, always starting with ``"Vulnerability :"``, or the
        hint ``"Please enter some code to analyze."`` when there is no code.
    """
    # Guard against None as well as blank text: a UI component may hand
    # over None for an empty editor, and None has no .strip().
    if code is None or not code.strip():
        return "Please enter some code to analyze."

    # The assistant turn is pre-filled with this prefix; the same text is
    # put back in front of the decoded continuation below.
    answer_prefix = "Vulnerability :"
    snippet = code[:800]  # longer input is cut off silently

    # Assembled from explicit pieces so the prompt text never depends on
    # how this source file happens to be indented.
    prompt = (
        "<|im_start|>system\n"
        f"{SYSTEM_PROMPT}<|im_end|>\n"
        "<|im_start|>user\n"
        f"Analyze this {lang} code for security vulnerabilities:\n"
        "\n"
        f"```{lang}\n"
        f"{snippet}\n"
        "```\n"
        "\n"
        "Provide a structured security report:<|im_end|>\n"
        "<|im_start|>assistant\n"
        f"{answer_prefix}"
    )

    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=300,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Drop the prompt tokens from the front of the generated sequence and
    # decode only what the model added.
    prompt_len = inputs["input_ids"].shape[1]
    new_tokens = outputs[0][prompt_len:]
    result = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return answer_prefix + result
|
|
| |
# Ready-made snippets for the "Load Example" dropdown, keyed by
# "<Language> - <Vulnerability>". They are deliberately insecure: they are
# inputs for the detector, not code to reuse.
# Block bodies inside the snippets are indented so each sample is
# well-formed source in its own language.
EXAMPLES = {
    "Python - SQL Injection": """\
def login(username, password):
    query = "SELECT * FROM users WHERE username='" + username + "' AND password='" + password + "'"
    result = db.execute(query)
    return result
""",
    "JavaScript - XSS": """\
function displayMessage(userInput) {
    document.getElementById('output').innerHTML = userInput;
}
""",
    "PHP - Command Injection": """\
<?php
$filename = $_GET['file'];
$output = shell_exec('cat ' . $filename);
echo $output;
?>
""",
}
|
|
# Gradio UI: language + code editor on the left, generated report on the
# right, static model statistics underneath.
with gr.Blocks(title="Code Vulnerability Detector", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# Code Vulnerability Detector

Deteksi celah keamanan dalam kode secara otomatis menggunakan AI.
Model akan menganalisis kode dan memberikan laporan terstruktur berisi
jenis vulnerability, tingkat bahaya, penjelasan, dan saran perbaikan.

**Model:** Qwen2.5-Coder-0.5B (fine-tuned on CyberNative Security Dataset)
""")

    # Labels offered in the language dropdown; the selected label is passed
    # as-is to analyze_code.
    # NOTE(review): the statistics table below states 11 supported languages
    # while 10 are listed here - confirm which one is right.
    language_choices = ["python", "javascript", "java", "c++", "c#", "php", "ruby", "swift", "go", "kotlin"]

    # Dropdown labels whose gr.Code highlighter name differs from the label.
    highlight_aliases = {"c++": "cpp"}

    with gr.Row():
        with gr.Column():
            lang_input = gr.Dropdown(
                choices=language_choices,
                value="python",
                label="Programming Language",
            )
            code_input = gr.Code(
                label="Code to Analyze",
                language="python",
                lines=15,
                value=EXAMPLES["Python - SQL Injection"],
            )
            with gr.Row():
                example_btn = gr.Dropdown(
                    choices=list(EXAMPLES),
                    label="Load Example",
                    value="Python - SQL Injection",
                )
                analyze_btn = gr.Button("Analyze Code", variant="primary")

        with gr.Column():
            output = gr.Textbox(
                label="Security Report",
                lines=15,
                interactive=False,
            )

    gr.Markdown("""
---
### Model Performance
| Metric | Value |
|--------|-------|
| Accuracy | 62% |
| Training Samples | 4,187 |
| Supported Languages | 11 |
| Model Size | 0.5B params |

**Dataset:** CyberNative/Code_Vulnerability_Security_DPO
**Method:** LoRA fine-tuning via MLX-LM (Apple Silicon)
""")

    def load_example(example_name):
        """Return the snippet stored under *example_name*, or "" if unknown."""
        return EXAMPLES.get(example_name, "")

    def load_example_with_language(example_name, current_lang):
        """Return ``(snippet, language)`` for the selected example.

        Example keys have the form "<Language> - <Vulnerability>". The
        lower-cased prefix becomes the selected language when it is one of
        the dropdown choices; otherwise *current_lang* is kept, so the
        dropdown never receives a value it does not offer.
        """
        snippet = load_example(example_name)
        label = (example_name or "").partition(" - ")[0].lower()
        return snippet, (label if label in language_choices else current_lang)

    def highlight_language(lang):
        """Map a dropdown label to a highlighter name gr.Code accepts.

        Per the Gradio docs, the supported names are listed in
        ``gr.Code.languages``; anything else falls back to "python" rather
        than being passed to the component.
        """
        candidate = highlight_aliases.get(lang, lang)
        return candidate if candidate in gr.Code.languages else "python"

    # Picking an example fills the editor and also selects the example's
    # language, so the prompt built by analyze_code names the right language.
    example_btn.change(
        fn=load_example_with_language,
        inputs=[example_btn, lang_input],
        outputs=[code_input, lang_input],
    )

    # Keep the editor's syntax highlighting in step with the chosen language.
    lang_input.change(
        fn=lambda lang: gr.Code(language=highlight_language(lang)),
        inputs=lang_input,
        outputs=code_input,
    )

    analyze_btn.click(
        fn=analyze_code,
        inputs=[code_input, lang_input],
        outputs=output,
    )
|
|
# Start the web server only when this file is executed as a script, so that
# importing the module does not launch the app as a side effect.
if __name__ == "__main__":
    demo.launch()
|
|