"""Gradio demo: ask a fine-tuned Qwen2.5-Coder model for a security report on a code snippet.

Running this module loads the tokenizer and model for ``MODEL_ID`` on CPU,
builds the Blocks UI and launches it.
"""

from typing import Optional

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load model
MODEL_ID = "Bernadetta14/qwen2.5-coder-0.5b-vulnerability-detector"

print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,
    device_map="cpu",
)
model.eval()
print("Model loaded!")

SYSTEM_PROMPT = "You are a code security expert. Analyze the given code for security vulnerabilities and provide a structured security report."

# Only the first MAX_CODE_CHARS characters of the submitted code go into the prompt.
MAX_CODE_CHARS = 800
# Upper bound on the number of tokens generated for one report.
MAX_NEW_TOKENS = 300
# The assistant turn is pre-seeded with this text, and the same text is put
# back in front of the decoded continuation so the report reads as a whole.
REPORT_PREFIX = "Vulnerability :"


def _build_prompt(code: str, lang: str) -> str:
    """Return the chat-formatted prompt for *code* written in *lang*."""
    return f"""<|im_start|>system
{SYSTEM_PROMPT}<|im_end|>
<|im_start|>user
Analyze this {lang} code for security vulnerabilities:

```{lang}
{code[:MAX_CODE_CHARS]}
```

Provide a structured security report:<|im_end|>
<|im_start|>assistant
{REPORT_PREFIX}"""


def analyze_code(code: Optional[str], lang: str) -> str:
    """Generate a security report for *code*.

    Args:
        code: Source text from the editor. ``None``, empty or whitespace-only
            input is answered with a hint instead of running the model.
        lang: Language name inserted into the prompt and the code fence.

    Returns:
        The hint message for blank input, otherwise ``REPORT_PREFIX`` followed
        by the decoded text the model generated after the prompt.
    """
    # Guard against None as well as blank text so .strip() cannot raise.
    if not code or not code.strip():
        return "Please enter some code to analyze."

    inputs = tokenizer(_build_prompt(code, lang), return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_length = inputs["input_ids"].shape[1]
    new_tokens = outputs[0][prompt_length:]
    result = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return REPORT_PREFIX + result


# Example vulnerable code
EXAMPLES = {
    "Python - SQL Injection": """\
def login(username, password):
    query = "SELECT * FROM users WHERE username='" + username + "' AND password='" + password + "'"
    result = db.execute(query)
    return result
""",
    "JavaScript - XSS": """\
function displayMessage(userInput) {
    document.getElementById('output').innerHTML = userInput;
}
""",
    # NOTE: this entry used to be an empty string, so picking it blanked the
    # editor. A representative snippet is supplied in its place.
    "PHP - Command Injection": """\
<?php
$host = $_GET['host'];
system("ping -c 4 " . $host);
?>
""",
}

# Language to select in the dropdown when the matching example is loaded.
EXAMPLE_LANGUAGES = {
    "Python - SQL Injection": "python",
    "JavaScript - XSS": "javascript",
    "PHP - Command Injection": "php",
}

# Dropdown names that differ from the identifier used for highlighting.
_HIGHLIGHT_ALIASES = {"c++": "cpp"}
_FALLBACK_HIGHLIGHT = "python"
# NOTE(review): gr.Code is expected to reject languages that are not listed in
# gr.Code.languages - confirm against the installed Gradio version. When the
# attribute is missing, the previous hard-coded allow-list is used instead.
_SUPPORTED_HIGHLIGHTS = frozenset(
    name
    for name in getattr(gr.Code, "languages", ("python", "javascript", "java"))
    if name
)


def highlight_language(lang: Optional[str]) -> str:
    """Return the editor highlight mode for *lang*, or the fallback if unsupported."""
    candidate = _HIGHLIGHT_ALIASES.get(lang, lang)
    if candidate in _SUPPORTED_HIGHLIGHTS:
        return candidate
    return _FALLBACK_HIGHLIGHT


HEADER_MARKDOWN = """
# Code Vulnerability Detector

Deteksi celah keamanan dalam kode secara otomatis menggunakan AI.
Model akan menganalisis kode dan memberikan laporan terstruktur berisi jenis vulnerability, tingkat bahaya, penjelasan, dan saran perbaikan.

**Model:** Qwen2.5-Coder-0.5B (fine-tuned on CyberNative Security Dataset)
"""

PERFORMANCE_MARKDOWN = """
---
### Model Performance

| Metric | Value |
|--------|-------|
| Accuracy | 62% |
| Training Samples | 4,187 |
| Supported Languages | 11 |
| Model Size | 0.5B params |

**Dataset:** CyberNative/Code_Vulnerability_Security_DPO

**Method:** LoRA fine-tuning via MLX-LM (Apple Silicon)
"""

with gr.Blocks(title="Code Vulnerability Detector", theme=gr.themes.Soft()) as demo:
    gr.Markdown(HEADER_MARKDOWN)

    with gr.Row():
        with gr.Column():
            lang_input = gr.Dropdown(
                choices=["python", "javascript", "java", "c++", "c#", "php", "ruby", "swift", "go", "kotlin"],
                value="python",
                label="Programming Language",
            )
            code_input = gr.Code(
                label="Code to Analyze",
                language="python",
                lines=15,
                value=EXAMPLES["Python - SQL Injection"],
            )
            with gr.Row():
                example_btn = gr.Dropdown(
                    choices=list(EXAMPLES.keys()),
                    label="Load Example",
                    value="Python - SQL Injection",
                )
                analyze_btn = gr.Button("Analyze Code", variant="primary")

        with gr.Column():
            output = gr.Textbox(
                label="Security Report",
                lines=15,
                interactive=False,
            )

    gr.Markdown(PERFORMANCE_MARKDOWN)

    # Load example
    def load_example(example_name):
        """Return the example snippet stored under *example_name*, or "" if unknown."""
        return EXAMPLES.get(example_name, "")

    def select_example(example_name, current_lang):
        """Return the example's code and the language the dropdown should show.

        The current language is kept when the example has no known language,
        so an unknown or cleared selection does not reset the user's choice.
        """
        return load_example(example_name), EXAMPLE_LANGUAGES.get(example_name, current_lang)

    # Fill the editor and keep the language dropdown in step with the example,
    # so the prompt names the language the snippet is actually written in.
    example_btn.change(
        fn=select_example,
        inputs=[example_btn, lang_input],
        outputs=[code_input, lang_input],
    )

    # Update code language highlight when lang changes
    lang_input.change(
        fn=lambda l: gr.Code(language=highlight_language(l)),
        inputs=lang_input,
        outputs=code_input,
    )

    analyze_btn.click(
        fn=analyze_code,
        inputs=[code_input, lang_input],
        outputs=output,
    )

demo.launch()