Spaces:

Bernadetta14
/

code-vulnerability-detector

Sleeping

App Files Files Community

code-vulnerability-detector / app.py

Bernadetta14

Update app.py

b933169 verified 3 days ago

raw

history blame contribute delete

4.17 kB

	import gradio as gr
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import torch

	# Load model
	MODEL_ID = "Bernadetta14/qwen2.5-coder-0.5b-vulnerability-detector"

	print("Loading model...")
	tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
	model = AutoModelForCausalLM.from_pretrained(
	MODEL_ID,
	torch_dtype=torch.float32,
	device_map="cpu"
	)
	model.eval()
	print("Model loaded!")

	SYSTEM_PROMPT = "You are a code security expert. Analyze the given code for security vulnerabilities and provide a structured security report."

	def analyze_code(code, lang):
	if not code.strip():
	return "Please enter some code to analyze."

	prompt = f"""<\|im_start\|>system
	{SYSTEM_PROMPT}<\|im_end\|>
	<\|im_start\|>user
	Analyze this {lang} code for security vulnerabilities:

	```{lang}
	{code[:800]}
	```

	Provide a structured security report:<\|im_end\|>
	<\|im_start\|>assistant
	Vulnerability :"""

	inputs = tokenizer(prompt, return_tensors="pt")
	with torch.no_grad():
	outputs = model.generate(
	**inputs,
	max_new_tokens=300,
	do_sample=False,
	pad_token_id=tokenizer.eos_token_id
	)

	new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
	result = tokenizer.decode(new_tokens, skip_special_tokens=True)
	return "Vulnerability :" + result

	# Contoh code vulnerable
	EXAMPLES = {
	"Python - SQL Injection": """\
	def login(username, password):
	query = "SELECT * FROM users WHERE username='" + username + "' AND password='" + password + "'"
	result = db.execute(query)
	return result
	""",
	"JavaScript - XSS": """\
	function displayMessage(userInput) {
	document.getElementById('output').innerHTML = userInput;
	}
	""",
	"PHP - Command Injection": """\
	<?php
	$filename = $_GET['file'];
	$output = shell_exec('cat ' . $filename);
	echo $output;
	?>
	"""
	}

	with gr.Blocks(title="Code Vulnerability Detector", theme=gr.themes.Soft()) as demo:
	gr.Markdown("""
	# Code Vulnerability Detector

	Deteksi celah keamanan dalam kode secara otomatis menggunakan AI.
	Model akan menganalisis kode dan memberikan laporan terstruktur berisi
	jenis vulnerability, tingkat bahaya, penjelasan, dan saran perbaikan.

	Model: Qwen2.5-Coder-0.5B (fine-tuned on CyberNative Security Dataset)
	""")

	with gr.Row():
	with gr.Column():
	lang_input = gr.Dropdown(
	choices=["python", "javascript", "java", "c++", "c#", "php", "ruby", "swift", "go", "kotlin"],
	value="python",
	label="Programming Language"
	)
	code_input = gr.Code(
	label="Code to Analyze",
	language="python",
	lines=15,
	value=EXAMPLES["Python - SQL Injection"]
	)
	with gr.Row():
	example_btn = gr.Dropdown(
	choices=list(EXAMPLES.keys()),
	label="Load Example",
	value="Python - SQL Injection"
	)
	analyze_btn = gr.Button("Analyze Code", variant="primary")

	with gr.Column():
	output = gr.Textbox(
	label="Security Report",
	lines=15,
	interactive=False
	)

	gr.Markdown("""
	---
	### Model Performance
	\| Metric \| Value \|
	\|--------\|-------\|
	\| Accuracy \| 62% \|
	\| Training Samples \| 4,187 \|
	\| Supported Languages \| 11 \|
	\| Model Size \| 0.5B params \|

	Dataset: CyberNative/Code_Vulnerability_Security_DPO
	Method: LoRA fine-tuning via MLX-LM (Apple Silicon)
	""")

	# Load example
	def load_example(example_name):
	return EXAMPLES.get(example_name, "")

	example_btn.change(fn=load_example, inputs=example_btn, outputs=code_input)

	# Update code language highlight when lang changes
	lang_input.change(fn=lambda l: gr.Code(language=l if l in ["python", "javascript", "java"] else "python"), inputs=lang_input, outputs=code_input)

	analyze_btn.click(
	fn=analyze_code,
	inputs=[code_input, lang_input],
	outputs=output
	)

	demo.launch()