| | import torch |
| | from transformers import GPT2LMHeadModel, GPT2TokenizerFast |
| | import gradio as gr |
| |
|
| | |
| | |
| | |
| | MODEL_NAME = "gpt2" |
| | WEIGHTS_PATH = "gpt2_spoc.pt" |
| | DEVICE = "cuda" if torch.cuda.is_available() else "cpu" |
| |
|
| | |
| | |
| | |
# --- Tokenizer ---------------------------------------------------------------
# Load the stock GPT-2 tokenizer and register the task markers used to frame
# the pseudo-code / code pair during fine-tuning.
_SPECIAL_TOKENS = {"additional_special_tokens": ["<|pc|>", "<|code|>", "<|end|>"]}
tokenizer = GPT2TokenizerFast.from_pretrained(MODEL_NAME)
tokenizer.add_special_tokens(_SPECIAL_TOKENS)
| |
|
# --- Model -------------------------------------------------------------------
# Build the base GPT-2, grow the embedding matrix to cover the special tokens
# added to the tokenizer above, then restore the fine-tuned SPoC weights.
model = GPT2LMHeadModel.from_pretrained(MODEL_NAME)
model.resize_token_embeddings(len(tokenizer))
# weights_only=True: the checkpoint is a plain state dict of tensors (it is fed
# straight into load_state_dict), so refuse to unpickle arbitrary Python
# objects from the file — safer than the permissive torch.load default.
state_dict = torch.load(WEIGHTS_PATH, map_location=DEVICE, weights_only=True)
model.load_state_dict(state_dict)
model.to(DEVICE)
model.eval()  # inference only: disable dropout
| |
|
| | |
| | |
| | |
def generate_code(pseudo: str, max_new_tokens: int = 200) -> str:
    """Translate pseudo-code into code with the fine-tuned GPT-2.

    Args:
        pseudo: Plain-text pseudo-code entered by the user.
        max_new_tokens: Upper bound on generated tokens.

    Returns:
        The generated code string, or a warning message when the input
        is empty/whitespace-only.
    """
    if not pseudo.strip():
        return "⚠️ Please enter some pseudo-code."

    # Frame the input the same way the fine-tuning data was framed.
    prompt = f"<|pc|>\n{pseudo.strip()}\n<|code|>\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)

    # Inference only — skip autograd bookkeeping entirely.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # BUG FIX: decoding with skip_special_tokens=True strips "<|code|>" and
    # "<|end|>" before the splits below, so the old code returned the echoed
    # prompt plus the code. Keep the special tokens in the decoded text and
    # cut out exactly the span between <|code|> and <|end|>.
    text = tokenizer.decode(outputs[0], skip_special_tokens=False)
    return text.split("<|code|>")[-1].split("<|end|>")[0].strip()
| |
|
| | |
| | |
| | |
# --- Gradio UI ---------------------------------------------------------------
# Clickable example inputs shown under the textbox.
_EXAMPLES = [
    ["Read integer n\nRead n integers into a list\nPrint the sum of the list"],
    ["Input two numbers a and b\nIf a > b, print a else print b"],
    ["Read a string s\nReverse it and print"],
]

# Single-function interface: pseudo-code textbox in, highlighted code out.
demo = gr.Interface(
    fn=generate_code,
    inputs=gr.Textbox(lines=10, label="🧠 Enter Pseudo-Code"),
    outputs=gr.Code(label="💻 Generated Code", language="python"),
    title="Pseudo-Code → Code Generator (GPT-2 SPOC)",
    description="Fine-tuned GPT-2 model that converts pseudo-code into working Python code.",
    examples=_EXAMPLES,
    theme="gradio/soft",
)
| |
|
| | |
| | |
| | |
| | if __name__ == "__main__": |
| | demo.launch() |