Instructions to use ctrlos/shellgpt with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Adapters
How to use ctrlos/shellgpt with Adapters:
from adapters import AutoAdapterModel model = AutoAdapterModel.from_pretrained("undefined") model.load_adapter("ctrlos/shellgpt", set_active=True) - Notebooks
- Google Colab
- Kaggle
| from transformers import AutoModel, AutoTokenizer | |
| from datasets import load_dataset | |
| from torch.utils.data import DataLoader, Dataset | |
| import torch.optim as optim | |
| import torch.nn as nn | |
| class ShellcodeDataset(Dataset): | |
| def __init__(self, data, tokenizer): | |
| self.data = data | |
| self.tokenizer = tokenizer | |
| def __len__(self): | |
| return len(self.data) | |
| def __getitem__(self, idx): | |
| intent = self.data[idx]['intent'] | |
| snippet = self.data[idx]['snippet'] | |
| encoding = self.tokenizer(intent, return_tensors="pt", padding="max_length", truncation=True, max_length=1024) | |
| label = self.tokenizer(snippet, return_tensors="pt", padding="max_length", truncation=True, max_length=1024) | |
| return {'input_ids': encoding['input_ids'], 'labels': label['input_ids']} | |
| # Initialize tokenizer and model | |
| model_name = "openai-community/gpt2" | |
| tokenizer = AutoTokenizer.from_pretrained(model_name) | |
| model = AutoModel.from_pretrained(model_name) | |
| # Add padding token to the tokenizer | |
| tokenizer.pad_token = tokenizer.eos_token | |
| # Load the dataset | |
| dataset = load_dataset('SoLID/shellcode_i_a32') | |
| # Create the dataset and dataloader | |
| train_dataset = ShellcodeDataset(dataset['train'], tokenizer) | |
| train_dataloader = DataLoader(train_dataset, batch_size=16) | |
| # Define the optimizer and criterion | |
| optimizer = optim.Adam(model.parameters()) | |
| criterion = nn.CrossEntropyLoss() | |
| # Training loop | |
| model.train() | |
| for epoch in range(3): | |
| for batch in train_dataloader: | |
| optimizer.zero_grad() | |
| input_ids, labels = batch['input_ids'], batch['labels'] | |
| outputs = model(input_ids) | |
| loss = criterion(outputs.logits, labels) | |
| loss.backward() | |
| optimizer.step() | |