Instructions to use abdullah890/malconv with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Keras
How to use abdullah890/malconv with Keras:
```python
# Available backend options are: "jax", "torch", "tensorflow".
import os
os.environ["KERAS_BACKEND"] = "jax"

import keras

model = keras.saving.load_model("hf://abdullah890/malconv")
```
- Notebooks
- Google Colab
- Kaggle
| import os | |
| import sys | |
| import itertools | |
| import numpy as np | |
| import tensorflow as tf | |
| from sklearn.model_selection import train_test_split | |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| from src.model import MalConv | |
| from src.utils import preprocess_dataset | |
def _train_and_evaluate(param_dict, max_length, epochs,
                        X_train, y_train, X_val, y_val):
    """Build, train and score one MalConv model for a single grid point.

    Args:
        param_dict: Mapping with the keys 'embedding_size', 'num_filters',
            'fc_size' and 'learning_rate' for this trial.
        max_length: Maximum input length passed to MalConv and used as the
            width of the dummy build input.
        epochs: Number of training epochs.
        X_train, y_train: Training split.
        X_val, y_val: Validation split.

    Returns:
        Tuple ``(val_loss, val_acc)`` as reported by ``model.evaluate`` on
        the validation split.
    """
    model = MalConv(
        max_input_length=max_length,
        embedding_size=param_dict['embedding_size'],
        num_filters=param_dict['num_filters'],
        fc_size=param_dict['fc_size']
    )
    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=param_dict['learning_rate']
        ),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    # Call the model once on a dummy batch so it is built before training.
    dummy_input = np.zeros((1, max_length), dtype=np.uint8)
    model(dummy_input)

    model.fit(
        X_train, y_train,
        batch_size=16,
        epochs=epochs,
        validation_data=(X_val, y_val),
        verbose=0
    )

    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
    return val_loss, val_acc


def hyperparameter_search(csv_path,
                          param_grid=None,
                          max_length=2**20,
                          epochs=5,
                          validation_split=0.2):
    """Optimise hyperparameters with an exhaustive grid search.

    Every combination of the values in ``param_grid`` is trained for
    ``epochs`` epochs and scored by validation accuracy. A combination that
    raises during build/training is reported and skipped so that one bad
    configuration does not abort the whole search.

    Args:
        csv_path: Path to the training data CSV (handed to
            ``preprocess_dataset``).
        param_grid: Dict mapping hyperparameter name to a list of candidate
            values. Must provide 'embedding_size', 'num_filters', 'fc_size'
            and 'learning_rate'. Defaults to a small built-in grid.
        max_length: Maximum input length.
        epochs: Number of training epochs per combination.
        validation_split: Fraction of the data held out for validation.

    Returns:
        Tuple ``(best_params, results)``. ``best_params`` is the parameter
        dict with the highest validation accuracy, or ``None`` if no
        combination trained successfully. ``results`` is a list of dicts
        with keys 'params', 'val_accuracy' and 'val_loss', sorted by
        validation accuracy in descending order.
    """
    if param_grid is None:
        param_grid = {
            'embedding_size': [8, 16],
            'num_filters': [64, 128],
            'fc_size': [64, 128],
            'learning_rate': [0.001, 0.0001]
        }

    print("๋ฐ์ดํฐ ๋ก๋ฉ ์ค...")
    X, y = preprocess_dataset(csv_path, max_length)
    # Fixed seed and stratification keep the split identical across runs.
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=validation_split, random_state=42, stratify=y
    )

    # Generate every hyperparameter combination.
    param_names = list(param_grid.keys())
    param_values = list(param_grid.values())
    param_combinations = list(itertools.product(*param_values))

    best_score = 0
    best_params = None
    results = []

    print(f"์ด {len(param_combinations)}๊ฐ์ ์กฐํฉ์ ํ ์คํธํฉ๋๋ค.")
    for i, params in enumerate(param_combinations):
        param_dict = dict(zip(param_names, params))
        print(f"\n[{i+1}/{len(param_combinations)}] ํ ์คํธ ์ค: {param_dict}")

        # Drop Keras global state left over from the previous trial so that
        # memory does not keep growing as the grid is walked.
        tf.keras.backend.clear_session()

        try:
            val_loss, val_acc = _train_and_evaluate(
                param_dict, max_length, epochs,
                X_train, y_train, X_val, y_val
            )
        except Exception as e:
            # Deliberate best-effort: report the failure and move on to the
            # next combination instead of aborting the search.
            print(f"์๋ฌ ๋ฐ์: {e}")
        else:
            results.append({
                'params': param_dict,
                'val_accuracy': val_acc,
                'val_loss': val_loss
            })
            print(f"๊ฒ์ฆ ์ ํ๋: {val_acc:.4f}")

            # Track the best trial. The first successful trial is always
            # accepted so best_params is set even if its accuracy is 0.0.
            if best_params is None or val_acc > best_score:
                best_score = val_acc
                best_params = param_dict
                print(f"์๋ก์ด ์ต๊ณ ์ฑ๋ฅ! ์ ํ๋: {best_score:.4f}")

    print("\n" + "="*50)
    print("ํ์ดํผํ๋ผ๋ฏธํฐ ํ๋ ์๋ฃ")
    print("="*50)
    print(f"์ต๊ณ ์ฑ๋ฅ: {best_score:.4f}")
    print(f"์ต์ ํ์ดํผํ๋ผ๋ฏธํฐ: {best_params}")

    # Sort results, best validation accuracy first.
    results.sort(key=lambda x: x['val_accuracy'], reverse=True)
    print("\n์์ 5๊ฐ ๊ฒฐ๊ณผ:")
    for i, result in enumerate(results[:5]):
        print(f"{i+1}. ์ ํ๋: {result['val_accuracy']:.4f}, "
              f"ํ๋ผ๋ฏธํฐ: {result['params']}")

    return best_params, results
def main():
    """Run a quick grid search on the sample CSV and print a retrain hint."""
    # Replace with the path to the real dataset.
    data_csv = "Input/sample_data.csv"

    # Custom hyperparameter grid.
    search_space = dict(
        embedding_size=[8, 16],
        num_filters=[64, 128],
        fc_size=[64, 128],
        learning_rate=[0.001, 0.0001],
    )

    # Fewer epochs than the default to keep this test run fast.
    _best_params, _results = hyperparameter_search(
        csv_path=data_csv,
        param_grid=search_space,
        epochs=3,
    )

    print("\n์ต์ ํ์ดํผํ๋ผ๋ฏธํฐ๋ก ๋ชจ๋ธ์ ๋ค์ ํ๋ จํ์ธ์:")
    print(f"python src/train.py {data_csv} --epochs 10")


# Only run the search when this file is executed as a script.
if __name__ == "__main__":
    main()