| |
| import os |
| import tensorflow as tf |
| from tensorflow.keras.preprocessing.image import ImageDataGenerator |
| from tensorflow.keras.models import Sequential |
| from tensorflow.keras.layers import Conv2D, Activation, MaxPooling2D, Flatten, Dense, Dropout |
| from tensorflow.keras.optimizers import Adam |
| from tensorflow.keras.models import load_model |
| from tensorflow.keras.preprocessing.image import load_img, img_to_array |
| import shutil |
| from PIL import Image |
| from tensorflow.keras.preprocessing.image import load_img, img_to_array |
| import matplotlib.pyplot as plt |
| import cv2 |
| |
| import numpy as np |
| import pickle |
|
|
def clean_directory(directory, cache_file="cache.pkl"):
    """Validate the images under *directory* and return the class count.

    Walks the dataset tree, treating each leaf directory (one with no
    subdirectories) as a class. Corrupted image files are deleted, and leaf
    directories left with no valid images are removed entirely.

    The resulting count is memoized in *cache_file* via pickle.
    NOTE(review): the cache is keyed on nothing — if the dataset changes,
    the stale count is returned until the cache file is deleted manually.

    Args:
        directory: Root of the class-per-subdirectory image dataset.
        cache_file: Path of the pickle file used to cache the class count.

    Returns:
        int: Number of leaf directories that still contain valid images.
    """
    # Fast path: reuse a previously computed result.
    if os.path.exists(cache_file):
        with open(cache_file, "rb") as f:
            num_classes = pickle.load(f)
        print("Loaded cached results.")
        return num_classes

    num_classes = 0
    for subdir, dirs, files in os.walk(directory):
        # Only leaf directories count as classes.
        if not dirs:
            num_classes += 1
            valid_files = []
            for file in files:
                file_path = os.path.join(subdir, file)
                try:
                    # Context manager closes the underlying file handle even
                    # when verify() raises — the original leaked open handles,
                    # which can make the os.remove below fail on Windows.
                    with Image.open(file_path) as img:
                        img.verify()
                    valid_files.append(file)
                except (IOError, SyntaxError):
                    print(f"Removing corrupted file: {file_path}")
                    os.remove(file_path)

            # A leaf with no valid images is not a usable class.
            if not valid_files:
                print(f"Removing empty directory: {subdir}")
                shutil.rmtree(subdir)
                num_classes -= 1

    with open(cache_file, "wb") as f:
        pickle.dump(num_classes, f)
    print("Saved results to cache.")

    return num_classes
|
|
# Root of the dataset: one subdirectory per malware class.
data_dir = 'Malign/extract'

# Prune corrupted images / empty class folders and get the class count.
num_classes = clean_directory(data_dir)

# Training hyperparameters.
batch_size = 32
epochs = 50
image_size = (200, 200)  # every image is resized to 200x200 by the generators
| |
# One generator with a held-out 20% validation split; pixel values are
# rescaled from [0, 255] to [0, 1]. No augmentation beyond rescaling.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

# 80% training subset, streamed from disk with one-hot labels
# (class_mode='categorical').
train_generator = train_datagen.flow_from_directory(
    data_dir,
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'
)

# 20% validation subset drawn from the same directory and split.
validation_generator = train_datagen.flow_from_directory(
    data_dir,
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'
)
|
|
| |
# Three conv/pool stages followed by a dense classifier head.
model = Sequential()

# Stage 1: 64 3x3 filters on the 200x200 RGB input.
model.add(Conv2D(64, (3, 3), input_shape=(*image_size, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Stage 2.
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Stage 3.
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head with dropout for regularization.
model.add(Flatten())
model.add(Dense(128))
model.add(Dropout(0.5))
model.add(Activation('relu'))

# Output layer: one unit per class. BUG FIX: this was hard-coded as
# Dense(119), ignoring the num_classes value computed by clean_directory()
# above — the softmax width silently mismatched the generators' one-hot
# labels whenever the dataset did not contain exactly 119 classes.
model.add(Dense(num_classes))
model.add(Activation('softmax'))

model.summary()
|
|
# Adam at the (default) 1e-3 learning rate; categorical cross-entropy
# matches the generators' one-hot (class_mode='categorical') labels.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
|
|
| |
# Train for the full epoch budget. No early stopping or checkpointing is
# configured, so the final (not necessarily best) weights are saved below.
history = model.fit(
    train_generator,
    epochs=epochs,
    validation_data=validation_generator
)

# Persist the trained model (legacy HDF5 format, implied by the .h5 suffix).
model.save("malware_classifier_lime.h5")
|
|
|
|
|
|
|
|