Spaces:
Runtime error
Runtime error
| import sys | |
| import os | |
| PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | |
| sys.path.append(PROJECT_ROOT) | |
| import numpy as np | |
| import pandas as pd | |
| import xgboost as xgb | |
| # ------------------------------- | |
| # IMPORT FEATURE EXTRACTORS | |
| # ------------------------------- | |
| from featureextraction.step1_statistical_extraction.step1_statistical_extraction import extract_features as extract_stat | |
| from featureextraction.step2_ast_extraction.step2_ast_extraction import extract_ast_features | |
| from featureextraction.step3_stylometry_extraction.step3_stylometry_extraction import extract_stylometry_features | |
| from featureextraction.semantic_features.unixcoder_embedding import get_unixcoder_embedding | |
| # XAI modules | |
| from xai.shaplayer import shap_explain | |
| from xai.grouping import group_shap_explanations | |
| from xai.text_explainer import generate_text_explanation | |
| # ------------------------------- | |
| # LOAD MODEL | |
| # ------------------------------- | |
# Trained classifier, loaded once at import time. The path is relative, so it
# is resolved against the current working directory of the process.
_MODEL_FILE = "classifier/xgboost_final_model.json"

model = xgb.XGBClassifier()
model.load_model(_MODEL_FILE)
| # ------------------------------- | |
| # LANGUAGE ONE-HOT | |
| # ------------------------------- | |
def encode_language(language):
    """Return the two-slot one-hot vector for a supported language.

    The name is lower-cased before matching: "python" maps to ``[1, 0]``
    and "java" maps to ``[0, 1]``.

    Raises:
        ValueError: if the lower-cased name is neither "python" nor "java".
    """
    one_hot_by_name = {
        "python": (1, 0),
        "java": (0, 1),
    }
    slots = one_hot_by_name.get(language.lower())
    if slots is None:
        raise ValueError("Language must be python or java")
    # Build a fresh array on every call so callers never share a buffer.
    return np.array(slots)
| # ------------------------------- | |
| # BUILD FEATURES FROM CODE | |
| # ------------------------------- | |
def build_features_from_code(code, language):
    """Assemble the single-row feature matrix for one code snippet.

    A one-row DataFrame with columns "normalized_code" and "Language" is
    handed to the statistical, AST and stylometry extractors. Their
    flattened outputs are joined, in that order, with the language one-hot
    vector and the UniXcoder embedding of ``code``.

    Args:
        code: source text to classify.
        language: language name; passed to the extractors unchanged and to
            ``encode_language``, which raises ``ValueError`` for anything
            other than python/java.

    Returns:
        A 2-D NumPy array with exactly one row (``reshape(1, -1)``).
    """
    frame = pd.DataFrame({"normalized_code": [code], "Language": [language]})

    # Run all three extractors first, then post-process their results.
    # NOTE(review): each is presumably a DataFrame (``.values`` / ``.drop``
    # are used below) - confirm against the extractor modules.
    stat_table = extract_stat(frame)
    ast_table = extract_ast_features(frame)
    style_table = extract_stylometry_features(frame)

    # The statistical output carries a "language" column that is dropped
    # here; the language enters the vector through the one-hot slots instead.
    parts = [
        stat_table.drop(columns=["language"]).values.flatten(),
        ast_table.values.flatten(),
        style_table.values.flatten(),
        encode_language(language),
        get_unixcoder_embedding(code),
    ]
    return np.hstack(parts).reshape(1, -1)
| # ------------------------------- | |
| # BASIC PREDICT FUNCTION | |
| # ------------------------------- | |
def predict_from_features(X_final):
    """Classify one feature row with the module-level ``model``.

    Args:
        X_final: feature matrix; only the first row's result is used.

    Returns:
        A ``(label, probability)`` tuple. ``label`` is "AI" when the
        predicted class equals 1 and "Human" otherwise. ``probability`` is
        always column 1 of ``predict_proba`` for the first row, whichever
        label was chosen (presumably the AI-class probability - confirm
        against the model's class order).
    """
    predicted_class = model.predict(X_final)[0]
    class_one_probability = model.predict_proba(X_final)[0][1]

    if predicted_class == 1:
        return "AI", class_one_probability
    return "Human", class_one_probability
| # ------------------------------- | |
| # INTERACTIVE CLI | |
| # ------------------------------- | |
def _read_pasted_code():
    """Read stdin lines until a line that strips to 'END', or until EOF."""
    collected = []
    while True:
        try:
            line = input()
        except EOFError:
            # stdin was closed (Ctrl-D / piped input) without the END
            # sentinel: treat that as the end of the paste, not a crash.
            break
        if line.strip() == "END":
            break
        collected.append(line)
    return "\n".join(collected)


if __name__ == "__main__":
    # Interactive entry point: ask for a language and a code snippet, then
    # print the prediction, the SHAP attributions and a text explanation.
    print("\n======================================")
    print(" AI vs Human Code Classification")
    print("======================================")
    language = input("Choose language (python/java): ").strip().lower()

    # Fail fast: encode_language raises ValueError for unsupported
    # languages, so reject bad input before the user pastes any code
    # rather than after feature extraction has already run.
    encode_language(language)

    print("\nPaste your code below.")
    print("Type 'END' on a new line when finished.\n")
    code_input = _read_pasted_code()

    # build features
    X_final = build_features_from_code(code_input, language)

    # predict
    label, prob = predict_from_features(X_final)

    # shap
    shap_result = shap_explain(model, X_final)

    # grouping
    grouped = group_shap_explanations(shap_result)

    # text explanation
    text_reason = generate_text_explanation(grouped, label, prob)

    print("\n========== RESULT ==========")
    print("Prediction :", label)
    print("Confidence :", prob)
    print("\nTop SHAP features:")
    # Each entry is indexed by 'feature_index', 'impact' and 'pushes_toward'.
    for e in shap_result:
        print(f"Feature {e['feature_index']} → {e['impact']} ({e['pushes_toward']})")
    print("\nGrouped SHAP importance:", grouped)
    print("\nExplanation:\n")
    print(text_reason)