Spaces:
Sleeping
Sleeping
File size: 7,605 Bytes
e964b12 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
import pandas as pd
import numpy as np
import pickle
import os
import warnings
warnings.filterwarnings("ignore", message="X does not have valid feature names")
# Dope,R,7000000.0,103.0,2002.0,6100010.0,17506470.0,7.2,89000.0,United States,Adventure Comedy Crime Drama,200.0,85.04,0,6.0,2015.0
# Dora and the Lost City of Gold,PG,49000000.0,102.0,3735.0,17431588.0,60477943.0,6.1,35000.0,"Australia, United States",Action Adventure Comedy Family Fantasy Mystery,179.0,82.17,0,8.0,2019.0
# Double Take,PG-13,24000000.0,88.0,1631.0,11736236.0,29831583.0,5.4,88000.0,United States,Action Comedy Crime Thriller,100.0,14.86,0,1.0,2001.0
# Doubt,PG-13,20000000.0,104.0,1287.0,507226.0,33446470.0,7.5,136000.0,United States,Drama Mystery,257.0,77.46,0,12.0,2008.0
# Down to Earth,PG-13,49000000.0,87.0,2521.0,20027309.0,64186502.0,5.4,25000.0,United States,Comedy Fantasy,124.0,22.71,0,2.0,2001.0
# Downsizing,R,68000000.0,135.0,2668.0,4954287.0,24449754.0,5.8,125000.0,United States,Drama Fantasy Sci-Fi,349.0,49.34,0,12.0,2017.0
# Downton Abbey,PG,13000000.0,122.0,3548.0,31033665.0,96853865.0,7.4,63000.0,United Kingdom,Drama Romance,303.0,81.23,0,9.0,2019.0
# Dr. Dolittle 2,PG,72000000.0,87.0,3053.0,25037039.0,112952899.0,4.7,47000.0,United States,Comedy Family Fantasy,135.0,42.66,0,6.0,2001.0
# Dracula 2000,R,54000000.0,100.0,2204.0,8636567.0,33022767.0,4.9,37000.0,"Canada, United States",Action Fantasy Horror Thriller,14.0,26.0,0,12.0,2000.0
# Dracula Untold,PG-13,70000000.0,92.0,2900.0,23514615.0,56280355.0,6.2,208000.0,United States,Action Drama Fantasy Horror,168.0,27.68,0,10.0,2014.0
# Draft Day,PG-13,25000000.0,110.0,2781.0,9783603.0,28842237.0,6.8,67000.0,United States,Drama Sport,196.0,58.99,0,4.0,2014.0
# Drag Me to Hell,PG-13,30000000.0,99.0,2510.0,15825480.0,42100625.0,6.6,218000.0,United States,Horror,302.0,91.05,0,5.0,2009.0
# Dragon Wars: D-War,PG-13,32000000.0,107.0,2277.0,5376000.0,10977721.0,3.5,25000.0,"Republic of Korea, United States",Action Drama Fantasy Thriller,47.0,29.77,0,9.0,2007.0
# Dragonball Evolution,PG,30000000.0,85.0,2181.0,4756488.0,9362785.0,2.5,79000.0,United States,Action Adventure Fantasy Sci-Fi Thriller,10.0,45.0,0,4.0,2009.0
# Dragonfly,PG-13,60000000.0,104.0,2507.0,10216025.0,30323400.0,6.1,40000.0,"Germany, United States",Drama Fantasy Mystery Romance Thriller,158.0,10.76,0,2.0,2002.0
def predict_with_feature_selection(model_file_name, month, year, mpaa, budget, runtime, screens, opening_week, user_vote, ratings, critic_vote, meta_score, sequel, genres, country):
    """Run one movie through the saved preprocessing artifacts and a pickled model.

    The numeric inputs are coerced to ``float``, ``mpaa`` and ``country`` are
    passed through their pickled encoders, and ``genres`` is turned into a 0/1
    indicator vector over the pickled ``unique_genres`` list. That vector is
    scaled and factor-transformed, and the resulting columns are stored as
    ``Factor1`` .. ``FactorN``. Only the columns named in
    ``selected_features.pkl`` are handed to the model, and ``np.expm1`` is
    applied to the model output (i.e. the model output is treated as being on
    a log1p scale).

    Args:
        model_file_name: Path to the pickled model to load.
        month, year, budget, runtime, screens, opening_week, user_vote,
            ratings, critic_vote, meta_score, sequel: Values accepted by
            ``float()``.
        mpaa: Rating label handed to the pickled mpaa encoder.
        genres: Whitespace-separated genre names. ``None`` is treated like an
            empty string (no genre set).
        country: Country label handed to the pickled country encoder.

    Returns:
        The first element of the back-transformed model prediction
        (presumably a box-office figure, judging by the sample rows kept in
        this file -- confirm against the training code).

    Raises:
        ValueError: If a numeric argument cannot be converted to ``float``.
            This happens before any file is opened.
        FileNotFoundError: If the model file or one of the
            ``../model_efa/*.pkl`` artifacts does not exist. Artifact paths
            are relative to the current working directory.

    NOTE(review): the encoders look like scikit-learn ``LabelEncoder``
    objects, which raise ``ValueError`` for labels not seen during fitting --
    confirm against the training code.
    """
    # Coerce numerics up front so bad input fails before any file I/O.
    movie = {
        "month": float(month),
        "year": float(year),
        "mpaa": mpaa,
        "budget": float(budget),
        "runtime": float(runtime),
        "screens": float(screens),
        "opening_week": float(opening_week),
        "user_vote": float(user_vote),
        "ratings": float(ratings),
        "critic_vote": float(critic_vote),
        "meta_score": float(meta_score),
        "sequel": float(sequel),
        "genres": genres,
        "country": country,
    }

    # Insertion order is the load order; the model file is opened first.
    artifact_paths = {
        "model": model_file_name,
        "mpaa_label_encoder": "../model_efa/mpaa_label_encoder.pkl",
        "country_label_encoder": "../model_efa/country_label_encoder.pkl",
        "scaler": "../model_efa/scaler.pkl",
        "factor_analyzer": "../model_efa/factor_analyzer.pkl",
        "unique_genres": "../model_efa/unique_genres.pkl",
        "selected_features": "../model_efa/selected_features.pkl",
    }
    artifacts = {}
    for artifact_name, artifact_path in artifact_paths.items():
        with open(artifact_path, "rb") as f:
            # SECURITY: pickle.load can execute arbitrary code while loading;
            # only point this at artifact files from a trusted source.
            artifacts[artifact_name] = pickle.load(f)

    movie["mpaa"] = artifacts["mpaa_label_encoder"].transform([mpaa])[0]
    movie["country"] = artifacts["country_label_encoder"].transform([country])[0]

    # Split the genre string once instead of once per known genre.
    genre_tokens = set(("" if genres is None else genres).split())
    genre_flags = np.array(
        [1 if genre in genre_tokens else 0 for genre in artifacts["unique_genres"]]
    ).reshape(1, -1)

    scaled_flags = artifacts["scaler"].transform(genre_flags)
    factors = artifacts["factor_analyzer"].transform(scaled_flags)
    for position, value in enumerate(factors[0], start=1):
        movie[f"Factor{position}"] = value

    # Keep only the columns the model expects, in the stored order.
    movie_df = pd.DataFrame([movie])[artifacts["selected_features"]]
    prediction_log = artifacts["model"].predict(movie_df)
    prediction = np.expm1(prediction_log)
    return prediction[0]
def predict_with_feature_selection_without_opening_week(model_file_name, month, year, mpaa, budget, runtime, screens, critic_vote, meta_score, sequel, genres, country):
    """Predict for one movie without opening-week, user-vote or rating inputs.

    The numeric inputs are coerced to ``float``, ``mpaa`` and ``country`` are
    passed through their pickled encoders, and ``genres`` becomes a 0/1
    indicator vector over the pickled ``unique_genres`` list. The vector is
    scaled and factor-transformed into ``Factor1`` .. ``FactorN`` columns.
    The columns listed in ``selected_features_without_opening_week.pkl`` are
    passed to the model, and ``np.expm1`` is applied to its output (i.e. the
    model output is treated as being on a log1p scale).

    Args:
        model_file_name: Path to the pickled model to load.
        month, year, budget, runtime, screens, critic_vote, meta_score,
            sequel: Values accepted by ``float()``.
        mpaa: Rating label handed to the pickled mpaa encoder.
        genres: Whitespace-separated genre names. ``None`` is treated like an
            empty string (no genre set).
        country: Country label handed to the pickled country encoder.

    Returns:
        The first element of the back-transformed model prediction
        (presumably a box-office figure, judging by the sample rows kept in
        this file -- confirm against the training code).

    Raises:
        ValueError: If a numeric argument cannot be converted to ``float``.
            This happens before any file is opened.
        FileNotFoundError: If the model file or one of the
            ``../model_efa/*.pkl`` artifacts does not exist. Artifact paths
            are relative to the current working directory.

    NOTE(review): the encoders look like scikit-learn ``LabelEncoder``
    objects, which raise ``ValueError`` for labels not seen during fitting --
    confirm against the training code.
    """
    # Coerce numerics up front so bad input fails before any file I/O.
    movie = {
        "month": float(month),
        "year": float(year),
        "mpaa": mpaa,
        "budget": float(budget),
        "runtime": float(runtime),
        "screens": float(screens),
        "critic_vote": float(critic_vote),
        "meta_score": float(meta_score),
        "sequel": float(sequel),
        "genres": genres,
        "country": country,
    }

    # (name, path) pairs in load order; the model file is opened first.
    artifact_files = (
        ("model", model_file_name),
        ("mpaa_label_encoder", "../model_efa/mpaa_label_encoder.pkl"),
        ("country_label_encoder", "../model_efa/country_label_encoder.pkl"),
        ("scaler", "../model_efa/scaler.pkl"),
        ("factor_analyzer", "../model_efa/factor_analyzer.pkl"),
        ("unique_genres", "../model_efa/unique_genres.pkl"),
        ("selected_features", "../model_efa/selected_features_without_opening_week.pkl"),
    )
    loaded = {}
    for key, path in artifact_files:
        with open(path, "rb") as f:
            # SECURITY: pickle.load can execute arbitrary code while loading;
            # only point this at artifact files from a trusted source.
            loaded[key] = pickle.load(f)

    movie["mpaa"] = loaded["mpaa_label_encoder"].transform([mpaa])[0]
    movie["country"] = loaded["country_label_encoder"].transform([country])[0]

    # Split the genre string once instead of once per known genre.
    present_genres = set(("" if genres is None else genres).split())
    indicator_row = np.array(
        [1 if genre in present_genres else 0 for genre in loaded["unique_genres"]]
    ).reshape(1, -1)

    indicator_scaled = loaded["scaler"].transform(indicator_row)
    factor_scores = loaded["factor_analyzer"].transform(indicator_scaled)
    for number, score in enumerate(factor_scores[0], start=1):
        movie[f"Factor{number}"] = score

    # Keep only the columns the model expects, in the stored order.
    movie_df = pd.DataFrame([movie])[loaded["selected_features"]]
    prediction_log = loaded["model"].predict(movie_df)
    prediction = np.expm1(prediction_log)
    return prediction[0]
if __name__ == '__main__':
    # Smoke run: print one prediction per saved model, first for the models
    # that use opening-week inputs, then for the ones trained without them.
    list_file_name = [
        "../model_efa/model_rf.pkl",
        "../model_efa/model_gb.pkl",
        "../model_efa/model_xgb.pkl",
        "../model_efa/model_lgbm.pkl",
        "../model_efa/model_cb.pkl",
    ]
    list_file_name_without_opening_week = [
        "../model_efa/model_rf_without_opening_week.pkl",
        "../model_efa/model_gb_without_opening_week.pkl",
        "../model_efa/model_xgb_without_opening_week.pkl",
        "../model_efa/model_lgbm_without_opening_week.pkl",
        "../model_efa/model_cb_without_opening_week.pkl",
    ]

    # Sample inputs shared by both runs.
    # NOTE(review): critic_vote=355000 (and user_vote=72082999 below) look
    # shifted by one column compared with the commented sample rows near the
    # top of the file -- confirm the intended values.
    sample_common = {
        "month": 1,
        "year": 2021,
        "mpaa": "PG-13",
        "budget": 15000000,
        "runtime": 103,
        "screens": 3427,
        "critic_vote": 355000,
        "meta_score": 88.32,
        "sequel": 0,
        "genres": "Drama Horror Mystery Sci-Fi Thriller",
        "country": "United States",
    }
    # Extra inputs only the opening-week models take.
    sample_opening = {
        "opening_week": 24727437,
        "user_vote": 72082999,
        "ratings": 7.2,
    }

    for file_name in list_file_name:
        result = predict_with_feature_selection(file_name, **sample_common, **sample_opening)
        print(result)

    for file_name in list_file_name_without_opening_week:
        result = predict_with_feature_selection_without_opening_week(file_name, **sample_common)
        print(result)