| import gradio as gr |
| import numpy as np |
| import pandas as pd |
| import matplotlib.pyplot as plt |
| import seaborn as sns |
# Global seaborn theme: "darkgrid" with grid lines drawn in colour ".6"
# using the ":" line style.
sns.set_style(
    "darkgrid",
    {"grid.color": ".6", "grid.linestyle": ":"},
)
| import category_encoders as ce |
| from sklearn.decomposition import TruncatedSVD |
| from sklearn.feature_extraction.text import TfidfVectorizer |
| from sklearn.metrics.pairwise import cosine_similarity |
| from sklearn.preprocessing import LabelEncoder |
| from sklearn.preprocessing import OneHotEncoder |
|
|
| |
| |
| |
| |
| |
| |
| |
|
|
def predict_match(name, body_profile_type):
    """Return the username suggested for a demo user profile.

    A single-row feature frame is assembled from fixed attribute values,
    the supplied body-profile code and the TF-IDF vector of a fixed bio
    text, and handed to ``recommendOne``.

    Args:
        name: Accepted because the Gradio interface passes two text inputs;
            it is not used in the computation.
        body_profile_type: Value convertible with ``float()``; stored in the
            ``body_profile`` column.

    Returns:
        Whatever ``recommendOne`` returns for the assembled frame.

    Raises:
        ValueError: If ``body_profile_type`` is not a numeric string.
    """
    # The value arrives as free text from the UI, so fail with a message
    # that names the offending input instead of float()'s generic one.
    try:
        body_profile = float(body_profile_type)
    except ValueError as err:
        raise ValueError(
            f"body_profile_type must be numeric, got {body_profile_type!r}"
        ) from err

    bio = 'I am a foodie and traveller. But sometimes like to sit alone in a corner and read a good fiction.'

    # Key order is significant: it becomes the column order of the frame
    # passed on to the fitted model.
    profile = {
        'age': 22.0,
        'status': 1.0,
        'sex': 0.0,
        'height': 60.0,
        'smokes': 1.0,
        'new_languages': 2.0,
        'body_profile': body_profile,
        'education_level': 4.0,
        'dropped_out': 0.0,
        'location_preference': 2.0,
        'num_languages': 2.0,
        'drinks_encoded': 0.0,
        'drugs_encoded': 0.0,
        'location_new_york': 0.0,
        'location_northern_california': 1.0,
        'location_southern_california': 0.0,
        'job_encoded': 4.0,
        'pets_0': 1.0,
        'pets_1': 1.0,
        'pets_2': 1.0,
        'pets_3': 1.0,
    }
    profile_df = pd.DataFrame(profile, index=[0])

    # Vectorise the bio with the already-fitted module-level vectoriser and
    # label the columns the same way the training frame was labelled.
    bio_df = pd.DataFrame(
        tfidf.transform([bio]).toarray(),
        columns=feature_names,
    )

    user_df = pd.concat([profile_df, bio_df], axis=1)
    return recommendOne(user_df)
|
|
def greet_test(name, str2):
    """Return the string ``"Hello <name>!! str2=<str2>"``.

    Args:
        name: Text inserted after ``"Hello "``.
        str2: Text appended after ``" str2="``.

    Returns:
        The concatenated greeting.

    Raises:
        TypeError: If either argument is not a ``str`` (plain ``+``
            concatenation is used, so nothing is converted implicitly).
    """
    return "Hello " + name + "!!" + " str2=" + str2
|
|
| |
| |
# Load the raw profile table.
tinder_df = pd.read_csv("tinder_data.csv")

# Language count = number of commas in the text + 1; the text column itself
# is dropped once the count has been derived.
tinder_df['num_languages'] = tinder_df['language'].str.count(',') + 1
tinder_df.drop(["language"], axis=1, inplace=True)

# Numeric weight for each location-preference label.
place_type_strength = {
    'anywhere': 1.0,
    'same state': 2.0,
    'same city': 2.5,
}
# Direct dict indexing raises KeyError on a label that is not listed above,
# rather than silently producing a missing value.
tinder_df['location_preference'] = tinder_df['location_preference'].apply(
    lambda x: place_type_strength[x]
)

# Per-column value replacements for the two-valued columns.
two_unique_values_column = {
    'sex': {'f': 1, 'm': 0},
    'dropped_out': {'no': 0, 'yes': 1},
}
tinder_df.replace(two_unique_values_column, inplace=True)

# Numeric weight for each relationship-status label.
status_type_strength = {
    'single': 2.0,
    'available': 2.0,
    'seeing someone': 1.0,
    'married': 1.0,
}
tinder_df['status'] = tinder_df['status'].apply(
    lambda x: status_type_strength[x]
)
|
|
| |
# 'orientation' is removed from the frame and never used as a feature.
# The encoder is still fitted so the module-level name keeps its fitted
# state, but the column is dropped directly: writing the encoded values
# back first would only be discarded by the drop.
orientation_encoder = LabelEncoder()
orientation_encoder.fit(tinder_df['orientation'])
tinder_df.drop("orientation", axis=1, inplace=True)
|
|
# Collapse the raw 'drinks' labels into three coarser labels.
drinking_habit = {
    'socially': 'sometimes',
    'rarely': 'sometimes',
    'not at all': 'do not drink',
    'often': 'drinks often',
    'very often': 'drinks often',
    'desperately': 'drinks often',
}
tinder_df['drinks'] = tinder_df['drinks'].apply(
    lambda x: drinking_habit[x]
)

# One encoder is fitted on the values of both columns together, so a label
# that occurs in 'drinks' and in 'drugs' receives the same integer code.
habit_encoder = LabelEncoder()
habit_encoder.fit(tinder_df[['drinks', 'drugs']].values.reshape(-1))

tinder_df['drinks_encoded'] = habit_encoder.transform(tinder_df['drinks'])
tinder_df['drugs_encoded'] = habit_encoder.transform(tinder_df['drugs'])

# Only the encoded columns are kept.
tinder_df.drop(["drinks", "drugs"], axis=1, inplace=True)
|
|
# Region name -> cities assigned to that region.
region_dict = {
    'southern_california': [
        'los angeles',
        'san diego',
        'hacienda heights',
        'north hollywood',
        'phoenix',
    ],
    'new_york': [
        'brooklyn',
        'new york',
    ],
}


def _build_city_lookup(regions):
    """Map each lower-cased city to the first region that lists it."""
    lookup = {}
    for region, cities in regions.items():
        for city in cities:
            # setdefault keeps the earliest region, which is what a
            # front-to-back scan over the regions would return.
            lookup.setdefault(city.lower(), region)
    return lookup


# Built once from region_dict as defined above, so get_region does not
# re-lowercase every city list on each call.
_CITY_TO_REGION = _build_city_lookup(region_dict)


def get_region(city):
    """Return the region name for ``city``.

    The comparison is case-insensitive. A city that is not listed in
    ``region_dict`` is assigned to ``"northern_california"``.

    Args:
        city: City name as a string.

    Returns:
        A key of ``region_dict`` or ``"northern_california"``.
    """
    return _CITY_TO_REGION.get(city.lower(), "northern_california")
|
|
|
|
# Keep the text before the first ", " and bucket it into a region name.
tinder_df['location'] = (
    tinder_df['location']
    .str.split(', ')
    .str[0]
    .apply(get_region)
)

# One-hot encode the region; the encoder names the output columns
# "location_<region>".
location_encoder = OneHotEncoder()
location_encoded = location_encoder.fit_transform(tinder_df[['location']])

# Carry tinder_df's index so the column-wise concat below lines rows up by
# construction instead of relying on both frames having a default index.
location_encoded_df = pd.DataFrame(
    location_encoded.toarray(),
    columns=location_encoder.get_feature_names_out(['location']),
    index=tinder_df.index,
)

tinder_df = pd.concat([tinder_df, location_encoded_df], axis=1)
tinder_df.drop(["location"], axis=1, inplace=True)
|
|
| |
# Integer-encode 'job' into 'job_encoded', then drop the original column.
job_encoder = LabelEncoder()
job_encoder.fit(tinder_df['job'])
tinder_df['job_encoded'] = job_encoder.transform(tinder_df['job'])
tinder_df.drop('job', axis=1, inplace=True)
|
|
# Binary flag: 1.0 only for 'no'; every other listed label maps to 0.
smokes = {
    'no': 1.0,
    'sometimes': 0,
    'yes': 0,
    'when drinking': 0,
    'trying to quit': 0,
}
# Direct dict indexing raises KeyError for a label that is not listed.
tinder_df['smokes'] = tinder_df['smokes'].apply(lambda x: smokes[x])
|
|
# Binary-encode 'pets' with category_encoders, append the encoded columns
# to the frame and drop the original text column.
bin_enc = ce.BinaryEncoder(cols=['pets'])
pet_enc = bin_enc.fit_transform(tinder_df['pets'])

tinder_df = pd.concat([tinder_df, pet_enc], axis=1)
tinder_df.drop("pets", axis=1, inplace=True)
|
|
| |
# Integer-encode 'new_languages' in place. The encoder gets its own name:
# binding it to `location_encoder` would replace the fitted one-hot
# location encoder that already lives under that name.
new_languages_encoder = LabelEncoder()
new_languages_encoder.fit(tinder_df['new_languages'])
tinder_df['new_languages'] = new_languages_encoder.transform(
    tinder_df['new_languages']
)
|
|
| |
# Integer-encode 'body_profile' in place; `le` keeps the fitted mapping.
le = LabelEncoder()
tinder_df["body_profile"] = le.fit_transform(tinder_df["body_profile"])
|
|
| |
# TF-IDF features from the free-text bios, with English stop words removed.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(tinder_df['bio'])

# Column labels in matrix-column order. `tfidf.vocabulary_` is a
# term -> column-index mapping; its iteration order is not guaranteed to
# follow the column order, so using it as labels can attach terms to the
# wrong columns.
feature_names = tfidf.get_feature_names_out()

# Carry tinder_df's index so the column-wise concat below aligns rows by
# construction.
tfidf_df = pd.DataFrame(
    tfidf_matrix.toarray(),
    columns=feature_names,
    index=tinder_df.index,
)

# Model input: every remaining tinder_df column except the text and
# identifier columns, followed by the TF-IDF columns.
tinder_dfs = tinder_df.drop(["bio", "user_id", "username"], axis=1)
tinder_dfs = pd.concat([tinder_dfs, tfidf_df], axis=1)
|
|
| |
# Reduce the feature frame to 100 components; `svd` is kept so that new
# rows can be projected with svd.transform().
svd = TruncatedSVD(n_components=100)
svd_matrix = svd.fit_transform(tinder_dfs)

# Pairwise cosine similarity between all reduced rows.
# NOTE(review): nothing in the visible code reads `cosine_sim`; confirm
# whether it is still needed before removing it.
cosine_sim = cosine_similarity(svd_matrix)
|
|
def recommend(user_df, num_recommendations=5):
    """Return usernames of the dataset rows most similar to ``user_df``.

    ``user_df`` is projected with the fitted module-level ``svd`` and
    compared to every row of ``svd_matrix`` by cosine similarity. Only the
    first row of ``user_df`` is ranked.

    Args:
        user_df: Frame accepted by ``svd.transform`` (presumably the same
            columns, in the same order, as the frame ``svd`` was fitted
            on - verify against the caller).
        num_recommendations: Number of usernames to return.

    Returns:
        The slice of ``tinder_df['username']`` holding the rows ranked
        2nd to ``num_recommendations + 1``-th by descending similarity.
    """
    user_vector = svd.transform(user_df)
    scores = cosine_similarity(user_vector, svd_matrix)[0]

    # Row positions by descending score. The stable sort keeps rows with
    # equal scores in their original order.
    ranked = np.argsort(-scores, kind="stable")

    # The slice starts at 1, so the single highest-scoring row is never
    # returned.
    # NOTE(review): that drops the query profile itself when it is one of
    # the dataset rows; for a user who is not in the dataset it discards
    # the best match - confirm which is intended.
    chosen = ranked[1:num_recommendations + 1]

    return tinder_df['username'].iloc[chosen]
|
|
def recommendOne(user_df, num_recommendations=1):
    """Return the first username that ``recommend`` ranks for ``user_df``.

    The ranking itself is delegated to ``recommend`` so both functions
    apply the same scoring and selection; the first entry of its result is
    taken by position, so the lookup does not depend on the index labels
    of ``tinder_df``.

    Args:
        user_df: Frame passed through to ``recommend``.
        num_recommendations: Size of the ranking requested from
            ``recommend``; only its first entry is returned.

    Returns:
        A single value from ``tinder_df['username']``.

    Raises:
        IndexError: If the ranking is empty (for example when
            ``num_recommendations`` is smaller than 1).
    """
    return recommend(user_df, num_recommendations).iloc[0]
|
|
| |
|
|
# Gradio UI: two text inputs are passed to predict_match as
# (name, body_profile_type); the returned value is shown as text.
iface = gr.Interface(
    fn=predict_match,
    inputs=["text", "text"],
    outputs="text",
)
iface.launch()
|
|