"""
Streamlit app that recommends artists and albums for a selected Spotify
playlist using a neural collaborative filtering model.

Run with Streamlit (streamlit run <path to this file>).

Attribution: https://github.com/AIPI540/AIPI540-Deep-Learning-Applications/
Jon Reifschneider
Brinnae Bent
"""

import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image


class NNColabFiltering(nn.Module):
    '''Neural collaborative filtering model: playlist ("user") and artist/album
    ("item") embeddings are concatenated and passed through a small MLP to
    predict a rating within a fixed range.'''

    def __init__(self, n_playlists, n_artists, embedding_dim_users, embedding_dim_items, n_activations, rating_range):
        super().__init__()
        # Embedding tables for playlists and artist/album items
        self.user_embeddings = nn.Embedding(num_embeddings=n_playlists, embedding_dim=embedding_dim_users)
        self.item_embeddings = nn.Embedding(num_embeddings=n_artists, embedding_dim=embedding_dim_items)
        # Two-layer MLP applied to the concatenated embeddings
        self.fc1 = nn.Linear(embedding_dim_users + embedding_dim_items, n_activations)
        self.fc2 = nn.Linear(n_activations, 1)
        # (min, max) bounds used to scale the sigmoid output to valid ratings
        self.rating_range = rating_range

    def forward(self, X):
        # X[:, 0] holds playlist ids, X[:, 1] holds artist/album ids
        embedded_users = self.user_embeddings(X[:, 0])
        embedded_items = self.item_embeddings(X[:, 1])

        # Concatenate user and item embeddings, then run them through the MLP
        embeddings = torch.cat([embedded_users, embedded_items], dim=1)
        preds = self.fc1(embeddings)
        preds = F.relu(preds)
        preds = self.fc2(preds)

        # Squash to (0, 1) with a sigmoid, then rescale to the rating range
        preds = torch.sigmoid(preds) * (self.rating_range[1] - self.rating_range[0]) + self.rating_range[0]
        return preds
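

# Illustrative sketch only (not called by the app): how NNColabFiltering would be
# instantiated and queried on a dummy batch. All sizes and the rating range below
# are made-up assumptions, not values taken from the training pipeline.
def _example_model_forward():
    example_model = NNColabFiltering(
        n_playlists=1000,           # assumed playlist count
        n_artists=5000,             # assumed artist/album count
        embedding_dim_users=32,     # assumed embedding sizes
        embedding_dim_items=32,
        n_activations=64,
        rating_range=(0.0, 5.0),    # assumed rating scale
    )
    # Each input row is [playlist_id, artist_album_id]
    dummy_batch = torch.tensor([[0, 10], [1, 42]], dtype=torch.long)
    return example_model(dummy_batch)  # (2, 1) tensor of predicted ratings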


def generate_recommendations(artist_album, playlists, model, playlist_id, device, top_n=10, batch_size=1024):
    '''
    Generates the top-N artist/album recommendations for a given playlist by
    scoring every artist/album with the trained model and keeping the
    highest-scoring items that are not already in the playlist.

    Inputs:
        artist_album: DataFrame mapping artist_album_id to artist and album names
        playlists: DataFrame containing the playlists' contents
        model: the trained NNColabFiltering model
        playlist_id: the playlist id to generate recommendations for
        device: the torch device (GPU or CPU) to run inference on
        top_n: the number of recommendations to generate
        batch_size: the batch size to use during scoring

    Returns:
        albums: list of recommended album names
        artists: list of recommended artist names
    '''
    model.eval()

    # Score every artist/album item for this playlist
    all_item_ids = torch.tensor(artist_album['artist_album_id'].values, dtype=torch.long, device=device)
    user_ids = torch.full((len(all_item_ids),), playlist_id, dtype=torch.long, device=device)

    all_predictions = torch.zeros(len(all_item_ids), device=device)

    with torch.no_grad():
        for i in range(0, len(all_item_ids), batch_size):
            batch_user_ids = user_ids[i:i + batch_size]
            batch_item_ids = all_item_ids[i:i + batch_size]

            # Each row is [playlist_id, artist_album_id], as expected by the model
            input_tensor = torch.stack([batch_user_ids, batch_item_ids], dim=1)
            batch_predictions = model(input_tensor).squeeze(1)
            all_predictions[i:i + batch_size] = batch_predictions

    predictions = all_predictions.cpu().numpy()

    # Mask out items already present in the playlist
    albums_listened = set(playlists.loc[playlists['playlist_id'] == playlist_id, 'artist_album_id'].tolist())
    unlistened_mask = np.isin(artist_album['artist_album_id'].values, list(albums_listened), invert=True)

    # Take the top-N highest-scoring unlistened items, best first
    top_indices = np.argsort(predictions[unlistened_mask])[-top_n:][::-1]
    recs = artist_album['artist_album_id'].values[unlistened_mask][top_indices]

    # Look up names via the id index so the ranked order is preserved
    recs_names = artist_album.set_index('artist_album_id').loc[recs]
    album, artist = recs_names['album_name'].values, recs_names['artist_name'].values

    return album.tolist(), artist.tolist()
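

# Illustrative sketch only (not called by the app): how generate_recommendations
# would typically be invoked once the data and model below are loaded. The
# playlist id used here is an arbitrary assumption.
def _example_recommendations(artist_album, playlists, model, device):
    example_albums, example_artists = generate_recommendations(
        artist_album, playlists, model, playlist_id=0, device=device, top_n=5
    )
    for album_name, artist_name in zip(example_albums, example_artists):
        print(f'{artist_name} - {album_name}')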


def load_data():
    '''
    Loads the prefetched data from the data/processed directory.

    Inputs:
        None

    Returns:
        artist_album: DataFrame mapping artist_album_id to artist and album names
        playlists: DataFrame containing the playlists' contents
    '''
    data_dir = os.path.join(os.getcwd(), 'data', 'processed')
    artist_album = pd.read_csv(os.path.join(data_dir, 'artist_album.csv'))
    artist_album = artist_album[['artist_album_id', 'artist_album', 'artist_name', 'album_name']].drop_duplicates()
    playlists = pd.read_csv(os.path.join(data_dir, 'playlists.csv'))

    return artist_album, playlists
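

# For reference: the columns this app reads from the two CSVs (inferred from the
# code in this file, not from a documented schema):
#   artist_album.csv : artist_album_id, artist_album, artist_name, album_name
#   playlists.csv    : playlist_id, name, artist_album_id, artist_name, album_name, track_name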


# Load the data and the trained model once at import time; torch.load of a full,
# pickled model requires the NNColabFiltering class defined above to be in scope.
artist_album, playlists = load_data()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = torch.load('models/recommender.pt', map_location=device)


if __name__ == '__main__':

    st.header('Spotify Playlists')

    # Header images
    img1, img2 = st.columns(2)

    music_notes = Image.open('assets/music_notes.png')
    img1.image(music_notes, use_column_width=True)

    trumpet = Image.open('assets/trumpet.png')
    img2.image(trumpet, use_column_width=True)

    # Playlist picker in the sidebar; recommendations are generated for the selection
    with st.sidebar:
        playlist_name = st.selectbox(
            "Playlist Selection",
            list(set(playlists['name'].dropna()))
        )
        playlist_id = playlists['playlist_id'][playlists['name'] == playlist_name].values[0]
        albums, artists = generate_recommendations(artist_album, playlists, model, playlist_id, device)

    # Show the contents of the selected playlist
    st.dataframe(data=playlists[['artist_name', 'album_name', 'track_name']][playlists['playlist_id'] == playlist_id])

    st.write(f"*Recommendations for playlist:* {playlist_name}")
    col1, col2 = st.columns(2)
    with col1:
        st.write('Artist')
    with col2:
        st.write('Album')

    # List the recommended artist/album pairs in two columns
    for album, artist in zip(albums, artists):
        with col1:
            st.write(f"**{artist}**")
        with col2:
            st.write(f"**{album}**")