Upload 10 files
Browse files- .gitattributes +2 -0
- recommendation_module_project/data/processed/artist_album.csv +3 -0
- recommendation_module_project/data/processed/playlists.csv +3 -0
- recommendation_module_project/data/raw/data/playlists_100.parquet +3 -0
- recommendation_module_project/data/raw/data/playlists_150.parquet +3 -0
- recommendation_module_project/data/raw/data/playlists_200.parquet +3 -0
- recommendation_module_project/data/raw/data/playlists_50.parquet +3 -0
- recommendation_module_project/model.ipynb +1076 -0
- recommendation_module_project/model_(2).ipynb +0 -0
- recommendation_module_project/model_(2).py +632 -0
- recommendation_module_project/recommender.pt +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
recommendation_module_project/data/processed/artist_album.csv filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
recommendation_module_project/data/processed/playlists.csv filter=lfs diff=lfs merge=lfs -text
|
recommendation_module_project/data/processed/artist_album.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:332fcb8cb088acbc5390f00e451b33575dadc63b467e4859dd9e532ef5819f73
|
| 3 |
+
size 106221612
|
recommendation_module_project/data/processed/playlists.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc20e6149aea14d381a19507b256dce9ecb56bf987af6f37ec59f70f3ee3d060
|
| 3 |
+
size 206998675
|
recommendation_module_project/data/raw/data/playlists_100.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df1c572599da09a2b9d22f424bf20ef954c5e2d368501ca6400e3fae13f20b2c
|
| 3 |
+
size 82850937
|
recommendation_module_project/data/raw/data/playlists_150.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1c69aea4f0cb5b97e7539d567119f1b72346f83333ccca3d0feb79c249bdb84
|
| 3 |
+
size 82830471
|
recommendation_module_project/data/raw/data/playlists_200.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb3dd72495402bbf4ddf5ea270d042965ee3ed2b7ea7b7161c7ccb6db07654e5
|
| 3 |
+
size 83074496
|
recommendation_module_project/data/raw/data/playlists_50.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:60d4db353205c231cbae85d0e6140fdb5f38c2d03dfd4a71b2be81e8ff581ca2
|
| 3 |
+
size 82772449
|
recommendation_module_project/model.ipynb
ADDED
|
@@ -0,0 +1,1076 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {
|
| 7 |
+
"id": "uq9k8YYUKjnp"
|
| 8 |
+
},
|
| 9 |
+
"outputs": [],
|
| 10 |
+
"source": [
|
| 11 |
+
"import os\n",
|
| 12 |
+
"import urllib.request\n",
|
| 13 |
+
"import zipfile\n",
|
| 14 |
+
"import json\n",
|
| 15 |
+
"import pandas as pd\n",
|
| 16 |
+
"import time\n",
|
| 17 |
+
"import torch\n",
|
| 18 |
+
"import numpy as np\n",
|
| 19 |
+
"import pandas as pd\n",
|
| 20 |
+
"import torch.nn as nn\n",
|
| 21 |
+
"import torch.nn.functional as F\n",
|
| 22 |
+
"import torch.optim as optim\n",
|
| 23 |
+
"from torch.utils.data import DataLoader, TensorDataset\n",
|
| 24 |
+
"from sklearn.model_selection import train_test_split\n",
|
| 25 |
+
"import matplotlib.pyplot as plt"
|
| 26 |
+
]
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"cell_type": "code",
|
| 30 |
+
"execution_count": 2,
|
| 31 |
+
"metadata": {
|
| 32 |
+
"id": "L5h3Tsa0LIoo"
|
| 33 |
+
},
|
| 34 |
+
"outputs": [],
|
| 35 |
+
"source": [
|
| 36 |
+
"def unzip_archive(filepath, dir_path):\n",
|
| 37 |
+
" with zipfile.ZipFile(f\"{filepath}\", 'r') as zip_ref:\n",
|
| 38 |
+
" zip_ref.extractall(dir_path)\n",
|
| 39 |
+
"\n",
|
| 40 |
+
"unzip_archive(os.getcwd() + '/data/raw/spotify_million_playlist_dataset.zip', os.getcwd() + '/data/raw/playlists')\n"
|
| 41 |
+
]
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"cell_type": "code",
|
| 45 |
+
"execution_count": 3,
|
| 46 |
+
"metadata": {},
|
| 47 |
+
"outputs": [],
|
| 48 |
+
"source": [
|
| 49 |
+
"import shutil\n",
|
| 50 |
+
"\n",
|
| 51 |
+
"def make_dir(directory):\n",
|
| 52 |
+
" if os.path.exists(directory):\n",
|
| 53 |
+
" shutil.rmtree(directory)\n",
|
| 54 |
+
" os.makedirs(directory)\n",
|
| 55 |
+
" else:\n",
|
| 56 |
+
" os.makedirs(directory)\n",
|
| 57 |
+
" \n",
|
| 58 |
+
"directory = os.getcwd() + '/data/raw/data'\n",
|
| 59 |
+
"make_dir(directory)"
|
| 60 |
+
]
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"cell_type": "code",
|
| 64 |
+
"execution_count": 4,
|
| 65 |
+
"metadata": {},
|
| 66 |
+
"outputs": [],
|
| 67 |
+
"source": [
|
| 68 |
+
"cols = [\n",
|
| 69 |
+
" 'name',\n",
|
| 70 |
+
" 'pid',\n",
|
| 71 |
+
" 'num_followers',\n",
|
| 72 |
+
" 'pos',\n",
|
| 73 |
+
" 'artist_name',\n",
|
| 74 |
+
" 'track_name',\n",
|
| 75 |
+
" 'album_name'\n",
|
| 76 |
+
"]"
|
| 77 |
+
]
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"cell_type": "code",
|
| 81 |
+
"execution_count": 5,
|
| 82 |
+
"metadata": {
|
| 83 |
+
"colab": {
|
| 84 |
+
"base_uri": "https://localhost:8080/"
|
| 85 |
+
},
|
| 86 |
+
"id": "qyCujIu8cDGg",
|
| 87 |
+
"outputId": "0964ace3-2916-49e3-eebf-2e08e61d95d9"
|
| 88 |
+
},
|
| 89 |
+
"outputs": [
|
| 90 |
+
{
|
| 91 |
+
"name": "stdout",
|
| 92 |
+
"output_type": "stream",
|
| 93 |
+
"text": [
|
| 94 |
+
"mpd.slice.188000-188999.json\t100/1000\t10.0%"
|
| 95 |
+
]
|
| 96 |
+
}
|
| 97 |
+
],
|
| 98 |
+
"source": [
|
| 99 |
+
"\n",
|
| 100 |
+
"directory = os.getcwd() + '/data/raw/playlists/data'\n",
|
| 101 |
+
"df = pd.DataFrame()\n",
|
| 102 |
+
"index = 0\n",
|
| 103 |
+
"# Loop through all files in the directory\n",
|
| 104 |
+
"for filename in os.listdir(directory):\n",
|
| 105 |
+
" # Check if the item is a file (not a subdirectory)\n",
|
| 106 |
+
" if os.path.isfile(os.path.join(directory, filename)):\n",
|
| 107 |
+
" if filename.find('.json') != -1 :\n",
|
| 108 |
+
" index += 1\n",
|
| 109 |
+
"\n",
|
| 110 |
+
" # Show progress on one line: current filename, files processed so far, and percent of 1000\n",
|
| 111 |
+
" print(f'\\r{filename}\\t{index}/1000\\t{((index/1000)*100):.1f}%', end='')\n",
|
| 112 |
+
"\n",
|
| 113 |
+
" # Build the full path to the file so it can be opened below\n",
|
| 114 |
+
" full_path = os.path.join(directory, filename)\n",
|
| 115 |
+
"\n",
|
| 116 |
+
" with open(full_path, 'r') as file:\n",
|
| 117 |
+
" json_data = json.load(file)\n",
|
| 118 |
+
"\n",
|
| 119 |
+
" temp = pd.DataFrame(json_data['playlists'])\n",
|
| 120 |
+
" expanded_df = temp.explode('tracks').reset_index(drop=True)\n",
|
| 121 |
+
"\n",
|
| 122 |
+
" # Flatten the per-track records in the 'tracks' column into separate columns\n",
|
| 123 |
+
" json_normalized = pd.json_normalize(expanded_df['tracks'])\n",
|
| 124 |
+
"\n",
|
| 125 |
+
" # Concatenate the original DataFrame with the normalized JSON data\n",
|
| 126 |
+
" result = pd.concat([expanded_df.drop(columns=['tracks']), json_normalized], axis=1)\n",
|
| 127 |
+
" \n",
|
| 128 |
+
" result = result[cols]\n",
|
| 129 |
+
"\n",
|
| 130 |
+
" df = pd.concat([df, result], axis=0, ignore_index=True)\n",
|
| 131 |
+
" \n",
|
| 132 |
+
" if index % 50 == 0:\n",
|
| 133 |
+
" df.to_parquet(f'{os.getcwd()}/data/raw/data/playlists_{index % 1000}.parquet')\n",
|
| 134 |
+
" del df\n",
|
| 135 |
+
" df = pd.DataFrame()\n",
|
| 136 |
+
" if index % 100 == 0:\n",
|
| 137 |
+
" break"
|
| 138 |
+
]
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"cell_type": "code",
|
| 142 |
+
"execution_count": 6,
|
| 143 |
+
"metadata": {},
|
| 144 |
+
"outputs": [],
|
| 145 |
+
"source": [
|
| 146 |
+
"import pyarrow.parquet as pq\n",
|
| 147 |
+
"\n",
|
| 148 |
+
"def read_parquet_folder(folder_path):\n",
|
| 149 |
+
" dataframes = []\n",
|
| 150 |
+
" for file in os.listdir(folder_path):\n",
|
| 151 |
+
" if file.endswith('.parquet'):\n",
|
| 152 |
+
" file_path = os.path.join(folder_path, file)\n",
|
| 153 |
+
" df = pd.read_parquet(file_path)\n",
|
| 154 |
+
" dataframes.append(df)\n",
|
| 155 |
+
" \n",
|
| 156 |
+
" return pd.concat(dataframes, ignore_index=True)\n",
|
| 157 |
+
"\n",
|
| 158 |
+
"folder_path = os.getcwd() + '/data/raw/data'\n",
|
| 159 |
+
"df = read_parquet_folder(folder_path)"
|
| 160 |
+
]
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"cell_type": "code",
|
| 164 |
+
"execution_count": 7,
|
| 165 |
+
"metadata": {},
|
| 166 |
+
"outputs": [],
|
| 167 |
+
"source": [
|
| 168 |
+
"directory = os.getcwd() + '/data/raw/mappings'\n",
|
| 169 |
+
"make_dir(directory)"
|
| 170 |
+
]
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"cell_type": "code",
|
| 174 |
+
"execution_count": 8,
|
| 175 |
+
"metadata": {},
|
| 176 |
+
"outputs": [],
|
| 177 |
+
"source": [
|
| 178 |
+
"def create_ids(df, col, name):\n",
|
| 179 |
+
" # Create a dictionary mapping unique values to IDs\n",
|
| 180 |
+
" value_to_id = {val: i for i, val in enumerate(df[col].unique())}\n",
|
| 181 |
+
"\n",
|
| 182 |
+
" # Create a new column with the IDs\n",
|
| 183 |
+
" df[f'{name}_id'] = df[col].map(value_to_id)\n",
|
| 184 |
+
" df[[f'{name}_id', col]].drop_duplicates().to_csv(os.getcwd() + f'/data/raw/mappings/{name}.csv')\n",
|
| 185 |
+
" # df = df.drop(col, axis=1)\n",
|
| 186 |
+
" return df"
|
| 187 |
+
]
|
| 188 |
+
},
|
| 189 |
+
{
|
| 190 |
+
"cell_type": "code",
|
| 191 |
+
"execution_count": 9,
|
| 192 |
+
"metadata": {},
|
| 193 |
+
"outputs": [],
|
| 194 |
+
"source": [
|
| 195 |
+
"df = create_ids(df, 'artist_name', 'artist')\n",
|
| 196 |
+
"df = create_ids(df, 'pid', 'playlist')\n",
|
| 197 |
+
"df = create_ids(df, 'track_name', 'song')\n",
|
| 198 |
+
"df = create_ids(df, 'album_name', 'album')"
|
| 199 |
+
]
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
"cell_type": "code",
|
| 203 |
+
"execution_count": 10,
|
| 204 |
+
"metadata": {},
|
| 205 |
+
"outputs": [],
|
| 206 |
+
"source": [
|
| 207 |
+
"df['artist_count'] = df.groupby(['playlist_id','artist_id'])['song_id'].transform('nunique')\n",
|
| 208 |
+
"df['album_count'] = df.groupby(['playlist_id','artist_id'])['album_id'].transform('nunique')\n",
|
| 209 |
+
"df['song_count'] = df.groupby(['playlist_id','artist_id'])['song_id'].transform('count')"
|
| 210 |
+
]
|
| 211 |
+
},
|
| 212 |
+
{
|
| 213 |
+
"cell_type": "code",
|
| 214 |
+
"execution_count": 11,
|
| 215 |
+
"metadata": {},
|
| 216 |
+
"outputs": [],
|
| 217 |
+
"source": [
|
| 218 |
+
"df['playlist_songs'] = df.groupby(['playlist_id'])['pos'].transform('max')\n",
|
| 219 |
+
"df['playlist_songs'] += 1"
|
| 220 |
+
]
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"cell_type": "code",
|
| 224 |
+
"execution_count": 12,
|
| 225 |
+
"metadata": {},
|
| 226 |
+
"outputs": [],
|
| 227 |
+
"source": [
|
| 228 |
+
"df['artist_percent'] = df['artist_count'] / df['playlist_songs']\n",
|
| 229 |
+
"df['song_percent'] = df['song_count'] / df['playlist_songs']\n",
|
| 230 |
+
"df['album_percent'] = df['album_count'] / df['playlist_songs']"
|
| 231 |
+
]
|
| 232 |
+
},
|
| 233 |
+
{
|
| 234 |
+
"cell_type": "code",
|
| 235 |
+
"execution_count": 13,
|
| 236 |
+
"metadata": {},
|
| 237 |
+
"outputs": [
|
| 238 |
+
{
|
| 239 |
+
"data": {
|
| 240 |
+
"text/html": [
|
| 241 |
+
"<div>\n",
|
| 242 |
+
"<style scoped>\n",
|
| 243 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 244 |
+
" vertical-align: middle;\n",
|
| 245 |
+
" }\n",
|
| 246 |
+
"\n",
|
| 247 |
+
" .dataframe tbody tr th {\n",
|
| 248 |
+
" vertical-align: top;\n",
|
| 249 |
+
" }\n",
|
| 250 |
+
"\n",
|
| 251 |
+
" .dataframe thead th {\n",
|
| 252 |
+
" text-align: right;\n",
|
| 253 |
+
" }\n",
|
| 254 |
+
"</style>\n",
|
| 255 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 256 |
+
" <thead>\n",
|
| 257 |
+
" <tr style=\"text-align: right;\">\n",
|
| 258 |
+
" <th></th>\n",
|
| 259 |
+
" <th>name</th>\n",
|
| 260 |
+
" <th>pid</th>\n",
|
| 261 |
+
" <th>num_followers</th>\n",
|
| 262 |
+
" <th>pos</th>\n",
|
| 263 |
+
" <th>artist_name</th>\n",
|
| 264 |
+
" <th>track_name</th>\n",
|
| 265 |
+
" <th>album_name</th>\n",
|
| 266 |
+
" <th>artist_id</th>\n",
|
| 267 |
+
" <th>playlist_id</th>\n",
|
| 268 |
+
" <th>song_id</th>\n",
|
| 269 |
+
" <th>album_id</th>\n",
|
| 270 |
+
" <th>artist_count</th>\n",
|
| 271 |
+
" <th>album_count</th>\n",
|
| 272 |
+
" <th>song_count</th>\n",
|
| 273 |
+
" <th>playlist_songs</th>\n",
|
| 274 |
+
" <th>artist_percent</th>\n",
|
| 275 |
+
" <th>song_percent</th>\n",
|
| 276 |
+
" <th>album_percent</th>\n",
|
| 277 |
+
" </tr>\n",
|
| 278 |
+
" </thead>\n",
|
| 279 |
+
" <tbody>\n",
|
| 280 |
+
" <tr>\n",
|
| 281 |
+
" <th>212</th>\n",
|
| 282 |
+
" <td>throwbacks</td>\n",
|
| 283 |
+
" <td>143005</td>\n",
|
| 284 |
+
" <td>2</td>\n",
|
| 285 |
+
" <td>0</td>\n",
|
| 286 |
+
" <td>R. Kelly</td>\n",
|
| 287 |
+
" <td>Ignition - Remix</td>\n",
|
| 288 |
+
" <td>Chocolate Factory</td>\n",
|
| 289 |
+
" <td>108</td>\n",
|
| 290 |
+
" <td>5</td>\n",
|
| 291 |
+
" <td>203</td>\n",
|
| 292 |
+
" <td>152</td>\n",
|
| 293 |
+
" <td>1</td>\n",
|
| 294 |
+
" <td>1</td>\n",
|
| 295 |
+
" <td>1</td>\n",
|
| 296 |
+
" <td>193</td>\n",
|
| 297 |
+
" <td>0.005181</td>\n",
|
| 298 |
+
" <td>0.005181</td>\n",
|
| 299 |
+
" <td>0.005181</td>\n",
|
| 300 |
+
" </tr>\n",
|
| 301 |
+
" <tr>\n",
|
| 302 |
+
" <th>213</th>\n",
|
| 303 |
+
" <td>throwbacks</td>\n",
|
| 304 |
+
" <td>143005</td>\n",
|
| 305 |
+
" <td>2</td>\n",
|
| 306 |
+
" <td>1</td>\n",
|
| 307 |
+
" <td>Backstreet Boys</td>\n",
|
| 308 |
+
" <td>I Want It That Way</td>\n",
|
| 309 |
+
" <td>Original Album Classics</td>\n",
|
| 310 |
+
" <td>109</td>\n",
|
| 311 |
+
" <td>5</td>\n",
|
| 312 |
+
" <td>204</td>\n",
|
| 313 |
+
" <td>153</td>\n",
|
| 314 |
+
" <td>1</td>\n",
|
| 315 |
+
" <td>1</td>\n",
|
| 316 |
+
" <td>1</td>\n",
|
| 317 |
+
" <td>193</td>\n",
|
| 318 |
+
" <td>0.005181</td>\n",
|
| 319 |
+
" <td>0.005181</td>\n",
|
| 320 |
+
" <td>0.005181</td>\n",
|
| 321 |
+
" </tr>\n",
|
| 322 |
+
" <tr>\n",
|
| 323 |
+
" <th>214</th>\n",
|
| 324 |
+
" <td>throwbacks</td>\n",
|
| 325 |
+
" <td>143005</td>\n",
|
| 326 |
+
" <td>2</td>\n",
|
| 327 |
+
" <td>2</td>\n",
|
| 328 |
+
" <td>*NSYNC</td>\n",
|
| 329 |
+
" <td>Bye Bye Bye</td>\n",
|
| 330 |
+
" <td>No Strings Attached</td>\n",
|
| 331 |
+
" <td>110</td>\n",
|
| 332 |
+
" <td>5</td>\n",
|
| 333 |
+
" <td>205</td>\n",
|
| 334 |
+
" <td>154</td>\n",
|
| 335 |
+
" <td>1</td>\n",
|
| 336 |
+
" <td>1</td>\n",
|
| 337 |
+
" <td>1</td>\n",
|
| 338 |
+
" <td>193</td>\n",
|
| 339 |
+
" <td>0.005181</td>\n",
|
| 340 |
+
" <td>0.005181</td>\n",
|
| 341 |
+
" <td>0.005181</td>\n",
|
| 342 |
+
" </tr>\n",
|
| 343 |
+
" <tr>\n",
|
| 344 |
+
" <th>215</th>\n",
|
| 345 |
+
" <td>throwbacks</td>\n",
|
| 346 |
+
" <td>143005</td>\n",
|
| 347 |
+
" <td>2</td>\n",
|
| 348 |
+
" <td>3</td>\n",
|
| 349 |
+
" <td>Fountains Of Wayne</td>\n",
|
| 350 |
+
" <td>Stacy's Mom</td>\n",
|
| 351 |
+
" <td>Welcome Interstate Managers</td>\n",
|
| 352 |
+
" <td>111</td>\n",
|
| 353 |
+
" <td>5</td>\n",
|
| 354 |
+
" <td>206</td>\n",
|
| 355 |
+
" <td>155</td>\n",
|
| 356 |
+
" <td>1</td>\n",
|
| 357 |
+
" <td>1</td>\n",
|
| 358 |
+
" <td>1</td>\n",
|
| 359 |
+
" <td>193</td>\n",
|
| 360 |
+
" <td>0.005181</td>\n",
|
| 361 |
+
" <td>0.005181</td>\n",
|
| 362 |
+
" <td>0.005181</td>\n",
|
| 363 |
+
" </tr>\n",
|
| 364 |
+
" <tr>\n",
|
| 365 |
+
" <th>216</th>\n",
|
| 366 |
+
" <td>throwbacks</td>\n",
|
| 367 |
+
" <td>143005</td>\n",
|
| 368 |
+
" <td>2</td>\n",
|
| 369 |
+
" <td>4</td>\n",
|
| 370 |
+
" <td>Bowling For Soup</td>\n",
|
| 371 |
+
" <td>1985</td>\n",
|
| 372 |
+
" <td>A Hangover You Don't Deserve</td>\n",
|
| 373 |
+
" <td>112</td>\n",
|
| 374 |
+
" <td>5</td>\n",
|
| 375 |
+
" <td>207</td>\n",
|
| 376 |
+
" <td>156</td>\n",
|
| 377 |
+
" <td>1</td>\n",
|
| 378 |
+
" <td>1</td>\n",
|
| 379 |
+
" <td>1</td>\n",
|
| 380 |
+
" <td>193</td>\n",
|
| 381 |
+
" <td>0.005181</td>\n",
|
| 382 |
+
" <td>0.005181</td>\n",
|
| 383 |
+
" <td>0.005181</td>\n",
|
| 384 |
+
" </tr>\n",
|
| 385 |
+
" <tr>\n",
|
| 386 |
+
" <th>...</th>\n",
|
| 387 |
+
" <td>...</td>\n",
|
| 388 |
+
" <td>...</td>\n",
|
| 389 |
+
" <td>...</td>\n",
|
| 390 |
+
" <td>...</td>\n",
|
| 391 |
+
" <td>...</td>\n",
|
| 392 |
+
" <td>...</td>\n",
|
| 393 |
+
" <td>...</td>\n",
|
| 394 |
+
" <td>...</td>\n",
|
| 395 |
+
" <td>...</td>\n",
|
| 396 |
+
" <td>...</td>\n",
|
| 397 |
+
" <td>...</td>\n",
|
| 398 |
+
" <td>...</td>\n",
|
| 399 |
+
" <td>...</td>\n",
|
| 400 |
+
" <td>...</td>\n",
|
| 401 |
+
" <td>...</td>\n",
|
| 402 |
+
" <td>...</td>\n",
|
| 403 |
+
" <td>...</td>\n",
|
| 404 |
+
" <td>...</td>\n",
|
| 405 |
+
" </tr>\n",
|
| 406 |
+
" <tr>\n",
|
| 407 |
+
" <th>400</th>\n",
|
| 408 |
+
" <td>throwbacks</td>\n",
|
| 409 |
+
" <td>143005</td>\n",
|
| 410 |
+
" <td>2</td>\n",
|
| 411 |
+
" <td>188</td>\n",
|
| 412 |
+
" <td>JoJo</td>\n",
|
| 413 |
+
" <td>Too Little, Too Late - Radio Version</td>\n",
|
| 414 |
+
" <td>Too Little, Too Late</td>\n",
|
| 415 |
+
" <td>199</td>\n",
|
| 416 |
+
" <td>5</td>\n",
|
| 417 |
+
" <td>390</td>\n",
|
| 418 |
+
" <td>293</td>\n",
|
| 419 |
+
" <td>1</td>\n",
|
| 420 |
+
" <td>1</td>\n",
|
| 421 |
+
" <td>1</td>\n",
|
| 422 |
+
" <td>193</td>\n",
|
| 423 |
+
" <td>0.005181</td>\n",
|
| 424 |
+
" <td>0.005181</td>\n",
|
| 425 |
+
" <td>0.005181</td>\n",
|
| 426 |
+
" </tr>\n",
|
| 427 |
+
" <tr>\n",
|
| 428 |
+
" <th>401</th>\n",
|
| 429 |
+
" <td>throwbacks</td>\n",
|
| 430 |
+
" <td>143005</td>\n",
|
| 431 |
+
" <td>2</td>\n",
|
| 432 |
+
" <td>189</td>\n",
|
| 433 |
+
" <td>Spice Girls</td>\n",
|
| 434 |
+
" <td>Wannabe - Radio Edit</td>\n",
|
| 435 |
+
" <td>Spice</td>\n",
|
| 436 |
+
" <td>200</td>\n",
|
| 437 |
+
" <td>5</td>\n",
|
| 438 |
+
" <td>391</td>\n",
|
| 439 |
+
" <td>294</td>\n",
|
| 440 |
+
" <td>1</td>\n",
|
| 441 |
+
" <td>1</td>\n",
|
| 442 |
+
" <td>1</td>\n",
|
| 443 |
+
" <td>193</td>\n",
|
| 444 |
+
" <td>0.005181</td>\n",
|
| 445 |
+
" <td>0.005181</td>\n",
|
| 446 |
+
" <td>0.005181</td>\n",
|
| 447 |
+
" </tr>\n",
|
| 448 |
+
" <tr>\n",
|
| 449 |
+
" <th>402</th>\n",
|
| 450 |
+
" <td>throwbacks</td>\n",
|
| 451 |
+
" <td>143005</td>\n",
|
| 452 |
+
" <td>2</td>\n",
|
| 453 |
+
" <td>190</td>\n",
|
| 454 |
+
" <td>MiMS</td>\n",
|
| 455 |
+
" <td>This Is Why I'm Hot</td>\n",
|
| 456 |
+
" <td>Music Is My Savior</td>\n",
|
| 457 |
+
" <td>201</td>\n",
|
| 458 |
+
" <td>5</td>\n",
|
| 459 |
+
" <td>392</td>\n",
|
| 460 |
+
" <td>295</td>\n",
|
| 461 |
+
" <td>1</td>\n",
|
| 462 |
+
" <td>1</td>\n",
|
| 463 |
+
" <td>1</td>\n",
|
| 464 |
+
" <td>193</td>\n",
|
| 465 |
+
" <td>0.005181</td>\n",
|
| 466 |
+
" <td>0.005181</td>\n",
|
| 467 |
+
" <td>0.005181</td>\n",
|
| 468 |
+
" </tr>\n",
|
| 469 |
+
" <tr>\n",
|
| 470 |
+
" <th>403</th>\n",
|
| 471 |
+
" <td>throwbacks</td>\n",
|
| 472 |
+
" <td>143005</td>\n",
|
| 473 |
+
" <td>2</td>\n",
|
| 474 |
+
" <td>191</td>\n",
|
| 475 |
+
" <td>Rihanna</td>\n",
|
| 476 |
+
" <td>Disturbia</td>\n",
|
| 477 |
+
" <td>Good Girl Gone Bad</td>\n",
|
| 478 |
+
" <td>115</td>\n",
|
| 479 |
+
" <td>5</td>\n",
|
| 480 |
+
" <td>393</td>\n",
|
| 481 |
+
" <td>296</td>\n",
|
| 482 |
+
" <td>3</td>\n",
|
| 483 |
+
" <td>3</td>\n",
|
| 484 |
+
" <td>3</td>\n",
|
| 485 |
+
" <td>193</td>\n",
|
| 486 |
+
" <td>0.015544</td>\n",
|
| 487 |
+
" <td>0.015544</td>\n",
|
| 488 |
+
" <td>0.015544</td>\n",
|
| 489 |
+
" </tr>\n",
|
| 490 |
+
" <tr>\n",
|
| 491 |
+
" <th>404</th>\n",
|
| 492 |
+
" <td>throwbacks</td>\n",
|
| 493 |
+
" <td>143005</td>\n",
|
| 494 |
+
" <td>2</td>\n",
|
| 495 |
+
" <td>192</td>\n",
|
| 496 |
+
" <td>DEV</td>\n",
|
| 497 |
+
" <td>Bass Down Low</td>\n",
|
| 498 |
+
" <td>The Night The Sun Came Up</td>\n",
|
| 499 |
+
" <td>179</td>\n",
|
| 500 |
+
" <td>5</td>\n",
|
| 501 |
+
" <td>394</td>\n",
|
| 502 |
+
" <td>264</td>\n",
|
| 503 |
+
" <td>2</td>\n",
|
| 504 |
+
" <td>1</td>\n",
|
| 505 |
+
" <td>2</td>\n",
|
| 506 |
+
" <td>193</td>\n",
|
| 507 |
+
" <td>0.010363</td>\n",
|
| 508 |
+
" <td>0.010363</td>\n",
|
| 509 |
+
" <td>0.005181</td>\n",
|
| 510 |
+
" </tr>\n",
|
| 511 |
+
" </tbody>\n",
|
| 512 |
+
"</table>\n",
|
| 513 |
+
"<p>193 rows × 18 columns</p>\n",
|
| 514 |
+
"</div>"
|
| 515 |
+
],
|
| 516 |
+
"text/plain": [
|
| 517 |
+
" name pid num_followers pos artist_name \\\n",
|
| 518 |
+
"212 throwbacks 143005 2 0 R. Kelly \n",
|
| 519 |
+
"213 throwbacks 143005 2 1 Backstreet Boys \n",
|
| 520 |
+
"214 throwbacks 143005 2 2 *NSYNC \n",
|
| 521 |
+
"215 throwbacks 143005 2 3 Fountains Of Wayne \n",
|
| 522 |
+
"216 throwbacks 143005 2 4 Bowling For Soup \n",
|
| 523 |
+
".. ... ... ... ... ... \n",
|
| 524 |
+
"400 throwbacks 143005 2 188 JoJo \n",
|
| 525 |
+
"401 throwbacks 143005 2 189 Spice Girls \n",
|
| 526 |
+
"402 throwbacks 143005 2 190 MiMS \n",
|
| 527 |
+
"403 throwbacks 143005 2 191 Rihanna \n",
|
| 528 |
+
"404 throwbacks 143005 2 192 DEV \n",
|
| 529 |
+
"\n",
|
| 530 |
+
" track_name album_name \\\n",
|
| 531 |
+
"212 Ignition - Remix Chocolate Factory \n",
|
| 532 |
+
"213 I Want It That Way Original Album Classics \n",
|
| 533 |
+
"214 Bye Bye Bye No Strings Attached \n",
|
| 534 |
+
"215 Stacy's Mom Welcome Interstate Managers \n",
|
| 535 |
+
"216 1985 A Hangover You Don't Deserve \n",
|
| 536 |
+
".. ... ... \n",
|
| 537 |
+
"400 Too Little, Too Late - Radio Version Too Little, Too Late \n",
|
| 538 |
+
"401 Wannabe - Radio Edit Spice \n",
|
| 539 |
+
"402 This Is Why I'm Hot Music Is My Savior \n",
|
| 540 |
+
"403 Disturbia Good Girl Gone Bad \n",
|
| 541 |
+
"404 Bass Down Low The Night The Sun Came Up \n",
|
| 542 |
+
"\n",
|
| 543 |
+
" artist_id playlist_id song_id album_id artist_count album_count \\\n",
|
| 544 |
+
"212 108 5 203 152 1 1 \n",
|
| 545 |
+
"213 109 5 204 153 1 1 \n",
|
| 546 |
+
"214 110 5 205 154 1 1 \n",
|
| 547 |
+
"215 111 5 206 155 1 1 \n",
|
| 548 |
+
"216 112 5 207 156 1 1 \n",
|
| 549 |
+
".. ... ... ... ... ... ... \n",
|
| 550 |
+
"400 199 5 390 293 1 1 \n",
|
| 551 |
+
"401 200 5 391 294 1 1 \n",
|
| 552 |
+
"402 201 5 392 295 1 1 \n",
|
| 553 |
+
"403 115 5 393 296 3 3 \n",
|
| 554 |
+
"404 179 5 394 264 2 1 \n",
|
| 555 |
+
"\n",
|
| 556 |
+
" song_count playlist_songs artist_percent song_percent album_percent \n",
|
| 557 |
+
"212 1 193 0.005181 0.005181 0.005181 \n",
|
| 558 |
+
"213 1 193 0.005181 0.005181 0.005181 \n",
|
| 559 |
+
"214 1 193 0.005181 0.005181 0.005181 \n",
|
| 560 |
+
"215 1 193 0.005181 0.005181 0.005181 \n",
|
| 561 |
+
"216 1 193 0.005181 0.005181 0.005181 \n",
|
| 562 |
+
".. ... ... ... ... ... \n",
|
| 563 |
+
"400 1 193 0.005181 0.005181 0.005181 \n",
|
| 564 |
+
"401 1 193 0.005181 0.005181 0.005181 \n",
|
| 565 |
+
"402 1 193 0.005181 0.005181 0.005181 \n",
|
| 566 |
+
"403 3 193 0.015544 0.015544 0.015544 \n",
|
| 567 |
+
"404 2 193 0.010363 0.010363 0.005181 \n",
|
| 568 |
+
"\n",
|
| 569 |
+
"[193 rows x 18 columns]"
|
| 570 |
+
]
|
| 571 |
+
},
|
| 572 |
+
"execution_count": 13,
|
| 573 |
+
"metadata": {},
|
| 574 |
+
"output_type": "execute_result"
|
| 575 |
+
}
|
| 576 |
+
],
|
| 577 |
+
"source": [
|
| 578 |
+
"df[df['playlist_id'] == 5]"
|
| 579 |
+
]
|
| 580 |
+
},
|
| 581 |
+
{
|
| 582 |
+
"cell_type": "code",
|
| 583 |
+
"execution_count": 14,
|
| 584 |
+
"metadata": {},
|
| 585 |
+
"outputs": [
|
| 586 |
+
{
|
| 587 |
+
"data": {
|
| 588 |
+
"text/html": [
|
| 589 |
+
"<div>\n",
|
| 590 |
+
"<style scoped>\n",
|
| 591 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 592 |
+
" vertical-align: middle;\n",
|
| 593 |
+
" }\n",
|
| 594 |
+
"\n",
|
| 595 |
+
" .dataframe tbody tr th {\n",
|
| 596 |
+
" vertical-align: top;\n",
|
| 597 |
+
" }\n",
|
| 598 |
+
"\n",
|
| 599 |
+
" .dataframe thead th {\n",
|
| 600 |
+
" text-align: right;\n",
|
| 601 |
+
" }\n",
|
| 602 |
+
"</style>\n",
|
| 603 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 604 |
+
" <thead>\n",
|
| 605 |
+
" <tr style=\"text-align: right;\">\n",
|
| 606 |
+
" <th></th>\n",
|
| 607 |
+
" <th>playlist_id</th>\n",
|
| 608 |
+
" <th>artist_id</th>\n",
|
| 609 |
+
" <th>artist_percent</th>\n",
|
| 610 |
+
" </tr>\n",
|
| 611 |
+
" </thead>\n",
|
| 612 |
+
" <tbody>\n",
|
| 613 |
+
" <tr>\n",
|
| 614 |
+
" <th>0</th>\n",
|
| 615 |
+
" <td>0</td>\n",
|
| 616 |
+
" <td>0</td>\n",
|
| 617 |
+
" <td>0.571429</td>\n",
|
| 618 |
+
" </tr>\n",
|
| 619 |
+
" <tr>\n",
|
| 620 |
+
" <th>1</th>\n",
|
| 621 |
+
" <td>0</td>\n",
|
| 622 |
+
" <td>0</td>\n",
|
| 623 |
+
" <td>0.571429</td>\n",
|
| 624 |
+
" </tr>\n",
|
| 625 |
+
" <tr>\n",
|
| 626 |
+
" <th>2</th>\n",
|
| 627 |
+
" <td>0</td>\n",
|
| 628 |
+
" <td>0</td>\n",
|
| 629 |
+
" <td>0.571429</td>\n",
|
| 630 |
+
" </tr>\n",
|
| 631 |
+
" <tr>\n",
|
| 632 |
+
" <th>3</th>\n",
|
| 633 |
+
" <td>0</td>\n",
|
| 634 |
+
" <td>0</td>\n",
|
| 635 |
+
" <td>0.571429</td>\n",
|
| 636 |
+
" </tr>\n",
|
| 637 |
+
" <tr>\n",
|
| 638 |
+
" <th>4</th>\n",
|
| 639 |
+
" <td>0</td>\n",
|
| 640 |
+
" <td>0</td>\n",
|
| 641 |
+
" <td>0.571429</td>\n",
|
| 642 |
+
" </tr>\n",
|
| 643 |
+
" </tbody>\n",
|
| 644 |
+
"</table>\n",
|
| 645 |
+
"</div>"
|
| 646 |
+
],
|
| 647 |
+
"text/plain": [
|
| 648 |
+
" playlist_id artist_id artist_percent\n",
|
| 649 |
+
"0 0 0 0.571429\n",
|
| 650 |
+
"1 0 0 0.571429\n",
|
| 651 |
+
"2 0 0 0.571429\n",
|
| 652 |
+
"3 0 0 0.571429\n",
|
| 653 |
+
"4 0 0 0.571429"
|
| 654 |
+
]
|
| 655 |
+
},
|
| 656 |
+
"execution_count": 14,
|
| 657 |
+
"metadata": {},
|
| 658 |
+
"output_type": "execute_result"
|
| 659 |
+
}
|
| 660 |
+
],
|
| 661 |
+
"source": [
|
| 662 |
+
"artists = df.loc[:,['playlist_id','artist_id','album_id','album_percent']]\n",
|
| 663 |
+
"artists.head()"
|
| 664 |
+
]
|
| 665 |
+
},
|
| 666 |
+
{
|
| 667 |
+
"cell_type": "code",
|
| 668 |
+
"execution_count": 15,
|
| 669 |
+
"metadata": {},
|
| 670 |
+
"outputs": [],
|
| 671 |
+
"source": [
|
| 672 |
+
"X = artists.loc[:,['playlist_id','artist_id','album_id']]\n",
|
| 673 |
+
"y = artists.loc[:,'album_percent']\n",
|
| 674 |
+
"\n",
|
| 675 |
+
"# Split our data into training and test sets\n",
|
| 676 |
+
"X_train, X_val, y_train, y_val = train_test_split(X,y,random_state=0, test_size=0.2)"
|
| 677 |
+
]
|
| 678 |
+
},
|
| 679 |
+
{
|
| 680 |
+
"cell_type": "code",
|
| 681 |
+
"execution_count": 16,
|
| 682 |
+
"metadata": {},
|
| 683 |
+
"outputs": [],
|
| 684 |
+
"source": [
|
| 685 |
+
"def prep_dataloaders(X_train,y_train,X_val,y_val,batch_size):\n",
|
| 686 |
+
" # Convert training and test data to TensorDatasets\n",
|
| 687 |
+
" trainset = TensorDataset(torch.from_numpy(np.array(X_train)).long(), \n",
|
| 688 |
+
" torch.from_numpy(np.array(y_train)).float())\n",
|
| 689 |
+
" valset = TensorDataset(torch.from_numpy(np.array(X_val)).long(), \n",
|
| 690 |
+
" torch.from_numpy(np.array(y_val)).float())\n",
|
| 691 |
+
"\n",
|
| 692 |
+
" # Create Dataloaders for our training and test data to allow us to iterate over minibatches \n",
|
| 693 |
+
" trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)\n",
|
| 694 |
+
" valloader = torch.utils.data.DataLoader(valset, batch_size=batch_size, shuffle=False)\n",
|
| 695 |
+
"\n",
|
| 696 |
+
" return trainloader, valloader\n",
|
| 697 |
+
"\n",
|
| 698 |
+
"batchsize = 64\n",
|
| 699 |
+
"trainloader,valloader = prep_dataloaders(X_train,y_train,X_val,y_val,batchsize)"
|
| 700 |
+
]
|
| 701 |
+
},
|
| 702 |
+
{
|
| 703 |
+
"cell_type": "code",
|
| 704 |
+
"execution_count": 17,
|
| 705 |
+
"metadata": {},
|
| 706 |
+
"outputs": [],
|
| 707 |
+
"source": [
|
| 708 |
+
"class NNColabFiltering(nn.Module):\n",
|
| 709 |
+
" \n",
|
| 710 |
+
" def __init__(self, n_playlists, n_artists, embedding_dim_users, embedding_dim_items, n_activations, rating_range):\n",
|
| 711 |
+
" super().__init__()\n",
|
| 712 |
+
" self.user_embeddings = nn.Embedding(num_embeddings=n_playlists,embedding_dim=embedding_dim_users)\n",
|
| 713 |
+
" self.item_embeddings = nn.Embedding(num_embeddings=n_artists,embedding_dim=embedding_dim_items)\n",
|
| 714 |
+
" self.fc1 = nn.Linear(embedding_dim_users+embedding_dim_items,n_activations)\n",
|
| 715 |
+
" self.fc2 = nn.Linear(n_activations,1)\n",
|
| 716 |
+
" self.rating_range = rating_range\n",
|
| 717 |
+
"\n",
|
| 718 |
+
" def forward(self, X):\n",
|
| 719 |
+
" # Get embeddings for minibatch\n",
|
| 720 |
+
" embedded_users = self.user_embeddings(X[:,0])\n",
|
| 721 |
+
" embedded_items = self.item_embeddings(X[:,1])\n",
|
| 722 |
+
" # Concatenate user and item embeddings\n",
|
| 723 |
+
" embeddings = torch.cat([embedded_users,embedded_items],dim=1)\n",
|
| 724 |
+
" # Pass embeddings through network\n",
|
| 725 |
+
" preds = self.fc1(embeddings)\n",
|
| 726 |
+
" preds = F.relu(preds)\n",
|
| 727 |
+
" preds = self.fc2(preds)\n",
|
| 728 |
+
" # Scale predicted ratings to target-range [low,high]\n",
|
| 729 |
+
" preds = torch.sigmoid(preds) * (self.rating_range[1]-self.rating_range[0]) + self.rating_range[0]\n",
|
| 730 |
+
" return preds"
|
| 731 |
+
]
|
| 732 |
+
},
|
| 733 |
+
{
|
| 734 |
+
"cell_type": "code",
|
| 735 |
+
"execution_count": 18,
|
| 736 |
+
"metadata": {},
|
| 737 |
+
"outputs": [],
|
| 738 |
+
"source": [
|
| 739 |
+
"class PMFRecommender(nn.Module):\n",
|
| 740 |
+
" \n",
|
| 741 |
+
" def __init__(self,n_users, n_items, embedding_dim ,rating_range):\n",
|
| 742 |
+
" super().__init__() \n",
|
| 743 |
+
" self.user_embeddings = nn.Embedding(num_embeddings=n_users,embedding_dim=embedding_dim) # user embeddings\n",
|
| 744 |
+
" self.user_bias = nn.Embedding(num_embeddings=n_users,embedding_dim=1) # user bias\n",
|
| 745 |
+
" self.item_embeddings = nn.Embedding(num_embeddings=n_items,embedding_dim=embedding_dim) # item embeddings\n",
|
| 746 |
+
" self.item_bias = nn.Embedding(num_embeddings=n_items,embedding_dim=1) # item bias\n",
|
| 747 |
+
" self.rating_range = rating_range # range of expected ratings e.g. 0-5\n",
|
| 748 |
+
"\n",
|
| 749 |
+
" def forward(self, X):\n",
|
| 750 |
+
" embedded_users = self.user_embeddings(X[:,0]) # dims = [batch_size, embedding_dim]\n",
|
| 751 |
+
" embedded_items = self.item_embeddings(X[:,1]) # dims = [batch_size, embedding_dim]\n",
|
| 752 |
+
" # Take dot product of each user embedding with the embedding of item to be rated to get the predicted rating\n",
|
| 753 |
+
" preds = torch.sum(embedded_users * embedded_items, dim=1, keepdim=True) \n",
|
| 754 |
+
" # Add user and item bias to rating\n",
|
| 755 |
+
" preds = preds.view(-1,1) + self.user_bias(X[:,0]) + self.item_bias(X[:,1])\n",
|
| 756 |
+
" # Scale predicted ratings to target-range [low,high]\n",
|
| 757 |
+
" preds = torch.sigmoid(preds) * (self.rating_range[1]-self.rating_range[0]) + self.rating_range[0]\n",
|
| 758 |
+
" return preds\n",
|
| 759 |
+
" "
|
| 760 |
+
]
|
| 761 |
+
},
|
| 762 |
+
{
|
| 763 |
+
"cell_type": "code",
|
| 764 |
+
"execution_count": null,
|
| 765 |
+
"metadata": {},
|
| 766 |
+
"outputs": [],
|
| 767 |
+
"source": [
|
| 768 |
+
"class NNHybridFiltering(nn.Module):\n",
|
| 769 |
+
" \n",
|
| 770 |
+
" def __init__(self, n_users, n_items, n_genres, embdim_users, embdim_items, embdim_genres, n_activations, rating_range):\n",
|
| 771 |
+
" super().__init__()\n",
|
| 772 |
+
" self.user_embeddings = nn.Embedding(num_embeddings=n_users,embedding_dim=embdim_users)\n",
|
| 773 |
+
" self.item_embeddings = nn.Embedding(num_embeddings=n_items,embedding_dim=embdim_items)\n",
|
| 774 |
+
" self.genre_embeddings = nn.Embedding(num_embeddings=n_genres,embedding_dim=embdim_genres)\n",
|
| 775 |
+
" self.fc1 = nn.Linear(embdim_users+embdim_items+embdim_genres,n_activations)\n",
|
| 776 |
+
" self.fc2 = nn.Linear(n_activations,1)\n",
|
| 777 |
+
" self.rating_range = rating_range\n",
|
| 778 |
+
"\n",
|
| 779 |
+
" def forward(self, X):\n",
|
| 780 |
+
" # Get embeddings for minibatch\n",
|
| 781 |
+
" embedded_users = self.user_embeddings(X[:,0])\n",
|
| 782 |
+
" embedded_items = self.item_embeddings(X[:,1])\n",
|
| 783 |
+
" embedded_genres = self.genre_embeddings(X[:,2])\n",
|
| 784 |
+
" # Concatenate user, item and genre embeddings\n",
|
| 785 |
+
" embeddings = torch.cat([embedded_users,embedded_items,embedded_genres],dim=1)\n",
|
| 786 |
+
" # Pass embeddings through network\n",
|
| 787 |
+
" preds = self.fc1(embeddings)\n",
|
| 788 |
+
" preds = F.relu(preds)\n",
|
| 789 |
+
" preds = self.fc2(preds)\n",
|
| 790 |
+
" # Scale predicted ratings to target-range [low,high]\n",
|
| 791 |
+
" preds = torch.sigmoid(preds) * (self.rating_range[1]-self.rating_range[0]) + self.rating_range[0]\n",
|
| 792 |
+
" return preds\n",
|
| 793 |
+
" "
|
| 794 |
+
]
|
| 795 |
+
},
|
| 796 |
+
{
|
| 797 |
+
"cell_type": "code",
|
| 798 |
+
"execution_count": 19,
|
| 799 |
+
"metadata": {},
|
| 800 |
+
"outputs": [],
|
| 801 |
+
"source": [
|
| 802 |
+
"def train_model(model, criterion, optimizer, dataloaders, device, num_epochs=5, scheduler=None):\n",
|
| 803 |
+
" model = model.to(device) # Send model to GPU if available\n",
|
| 804 |
+
" since = time.time()\n",
|
| 805 |
+
"\n",
|
| 806 |
+
" costpaths = {'train':[],'val':[]}\n",
|
| 807 |
+
"\n",
|
| 808 |
+
" for epoch in range(num_epochs):\n",
|
| 809 |
+
" print('Epoch {}/{}'.format(epoch, num_epochs - 1))\n",
|
| 810 |
+
" print('-' * 10)\n",
|
| 811 |
+
"\n",
|
| 812 |
+
" # Each epoch has a training and validation phase\n",
|
| 813 |
+
" for phase in ['train', 'val']:\n",
|
| 814 |
+
" if phase == 'train':\n",
|
| 815 |
+
" model.train() # Set model to training mode\n",
|
| 816 |
+
" else:\n",
|
| 817 |
+
" model.eval() # Set model to evaluate mode\n",
|
| 818 |
+
"\n",
|
| 819 |
+
" running_loss = 0.0\n",
|
| 820 |
+
"\n",
|
| 821 |
+
" # Get the inputs and labels, and send to GPU if available\n",
|
| 822 |
+
" index = 0\n",
|
| 823 |
+
" for (inputs,labels) in dataloaders[phase]:\n",
|
| 824 |
+
" inputs = inputs.to(device)\n",
|
| 825 |
+
" labels = labels.to(device)\n",
|
| 826 |
+
"\n",
|
| 827 |
+
" # Zero the weight gradients\n",
|
| 828 |
+
" optimizer.zero_grad()\n",
|
| 829 |
+
"\n",
|
| 830 |
+
" # Forward pass to get outputs and calculate loss\n",
|
| 831 |
+
" # Track gradient only for training data\n",
|
| 832 |
+
" with torch.set_grad_enabled(phase == 'train'):\n",
|
| 833 |
+
" outputs = model.forward(inputs).view(-1)\n",
|
| 834 |
+
" loss = criterion(outputs, labels)\n",
|
| 835 |
+
"\n",
|
| 836 |
+
" # Backpropagation to get the gradients with respect to each weight\n",
|
| 837 |
+
" # Only if in train\n",
|
| 838 |
+
" if phase == 'train':\n",
|
| 839 |
+
" loss.backward()\n",
|
| 840 |
+
" # Update the weights\n",
|
| 841 |
+
" optimizer.step()\n",
|
| 842 |
+
"\n",
|
| 843 |
+
" # Convert loss into a scalar and add it to running_loss\n",
|
| 844 |
+
" running_loss += np.sqrt(loss.item()) * labels.size(0)\n",
|
| 845 |
+
" print(f'\\r{running_loss} {index} {index / len(dataloaders[phase])}', end='')\n",
|
| 846 |
+
" index +=1\n",
|
| 847 |
+
"\n",
|
| 848 |
+
" # Step along learning rate scheduler when in train\n",
|
| 849 |
+
" if (phase == 'train') and (scheduler is not None):\n",
|
| 850 |
+
" scheduler.step()\n",
|
| 851 |
+
"\n",
|
| 852 |
+
" # Calculate and display average loss and accuracy for the epoch\n",
|
| 853 |
+
" epoch_loss = running_loss / len(dataloaders[phase].dataset)\n",
|
| 854 |
+
" costpaths[phase].append(epoch_loss)\n",
|
| 855 |
+
" print('{} loss: {:.4f}'.format(phase, epoch_loss))\n",
|
| 856 |
+
"\n",
|
| 857 |
+
" time_elapsed = time.time() - since\n",
|
| 858 |
+
" print('Training complete in {:.0f}m {:.0f}s'.format(\n",
|
| 859 |
+
" time_elapsed // 60, time_elapsed % 60))\n",
|
| 860 |
+
"\n",
|
| 861 |
+
" return costpaths"
|
| 862 |
+
]
|
| 863 |
+
},
|
| 864 |
+
{
|
| 865 |
+
"cell_type": "code",
|
| 866 |
+
"execution_count": null,
|
| 867 |
+
"metadata": {},
|
| 868 |
+
"outputs": [],
|
| 869 |
+
"source": [
|
| 870 |
+
"# Train the model\n",
|
| 871 |
+
"dataloaders = {'train':trainloader, 'val':valloader}\n",
|
| 872 |
+
"n_users = X.loc[:,'playlist_id'].max()+1\n",
|
| 873 |
+
"n_items = X.loc[:,'artist_id'].max()+1\n",
|
| 874 |
+
"n_genres = X.loc[:,'album_id'].max()+1\n",
|
| 875 |
+
"model = NNHybridFiltering(n_users,\n",
|
| 876 |
+
" n_items,\n",
|
| 877 |
+
" n_genres,\n",
|
| 878 |
+
" embdim_users=50, \n",
|
| 879 |
+
" embdim_items=50, \n",
|
| 880 |
+
" embdim_genres=25,\n",
|
| 881 |
+
" n_activations = 100,\n",
|
| 882 |
+
" rating_range=[0.,1.])\n",
|
| 883 |
+
"criterion = nn.MSELoss()\n",
|
| 884 |
+
"lr=0.001\n",
|
| 885 |
+
"n_epochs=10\n",
|
| 886 |
+
"wd=1e-3\n",
|
| 887 |
+
"optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)\n",
|
| 888 |
+
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
|
| 889 |
+
"\n",
|
| 890 |
+
"cost_paths = train_model(model,criterion,optimizer,dataloaders, device,n_epochs, scheduler=None)"
|
| 891 |
+
]
|
| 892 |
+
},
|
| 893 |
+
{
|
| 894 |
+
"cell_type": "code",
|
| 895 |
+
"execution_count": 20,
|
| 896 |
+
"metadata": {},
|
| 897 |
+
"outputs": [
|
| 898 |
+
{
|
| 899 |
+
"name": "stdout",
|
| 900 |
+
"output_type": "stream",
|
| 901 |
+
"text": [
|
| 902 |
+
"Epoch 0/0\n",
|
| 903 |
+
"----------\n",
|
| 904 |
+
"620559.3337308276 83563 0.999988033124312train loss: 0.1160\n",
|
| 905 |
+
"153259.98286122305 20890 0.9999521324972476val loss: 0.1146\n",
|
| 906 |
+
"Training complete in 18m 4s\n"
|
| 907 |
+
]
|
| 908 |
+
}
|
| 909 |
+
],
|
| 910 |
+
"source": [
|
| 911 |
+
"# Train the model\n",
|
| 912 |
+
"dataloaders = {'train':trainloader, 'val':valloader}\n",
|
| 913 |
+
"n_playlists = X.loc[:,'playlist_id'].max()+1\n",
|
| 914 |
+
"n_artists = X.loc[:,'artist_id'].max()+1\n",
|
| 915 |
+
"model = NNColabFiltering(n_playlists,n_artists,embedding_dim_users=1, embedding_dim_items=1, n_activations = 5,rating_range=[0.,1.])\n",
|
| 916 |
+
"criterion = nn.MSELoss()\n",
|
| 917 |
+
"lr=0.001\n",
|
| 918 |
+
"n_epochs=1\n",
|
| 919 |
+
"wd=1e-3\n",
|
| 920 |
+
"optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)\n",
|
| 921 |
+
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
|
| 922 |
+
"\n",
|
| 923 |
+
"cost_paths = train_model(model,criterion,optimizer,dataloaders, device,n_epochs, scheduler=None)"
|
| 924 |
+
]
|
| 925 |
+
},
|
| 926 |
+
{
|
| 927 |
+
"cell_type": "code",
|
| 928 |
+
"execution_count": 21,
|
| 929 |
+
"metadata": {},
|
| 930 |
+
"outputs": [
|
| 931 |
+
{
|
| 932 |
+
"data": {
|
| 933 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAABOMAAAHWCAYAAAA1l01kAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAABN0UlEQVR4nO3df1RU953/8dcAMhCRMUiEoODo0vzQNGARCE0adMsGXVejkg2xaUS2J64bNUsmdaObRpI2PZiEGpJIdbunhk3bRJpsNTZpyQ8Mmh8YKhxiFGNiv0YNdACbOhMwAsvc7x+pk0wFRZy5I/J8nHNP4TOfuZ/3vaPtuy/v3GsxDMMQAAAAAAAAgIALCXYBAAAAAAAAwHBBGAcAAAAAAACYhDAOAAAAAAAAMAlhHAAAAAAAAGASwjgAAAAAAADAJIRxAAAAAAAAgEkI4wAAAAAAAACTEMYBAAAAAAAAJiGMAwAAAAAAAExCGAcAJrHb7Vq8eHGwywAAAECAVVRUyGKx6OOPPw52KQAuQIRxAPAV77zzjh588EEdP3482KUAAAAAAC5CYcEuAAAuJO+8844eeughLV68WKNHj/brvg8cOKCQEP4NBAAAAACGM/5fIQAMgsfj0cmTJ8/pPVarVSNGjAhQRQAAAACAoYAwDgD+6sEHH9TKlSslSRMnTpTFYvHe68NisWj58uX61a9+pSlTpshqtaqqqkqSVFpaqm9+85saM2aMIiMjlZaWphdeeOG0/f/tPeNO3Uvk7bfflsPh0GWXXaaRI0dq/vz5am9vN+WYAQAAIL3wwguyWCzasWPHaa/913/9lywWi/bu3as9e/Zo8eLFmjRpkiIiIhQfH69/+Zd/0Z///OcgVA1gqOJrqgDwVwsWLNCHH36o5557To8//rhiY2MlSZdddpkkafv27fr1r3+t5cuXKzY2Vna7XZL0xBNPaO7cubr99tvV3d2tzZs365//+Z/10ksvafbs2Wddd8WKFbr00ktVXFysjz/+WGVlZVq+fLkqKysDdqwAAAD40uzZsxUVFaVf//rXys7O9nmtsrJSU6ZM0TXXXKOf/OQn+n//7/+psLBQ8fHx2rdvn372s59p37592rVrlywWS5COAMBQQhgHAH917bXX6hvf+Iaee+45zZs3zxu2nXLgwAG9//77mjx5ss/4hx9+qMjISO/vy5cv1ze+8Q2tW7duQGHcmDFj9Oqrr3qbN4/HoyeffFIul0s2m+38DwwAAABnFBkZqTlz5uiFF17Qk08+qdDQUEmS0+nUjh079OCDD0qS7rrrLt17770+773uuuu0cOFCvfXWW/rWt75ldukAhiC+pgoAA5SdnX1aECfJJ4j7y1/+IpfLpW9961tqaGgY0H6XLFni86+o3/rWt9Tb26vDhw+ff9EAAAAYkPz8fLW1tammpsY79sILL8jj8Sg/P1+Sb9938uRJHTt2TNddd50kDbj3AwDCOAAYoIkTJ/Y5/tJLL+m6665TRESEYmJidNlll2nDhg1yuVwD2m9SUpLP75deeqmkL4I9AAAAmGPmzJmy2Ww+twqprKxUamqqrrjiCknSp59+qn//939XXFycIiMjddlll3l7xIH2fgBAGAcAA/TVfwk95c0339TcuXMVERGhn/70p/rd736n1157Td/5zndkGMaA9nvqaxB/a6DvBwAAwPmzWq2aN2+etmzZov/7v/9Tc3Oz3n77be9VcZJ066236r//+7+1dOlS/eY3v9Grr77qfaiXx+MJVukAhhjuGQcAX3GuN9393//9X0VEROiVV16R1Wr1jj/99NP+Lg0AAAABlp+fr//5n/9RdXW19u/fL8MwvGHcX/7yF1VXV+uhhx7SmjVrvO/56KOPglUugCGKMA4AvmLkyJGSpOPHjw9ofmhoqCwWi3p7e71jH3/8sbZu3RqA6gAAABBIOTk5iomJUWVlpfbv36+MjAzv11BPfZvhb7+9UFZWZnaZAI
Y4wjgA+Iq0tDRJ0v3336/bbrtNI0aM0Jw5c/qdP3v2bK1bt04zZ87Ud77zHbW1tam8vFzJycnas2ePWWUDAADAD0aMGKEFCxZo8+bN6uzsVGlpqfe16Oho3XjjjXr00UfV09OjcePG6dVXX9WhQ4eCWDGAoYh7xgHAV6Snp+tHP/qR3nvvPS1evFgLFy5Ue3t7v/P//u//Xj//+c/ldDpVVFSk5557To888ojmz59vYtUAAADwl/z8fHV0dEj64h5xX/Xss88qNzdX5eXlWr16tUaMGKHf//73wSgTwBBmMbhDOAAAAAAAAGAKrowDAAAAAAAATEIYBwAAAAAAAJiEMA4AAAAAAAAwCWEcAAAAAAAAYBLCOAAAAAAAAMAkhHEAAAAAAACAScKCXcBQ5fF41NLSolGjRslisQS7HAAAMAQYhqHPPvtMCQkJCgnh30QvVPR5AABgMAba6xHGDVJLS4sSExODXQYAABiCjh49qvHjxwe7DPSDPg8AAJyPs/V6hHGDNGrUKElfnODo6OggVwMAAIYCt9utxMREbx+BCxN9HgAAGIyB9nqEcYN06isL0dHRNGkAAOCc8NXHCxt9HgAAOB9n6/W4WQkAAAAAAABgEsI4AAAAAAAAwCSEcQAAAAAAAIBJCOMAAAAAAAAAkxDGAQAAAAAAACa5IMK48vJy2e12RUREKDMzU3V1df3O3bdvn/Ly8mS322WxWFRWVnbanJKSEqWnp2vUqFEaO3as5s2bpwMHDnhf//TTT7VixQpdeeWVioyMVFJSku6++265XK5AHB4AAAAAAAAg6QII4yorK+VwOFRcXKyGhgalpKQoNzdXbW1tfc4/ceKEJk2apLVr1yo+Pr7POTt27NCyZcu0a9cuvfbaa+rp6dFNN92kzs5OSVJLS4taWlpUWlqqvXv3qqKiQlVVVfre974XsOMEAAAAAAAALIZhGMEsIDMzU+np6Vq/fr0kyePxKDExUStWrNCqVavO+F673a6ioiIVFRWdcV57e7vGjh2rHTt26MYbb+xzzvPPP6/vfve76uzsVFhY2Fnrdrvdstlscrlcio6OPut8AAAA+oehgc8JAAAMxkB7iKBeGdfd3a36+nrl5OR4x0JCQpSTk6Pa2lq/rXPq66cxMTFnnBMdHd1vENfV1SW32+2zAQAAAAAAAOciqGHcsWPH1Nvbq7i4OJ/xuLg4OZ1Ov6zh8XhUVFSk66+/Xtdcc02/dfzoRz/SkiVL+t1PSUmJbDabd0tMTPRLfQAAAAAAABg+gn7PuEBbtmyZ9u7dq82bN/f5utvt1uzZszV58mQ9+OCD/e5n9erVcrlc3u3o0aMBqhgAAAAAAAAXq7PfHC2AYmNjFRoaqtbWVp/x1tbWfh/OcC6WL1+ul156STt37tT48eNPe/2zzz7TzJkzNWrUKG3ZskUjRozod19Wq1VWq/W8awIAAAAAAMDwFdQr48LDw5WWlqbq6mrvmMfjUXV1tbKysga9X8MwtHz5cm3ZskXbt2/XxIkTT5vjdrt10003KTw8XNu2bVNERMSg1wMAAAAAAAAGIqhXxkmSw+FQQUGBpk2bpoyMDJWVlamzs1OFhYWSpEWLFmncuHEqKSmR9MVDH5qamrw/Nzc3q7GxUVFRUUpOTpb0xVdTn332Wb344osaNWqU9/5zNptNkZGR3iDuxIkT+uUvf+nzQIbLLrtMoaGhZp8GAAAAAAAADANBD+Py8/PV3t6uNWvWyOl0KjU1VVVVVd6HOhw5ckQhIV9ewNfS0qKpU6d6fy8tLVVpaamys7NVU1MjSdqwYYMkafr06T5rPf3001q8eLEaGhr07rvvSpI3wDvl0KFDstvtfj5KAAAAAAAAQLIYhmEEu4ihyO12y2azyeVyKTo6OtjlAACAIYD+YWjgcwIAAIMx0B7ion+aKgAAAAAAAHChIIwDAAAAAAAATEIYBwAAAAAAAJiEMA4AAAAAAAAwCWEcAAAAAAAAYBLCOAAAAA
RMeXm57Ha7IiIilJmZqbq6un7n7tu3T3l5ebLb7bJYLCorKzttTklJidLT0zVq1CiNHTtW8+bN04EDB3zmnDx5UsuWLdOYMWMUFRWlvLw8tba2+vvQAAAABoUwDgAAAAFRWVkph8Oh4uJiNTQ0KCUlRbm5uWpra+tz/okTJzRp0iStXbtW8fHxfc7ZsWOHli1bpl27dum1115TT0+PbrrpJnV2dnrn3HPPPfrtb3+r559/Xjt27FBLS4sWLFgQkGMEAAA4VxbDMIxgFzEUud1u2Ww2uVwuRUdHB7scAAAwBAy3/iEzM1Pp6elav369JMnj8SgxMVErVqzQqlWrzvheu92uoqIiFRUVnXFee3u7xo4dqx07dujGG2+Uy+XSZZddpmeffVa33HKLJOmDDz7Q1VdfrdraWl133XVnrXu4fU4AAMA/BtpDcGUcAAAA/K67u1v19fXKycnxjoWEhCgnJ0e1tbV+W8flckmSYmJiJEn19fXq6enxWfeqq65SUlJSv+t2dXXJ7Xb7bAAAAIFCGAcAAAC/O3bsmHp7exUXF+czHhcXJ6fT6Zc1PB6PioqKdP311+uaa66RJDmdToWHh2v06NEDXrekpEQ2m827JSYm+qU+AACAvhDGAQAAYEhatmyZ9u7dq82bN5/XflavXi2Xy+Xdjh496qcKAQAAThcW7AIAAABw8YmNjVVoaOhpTzFtbW3t9+EM52L58uV66aWXtHPnTo0fP947Hh8fr+7ubh0/ftzn6rgzrWu1WmW1Ws+7JgAAgIHgyjgAAAD4XXh4uNLS0lRdXe0d83g8qq6uVlZW1qD3axiGli9fri1btmj79u2aOHGiz+tpaWkaMWKEz7oHDhzQkSNHzmtdAAAAf+HKOAAAAASEw+FQQUGBpk2bpoyMDJWVlamzs1OFhYWSpEWLFmncuHEqKSmR9MVDH5qamrw/Nzc3q7GxUVFRUUpOTpb0xVdTn332Wb344osaNWqU9z5wNptNkZGRstls+t73vieHw6GYmBhFR0drxYoVysrKGtCTVAEAAAKNMA4AAAABkZ+fr/b2dq1Zs0ZOp1OpqamqqqryPtThyJEjCgn58osaLS0tmjp1qvf30tJSlZaWKjs7WzU1NZKkDRs2SJKmT5/us9bTTz+txYsXS5Ief/xxhYSEKC8vT11dXcrNzdVPf/rTwB0oAADAObAYhmEEu4ihyO12y2azyeVyKTo6OtjlAACAIYD+YWjgcwIAAIMx0B6Ce8YBAAAAAAAAJiGMAwAAAAAAAExCGAcAAAAAAACYhDAOAAAAAAAAMAlhHAAAAAAAAGASwjgAAAAAAADAJIRxAAAAAAAAgEkI4wAAAAAAAACTEMYBAAAAAAAAJiGMAwAAAAAAAExCGAcAAAAAAACYhDAOAAAAAAAAMAlhHAAAAAAAAGASwjgAAAAAAADAJIRxAAAAAAAAgEkI4wAAAAAAAACTEMYBAAAAAAAAJiGMAwAAAAAAAExCGAcAAAAAAACYhDAOAAAAAAAAMAlhHAAAAAAAAGASwjgAAAAAAADAJIRxAAAAAAAAgEkI4wAAAAAAAACTEMYBAAAAAAAAJiGMAwAAAAAAAExCGAcAAAAAAACYhDAOAAAAAAAAMAlhHAAAAAAAAGASwjgAAAAAAADAJIRxAAAAAAAAgEkI4wAAAAAAAACTEMYBAAAAAAAAJiGMAwAAAAAAAExyQYRx5eXlstvtioiIUGZmpurq6vqdu2/fPuXl5clut8tisaisrOy0OSUlJUpPT9eoUaM0duxYzZs3TwcOHPCZc/LkSS1btkxjxoxRVFSU8vLy1Nra6u9DAwAAAAAAALyCHsZVVlbK4XCouLhYDQ0NSklJUW5urtra2vqcf+LECU2aNElr165VfHx8n3N27NihZcuWadeuXXrttdfU09Ojm266SZ2dnd4599xzj37729/q+eef144dO9TS0qIFCxYE5BgBAAAAAAAASbIYhmEEs4DMzEylp6dr/f
r1kiSPx6PExEStWLFCq1atOuN77Xa7ioqKVFRUdMZ57e3tGjt2rHbs2KEbb7xRLpdLl112mZ599lndcsstkqQPPvhAV199tWpra3XdddedtW632y2bzSaXy6Xo6OiBHSwAABjW6B+GBj4nAAAwGAPtIYJ6ZVx3d7fq6+uVk5PjHQsJCVFOTo5qa2v9to7L5ZIkxcTESJLq6+vV09Pjs+5VV12lpKSkftft6uqS2+322QAAAAAAAIBzEdQw7tixY+rt7VVcXJzPeFxcnJxOp1/W8Hg8Kioq0vXXX69rrrlGkuR0OhUeHq7Ro0cPeN2SkhLZbDbvlpiY6Jf6AAAAAAAAMHwE/Z5xgbZs2TLt3btXmzdvPq/9rF69Wi6Xy7sdPXrUTxUCAAAAAABguAgL5uKxsbEKDQ097Smmra2t/T6c4VwsX75cL730knbu3Knx48d7x+Pj49Xd3a3jx4/7XB13pnWtVqusVut51wQAAAAAAIDhK6hXxoWHhystLU3V1dXeMY/Ho+rqamVlZQ16v4ZhaPny5dqyZYu2b9+uiRMn+ryelpamESNG+Kx74MABHTly5LzWBQAAAAAAAM4kqFfGSZLD4VBBQYGmTZumjIwMlZWVqbOzU4WFhZKkRYsWady4cSopKZH0xUMfmpqavD83NzersbFRUVFRSk5OlvTFV1OfffZZvfjiixo1apT3PnA2m02RkZGy2Wz63ve+J4fDoZiYGEVHR2vFihXKysoa0JNUAQAAAAAAgMEIehiXn5+v9vZ2rVmzRk6nU6mpqaqqqvI+1OHIkSMKCfnyAr6WlhZNnTrV+3tpaalKS0uVnZ2tmpoaSdKGDRskSdOnT/dZ6+mnn9bixYslSY8//rhCQkKUl5enrq4u5ebm6qc//WngDhQAAAAAAADDnsUwDCPYRQxFbrdbNptNLpdL0dHRwS4HAAAMAfQPQwOfEwAAGIyB9hAX/dNUAQAAAAAAgAsFYRwAAAAAAABgEsI4AAAAAAAAwCSEcQAAAAAAAIBJCOMAAAAAAAAAkxDGAQAAIGDKy8tlt9sVERGhzMxM1dXV9Tt33759ysvLk91ul8ViUVlZ2Wlzdu7cqTlz5ighIUEWi0Vbt249bU5HR4eWL1+u8ePHKzIyUpMnT9bGjRv9eFQAAACDRxgHAACAgKisrJTD4VBxcbEaGhqUkpKi3NxctbW19Tn/xIkTmjRpktauXav4+Pg+53R2diolJUXl5eX9rutwOFRVVaVf/vKX2r9/v4qKirR8+XJt27bNL8cFAABwPgjjAAAAEBDr1q3TnXfeqcLCQu/VaZdccok2bdrU5/z09HQ99thjuu2222S1WvucM2vWLD388MOaP39+v+u+8847Kigo0PTp02W327VkyRKlpKSc8ao8AAAAsxDGAQAAwO+6u7tVX1+vnJwc71hISIhycnJUW1sb0LW/+c1vatu2bWpubpZhGHrjjTf04Ycf6qabbupzfldXl9xut88GAAAQKIRxAAAA8Ltjx46pt7dXcXFxPuNxcXFyOp0BXfupp57S5MmTNX78eIWHh2vmzJkqLy/XjTfe2Of8kpIS2Ww275aYmBjQ+gAAwPBGGAcAAICLylNPPaVdu3Zp27Ztqq+v109+8hMtW7ZMr7/+ep/zV69eLZfL5d2OHj1qcsUAAGA4CQt2AQAAALj4xMbGKjQ0VK2trT7jra2t/T6cwR8+//xz/ed//qe2bNmi2bNnS5KuvfZaNTY2qrS01Odrs6dYrdZ+71EHAADgb1wZBwAAAL8LDw9XWlqaqqurvWMej0fV1dXKysoK2Lo9PT3q6elRSIhvmxsaGiqPxxOwdQEAAAaKK+MAAAAQEA6HQwUFBZo2bZoyMjJUVlamzs5OFRYWSpIWLVqkcePGqaSkRNIXD31oamry/tzc3KzGxkZFRUUpOTlZktTR0aGDBw961zh06JAaGxsVExOjpKQkRUdHKzs7WytXrlRkZKQmTJigHTt26JlnntG6de
tMPgMAAACnI4wDAABAQOTn56u9vV1r1qyR0+lUamqqqqqqvA91OHLkiM8VbC0tLZo6dar399LSUpWWlio7O1s1NTWSpN27d2vGjBneOQ6HQ5JUUFCgiooKSdLmzZu1evVq3X777fr00081YcIE/fjHP9bSpUsDfMQAAABnZzEMwwh2EUOR2+2WzWaTy+VSdHR0sMsBAABDAP3D0MDnBAAABmOgPQT3jAMAAAAAAABMQhgHAAAAAAAAmIQwDgAAAAAAADAJYRwAAAAAAABgEsI4AAAAAAAAwCSEcQAAAAAAAIBJCOMAAAAAAAAAkxDGAQAAAAAAACYhjAMAAAAAAABMQhgHAAAAAAAAmIQwDgAAAAAAADAJYRwAAAAAAABgEsI4AAAAAAAAwCSEcQAAAAAAAIBJCOMAAAAAAAAAkxDGAQAAAAAAACYhjAMAAAAAAABMQhgHAAAAAAAAmIQwDgAAAAAAADAJYRwAAAAAAABgEsI4AAAAAAAAwCSEcQAAAAAAAIBJCOMAAAAAAAAAkxDGAQAAAAAAACYhjAMAAAAAAABMQhgHAAAAAAAAmIQwDgAAAAAAADAJYRwAAAAAAABgEsI4AAAAAAAAwCSEcQAAAAAAAIBJCOMAAAAAAAAAkxDGAQAAAAAAACYhjAMAAAAAAABMEvQwrry8XHa7XREREcrMzFRdXV2/c/ft26e8vDzZ7XZZLBaVlZWdNmfnzp2aM2eOEhISZLFYtHXr1tPmdHR0aPny5Ro/frwiIyM1efJkbdy40Y9HBQAAAAAAAJwuqGFcZWWlHA6HiouL1dDQoJSUFOXm5qqtra3P+SdOnNCkSZO0du1axcfH9zmns7NTKSkpKi8v73ddh8Ohqqoq/fKXv9T+/ftVVFSk5cuXa9u2bX45LgAAAAAAAKAvQQ3j1q1bpzvvvFOFhYXeq9MuueQSbdq0qc/56enpeuyxx3TbbbfJarX2OWfWrFl6+OGHNX/+/H7Xfeedd1RQUKDp06fLbrdryZIlSklJOeNVeQAAAAAAAMD5CloY193drfr6euXk5HxZTEiIcnJyVFtbG9C1v/nNb2rbtm1qbm6WYRh644039OGHH+qmm27q9z1dXV1yu90+GwAAAAAAAHAughbGHTt2TL29vYqLi/MZj4uLk9PpDOjaTz31lCZPnqzx48crPDxcM2fOVHl5uW688cZ+31NSUiKbzebdEhMTA1ojAAAAAAAALj5Bf4BDMDz11FPatWuXtm3bpvr6ev3kJz/RsmXL9Prrr/f7ntWrV8vlcnm3o0ePmlgxAAAAAAAALgZhwVo4NjZWoaGham1t9RlvbW3t9+EM/vD555/rP//zP7VlyxbNnj1bknTttdeqsbFRpaWlPl+b/Sqr1drvfeoAAAAAAACAgQjalXHh4eFKS0tTdXW1d8zj8ai6ulpZWVkBW7enp0c9PT0KCfE99NDQUHk8noCtCwAAAAAAAATtyjhJcjgcKigo0LRp05SRkaGysjJ1dnaqsLBQkrRo0SKNGzdOJSUlkr546ENTU5P35+bmZjU2NioqKkrJycmSpI6ODh08eNC7xqFDh9TY2KiYmBglJSUpOjpa2dnZWrlypSIjIzVhwgTt2LFDzzzzjNatW2fyGQAAAAAAAMBwEtQwLj8/X+3t7VqzZo2cTqdSU1NVVVXlfajDkSNHfK5ga2lp0dSpU72/l5aWqrS0VNnZ2aqpqZEk7d69WzNmzPDOcTgckqSCggJVVFRIkjZv3qzVq1fr9ttv16effqoJEyboxz/+sZYuXRrgIwYAAAAAAMBwZjEMwwh2EUOR2+2WzWaTy+VSdHR0sMsBAABDAP3D0MDnBAAABmOgPcSwfJoqAAAAAAAAEAyEcQAAAAAAAIBJCOMAAAAAAAAAkxDGAQAAAAAAACYhjAMAAAAAAABMQhgHAAAAAAAAmIQwDgAAAAFTXl4uu92uiIgIZWZmqq6urt+5+/btU15enux2uywWi8rKyk6bs3PnTs
2ZM0cJCQmyWCzaunVrn/vav3+/5s6dK5vNppEjRyo9PV1Hjhzx01EBAAAMHmEcAAAAAqKyslIOh0PFxcVqaGhQSkqKcnNz1dbW1uf8EydOaNKkSVq7dq3i4+P7nNPZ2amUlBSVl5f3u+4f//hH3XDDDbrqqqtUU1OjPXv26IEHHlBERIRfjgsAAOB8WAzDMIJdxFDkdrtls9nkcrkUHR0d7HIAAMAQMNz6h8zMTKWnp2v9+vWSJI/Ho8TERK1YsUKrVq0643vtdruKiopUVFTU7xyLxaItW7Zo3rx5PuO33XabRowYoV/84heDqnu4fU4AAMA/BtpDcGUcAAAA/K67u1v19fXKycnxjoWEhCgnJ0e1tbUBW9fj8ejll1/WFVdcodzcXI0dO1aZmZn9fp1Vkrq6uuR2u302AACAQCGMAwAAgN8dO3ZMvb29iouL8xmPi4uT0+kM2LptbW3q6OjQ2rVrNXPmTL366quaP3++FixYoB07dvT5npKSEtlsNu+WmJgYsPoAAAAI4wAAAHDR8Hg8kqSbb75Z99xzj1JTU7Vq1Sr90z/9kzZu3Njne1avXi2Xy+Xdjh49ambJAABgmAkLdgEAAAC4+MTGxio0NFStra0+462trf0+nMFf64aFhWny5Mk+41dffbXeeuutPt9jtVpltVoDVhMAAMBXcWUcAAAA/C48PFxpaWmqrq72jnk8HlVXVysrKyug66anp+vAgQM+4x9++KEmTJgQsHUBAAAGiivjAAAAEBAOh0MFBQWaNm2aMjIyVFZWps7OThUWFkqSFi1apHHjxqmkpETSFw99aGpq8v7c3NysxsZGRUVFKTk5WZLU0dGhgwcPetc4dOiQGhsbFRMTo6SkJEnSypUrlZ+frxtvvFEzZsxQVVWVfvvb36qmpsbEowcAAOgbYRwAAAACIj8/X+3t7VqzZo2cTqdSU1NVVVXlfajDkSNHFBLy5Rc1WlpaNHXqVO/vpaWlKi0tVXZ2tjdI2717t2bMmOGd43A4JEkFBQWqqKiQJM2fP18bN25USUmJ7r77bl155ZX63//9X91www0BPmIAAICzsxiGYQS7iKHI7XbLZrPJ5XIpOjo62OUAAIAhgP5haOBzAgAAgzHQHoJ7xgEAAAAAAAAmIYwDAAAAAAAATEIYBwAAAAAAAJiEMA4AAAAAAAAwCWEcAAAAAAAAYBLCOAAAAAAAAMAkhHEAAAAAAACASQjjAAAAAAAAAJMQxgEAAAAAAAAmIYwDAAAAAAAATEIYBwAAAAAAAJiEMA4AAAAAAAAwCWEcAAAAAAAAYBLCOAAAAAAAAMAkhHEAAAAAAACASQjjAAAAAAAAAJMQxgEAAAAAAAAmIYwDAAAAAAAATEIYBwAAAAAAAJiEMA4AAAAAAAAwCWEcAAAAAAAAYBLCOAAAAAAAAMAkhHEAAAAAAACASQjjAAAAAAAAAJMMKow7evSoPvnkE+/vdXV1Kioq0s9+9jO/FQYAAADz0ecBAAAE1qDCuO985zt64403JElOp1P/8A//oLq6Ot1///364Q9/6NcCAQAAYB76PAAAgMAaVBi3d+9eZWRkSJJ+/etf65prrtE777yjX/3qV6qoqPBnfQAAADARfR4AAEBgDSqM6+npkdVqlSS9/vrrmjt3riTpqquu0p/+9Cf/VQcAAABT0ecBAAAE1qDCuClTpmjjxo1688039dprr2nmzJmSpJaWFo0ZM8avBQIAAMA89HkAAACBNagw7pFHHtF//dd/afr06Vq4cKFSUlIkSdu2bfN+rQEAAABDD30eAABAYFkMwzAG88be3l653W5deuml3rGPP/5Yl1xyicaOHeu3Ai9UbrdbNptNLpdL0dHRwS4HAAAMAUOlf6DPGxqfEwAAuLAMtIcY1JVxn3/+ubq6urwN2uHDh1VWVqYDBw4MiwYNAADgYkWfBwAAEFiDCuNuvvlmPfPMM5Kk48ePKzMzUz/5yU80b948bdiw4Zz2VV5eLr
vdroiICGVmZqqurq7fufv27VNeXp7sdrssFovKyspOm7Nz507NmTNHCQkJslgs2rp1a5/72r9/v+bOnSubzaaRI0cqPT1dR44cOafaAQAALjb+7PMAAABwukGFcQ0NDfrWt74lSXrhhRcUFxenw4cP65lnntGTTz454P1UVlbK4XCouLhYDQ0NSklJUW5urtra2vqcf+LECU2aNElr165VfHx8n3M6OzuVkpKi8vLyftf94x//qBtuuEFXXXWVampqtGfPHj3wwAOKiIgYcO0AAAAXI3/1eQAAAOhb2GDedOLECY0aNUqS9Oqrr2rBggUKCQnRddddp8OHDw94P+vWrdOdd96pwsJCSdLGjRv18ssva9OmTVq1atVp89PT05Weni5Jfb4uSbNmzdKsWbPOuO7999+vf/zHf9Sjjz7qHfu7v/u7AdcNAABwsfJXnwcAAIC+DerKuOTkZG3dulVHjx7VK6+8optuukmS1NbWNuCb3HZ3d6u+vl45OTlfFhMSopycHNXW1g6mrAHxeDx6+eWXdcUVVyg3N1djx45VZmZmv19nPaWrq0tut9tnAwAAuNj4o88DAABA/wYVxq1Zs0bf//73ZbfblZGRoaysLElf/Ovp1KlTB7SPY8eOqbe3V3FxcT7jcXFxcjqdgylrQNra2tTR0aG1a9dq5syZevXVVzV//nwtWLBAO3bs6Pd9JSUlstls3i0xMTFgNQIAAASLP/o8AAAA9G9QX1O95ZZbdMMNN+hPf/qTUlJSvOPf/va3NX/+fL8VFwgej0fSFzcnvueeeyRJqampeuedd7Rx40ZlZ2f3+b7Vq1fL4XB4f3e73QRyAADgojOU+zwAAIChYFBhnCTFx8crPj5en3zyiSRp/PjxysjIGPD7Y2NjFRoaqtbWVp/x1tbWfh/O4A+xsbEKCwvT5MmTfcavvvpqvfXWW/2+z2q1ymq1BqwuAACAC8X59nkAAADo36C+purxePTDH/5QNptNEyZM0IQJEzR69Gj96Ec/8l55djbh4eFKS0tTdXW1z36rq6u9X4cIhPDwcKWnp+vAgQM+4x9++KEmTJgQsHUBAACGAn/0eQAAAOjfoK6Mu//++/Xzn/9ca9eu1fXXXy9Jeuutt/Tggw/q5MmT+vGPfzyg/TgcDhUUFGjatGnKyMhQWVmZOjs7vU9XXbRokcaNG6eSkhJJXzz0oampyftzc3OzGhsbFRUVpeTkZElSR0eHDh486F3j0KFDamxsVExMjJKSkiRJK1euVH5+vm688UbNmDFDVVVV+u1vf6uamprBnA4AAICLhr/6PAAAAPTNYhiGca5vSkhI0MaNGzV37lyf8RdffFF33XWXmpubB7yv9evX67HHHpPT6VRqaqqefPJJZWZmSpKmT58uu92uiooKSdLHH3+siRMnnraP7Oxsb5BWU1OjGTNmnDanoKDAux9J2rRpk0pKSvTJJ5/oyiuv1EMPPaSbb755wHW73W7ZbDa5XC6eLAYAAAZkKPQP/uzzhqqh8DkBAIALz0B7iEGFcREREdqzZ4+uuOIKn/EDBw4oNTVVn3/++blXPMTQpAEAgHM1FPoH+ryh8TkBAIALz0B7iEHdMy4lJUXr168/bXz9+vW69tprB7NLAAAAXADo8wAAAAJrUPeMe/TRRzV79my9/vrr3oct1NbW6ujRo/rd737n1wIBAABgHvo8AACAwBrUlXHZ2dn68MMPNX/+fB0/flzHjx/XggULtG/fPv3iF7/wd40AAAAwCX0eAABAYA3qnnH9ee+99/SNb3xDvb29/trlBYt7iQAAgHM1lPsH+jwAAIAzC+g94wAAAAAAAACcO8I4AAAAAAAAwCSEcQAAAAAAAIBJzulpqgsWLDjj68ePHz+fWgAAABAk9HkAAADmOKcr42w22xm3CRMmaNGiRYGqFQAAAAESqD6vvLxcdrtdERERyszMVF1dXb9z9+3bp7y8PNntdlksFpWVlZ02Z+fOnZozZ44SEh
JksVi0devWM66/dOnSfvcFAAAQDOd0ZdzTTz8dqDoAAAAQRIHo8yorK+VwOLRx40ZlZmaqrKxMubm5OnDggMaOHXva/BMnTmjSpEn653/+Z91zzz197rOzs1MpKSn6l3/5l7Nezbdlyxbt2rVLCQkJfjkeAAAAfzinMA4AAAAYqHXr1unOO+9UYWGhJGnjxo16+eWXtWnTJq1ateq0+enp6UpPT5ekPl+XpFmzZmnWrFlnXbu5uVkrVqzQK6+8otmzZ59xbldXl7q6ury/u93us+4fAABgsHiAAwAAAPyuu7tb9fX1ysnJ8Y6FhIQoJydHtbW1AV3b4/Hojjvu0MqVKzVlypSzzi8pKfH5Sm5iYmJA6wMAAMMbYRwAAAD87tixY+rt7VVcXJzPeFxcnJxOZ0DXfuSRRxQWFqa77757QPNXr14tl8vl3Y4ePRrQ+gAAwPDG11QBAABw0aivr9cTTzyhhoYGWSyWAb3HarXKarUGuDIAAIAvcGUcAAAA/C42NlahoaFqbW31GW9tbVV8fHzA1n3zzTfV1tampKQkhYWFKSwsTIcPH9a9994ru90esHUBAAAGijAOAAAAfhceHq60tDRVV1d7xzwej6qrq5WVlRWwde+44w7t2bNHjY2N3i0hIUErV67UK6+8ErB1AQAABoqvqQIAACAgHA6HCgoKNG3aNGVkZKisrEydnZ3ep6suWrRI48aNU0lJiaQvHvrQ1NTk/bm5uVmNjY2KiopScnKyJKmjo0MHDx70rnHo0CE1NjYqJiZGSUlJGjNmjMaMGeNTx4gRIxQfH68rr7zSjMMGAAA4I8I4AAAABER+fr7a29u1Zs0aOZ1OpaamqqqqyvtQhyNHjigk5MsvarS0tGjq1Kne30tLS1VaWqrs7GzV1NRIknbv3q0ZM2Z45zgcDklSQUGBKioqAn9QAAAA58liGIYR7CKGIrfbLZvNJpfLpejo6GCXAwAAhgD6h6GBzwkAAAzGQHsI7hkHAAAAAAAAmIQwDgAAAAAAADAJYRwAAAAAAABgEsI4AAAAAAAAwCSEcQAAAAAAAIBJCOMAAAAAAAAAkxDGAQAAAAAAACYhjAMAAAAAAABMQhgHAAAAAAAAmIQwDgAAAAAAADAJYRwAAAAAAABgEsI4AAAAAAAAwCSEcQAAAAAAAIBJCOMAAAAAAAAAkxDGAQAAAAAAACYhjAMAAAAAAABMQhgHAAAAAAAAmIQwDgAAAAAAADAJYRwAAAAAAABgEsI4AAAAAAAAwCSEcQAAAAAAAIBJCOMAAAAAAAAAkxDGAQAAAAAAACYhjAMAAAAAAABMQhgHAAAAAAAAmIQwDgAAAAAAADAJYRwAAAAAAABgEsI4AAAAAAAAwCSEcQAAAAAAAIBJCOMAAAAAAAAAkxDGAQAAAAAAACa5IMK48vJy2e12RUREKDMzU3V1df3O3bdvn/Ly8mS322WxWFRWVnbanJ07d2rOnDlKSEiQxWLR1q1bz7j+0qVL+90XAAAAAAAA4C9BD+MqKyvlcDhUXFyshoYGpaSkKDc3V21tbX3OP3HihCZNmqS1a9cqPj6+zzmdnZ1KSUlReXn5WdffsmWLdu3apYSEhPM6DgAAAAAAAOBsgh7GrVu3TnfeeacKCws1efJkbdy4UZdccok2bdrU5/z09HQ99thjuu2222S1WvucM2vWLD388MOaP3/+Gddubm7WihUr9Ktf/UojRow472MBAAAAAAAAziSoYVx3d7fq6+uVk5PjHQsJCVFOTo5qa2sDurbH49Edd9yhlStXasqUKWed39XVJbfb7bMBAAAAAAAA5yKoYdyxY8fU29uruLg4n/G4uDg5nc6Arv3II48oLCxMd99994Dml5SUyGazebfExMSA1gcAAAAAAICLT9C/phoM9fX1euKJJ1RRUSGLxTKg96xevVoul8u7HT16NMBVAgAAAAAA4GIT1DAuNjZWoaGham1t9RlvbW3t9+EM/vDmm2+qra1NSU
lJCgsLU1hYmA4fPqx7771Xdru9z/dYrVZFR0f7bAAAAAAAAMC5CGoYFx4errS0NFVXV3vHPB6PqqurlZWVFbB177jjDu3Zs0eNjY3eLSEhQStXrtQrr7wSsHUBAAAAAAAwvIUFuwCHw6GCggJNmzZNGRkZKisrU2dnpwoLCyVJixYt0rhx41RSUiLpi4c+NDU1eX9ubm5WY2OjoqKilJycLEnq6OjQwYMHvWscOnRIjY2NiomJUVJSksaMGaMxY8b41DFixAjFx8fryiuvNOOwAQAAAAAAMAwFPYzLz89Xe3u71qxZI6fTqdTUVFVVVXkf6nDkyBGFhHx5AV9LS4umTp3q/b20tFSlpaXKzs5WTU2NJGn37t2aMWOGd47D4ZAkFRQUqKKiIvAHBQAAAAAAAPTBYhiGEewihiK32y2bzSaXy8X94wAAwIDQPwwNfE4AAGAwBtpDDMunqQIAAAAAAADBQBgHAAAAAAAAmIQwDgAAAAAAADAJYRwAAAAAAABgEsI4AAAAAAAAwCSEcQAAAAAAAIBJCOMAAAAAAAAAkxDGAQAAAAAAACYhjAMAAEDAlJeXy263KyIiQpmZmaqrq+t37r59+5SXlye73S6LxaKysrLT5uzcuVNz5sxRQkKCLBaLtm7d6vN6T0+P7rvvPn3961/XyJEjlZCQoEWLFqmlpcXPRwYAADA4hHEAAAAIiMrKSjkcDhUXF6uhoUEpKSnKzc1VW1tbn/NPnDihSZMmae3atYqPj+9zTmdnp1JSUlReXt7vPhoaGvTAAw+ooaFBv/nNb3TgwAHNnTvXb8cFAABwPiyGYRjBLmIocrvdstlscrlcio6ODnY5AABgCBhu/UNmZqbS09O1fv16SZLH41FiYqJWrFihVatWnfG9drtdRUVFKioq6neOxWLRli1bNG/evDPu6w9/+IMyMjJ0+PBhJSUlnfZ6V1eXurq6vL+73W4lJiYOm88JAAD4x0B7Pa6MAwAAgN91d3ervr5eOTk53rGQkBDl5OSotrbW1FpcLpcsFotGjx7d5+slJSWy2WzeLTEx0dT6AADA8EIYBwAAAL87duyYent7FRcX5zMeFxcnp9NpWh0nT57Ufffdp4ULF/b7L9SrV6+Wy+XybkePHjWtPgAAMPyEBbsAAAAAIBB6enp06623yjAMbdiwod95VqtVVqvVxMoAAMBwRhgHAAAAv4uNjVVoaKhaW1t9xltbW/t9OIM/nQriDh8+rO3bt3PvNwAAcMHga6oAAADwu/DwcKWlpam6uto75vF4VF1draysrICufSqI++ijj/T6669rzJgxAV0PAADgXHBlHAAAAALC4XCooKBA06ZNU0ZGhsrKytTZ2anCwkJJ0qJFizRu3DiVlJRI+uKhD01NTd6fm5ub1djYqKioKCUnJ0uSOjo6dPDgQe8ahw4dUmNjo2JiYpSUlKSenh7dcsstamho0EsvvaTe3l7vPepiYmIUHh5u5ikAAAA4DWEcAAAAAiI/P1/t7e1as2aNnE6nUlNTVVVV5X2ow5EjRxQS8uUXNVpaWjR16lTv76WlpSotLVV2drZqamokSbt379aMGTO8cxwOhySpoKBAFRUVam5u1rZt2yRJqampPvW88cYbmj59egCOFAAAYOAshmEYwS5iKHK73bLZbHK5XNyDBAAADAj9w9DA5wQAAAZjoD0E94wDAAAAAAAATEIYBwAAAAAAAJiEMA4AAAAAAAAwCWEcAAAAAAAAYBLCOAAAAAAAAMAkhHEAAAAAAACASQjjAAAAAAAAAJMQxgEAAAAAAAAmIYwDAAAAAAAATEIYBwAAAAAAAJiEMA4AAAAAAAAwCWEcAAAAAAAAYBLCOAAAAAAAAMAkhHEAAAAAAACASQjjAAAAAAAAAJMQxgEAAAAAAAAmIYwDAAAAAAAATEIYBwAAAAAAAJiEMA4AAAAAAAAwCWEcAAAAAAAAYBLCOAAAAAAAAMAkhHEAAAAAAACASQjjAAAAAA
AAAJMQxgEAAAAAAAAmIYwDAAAAAAAATEIYBwAAAAAAAJiEMA4AAAAAAAAwCWEcAAAAAAAAYBLCOAAAAAAAAMAkhHEAAAAAAACASS6IMK68vFx2u10RERHKzMxUXV1dv3P37dunvLw82e12WSwWlZWVnTZn586dmjNnjhISEmSxWLR161af13t6enTffffp61//ukaOHKmEhAQtWrRILS0tfj4yAAAAAAAA4EtBD+MqKyvlcDhUXFyshoYGpaSkKDc3V21tbX3OP3HihCZNmqS1a9cqPj6+zzmdnZ1KSUlReXl5v/toaGjQAw88oIaGBv3mN7/RgQMHNHfuXL8dFwAAAAAAAPC3LIZhGMEsIDMzU+np6Vq/fr0kyePxKDExUStWrNCqVavO+F673a6ioiIVFRX1O8disWjLli2aN2/eGff1hz/8QRkZGTp8+LCSkpLOWrfb7ZbNZpPL5VJ0dPRZ5wMAANA/DA18TgAAYDAG2kME9cq47u5u1dfXKycnxzsWEhKinJwc1dbWmlqLy+WSxWLR6NGj+3y9q6tLbrfbZwMAAAAAAADORVDDuGPHjqm3t1dxcXE+43FxcXI6nabVcfLkSd13331auHBhv8llSUmJbDabd0tMTDStPgAAAAAAAFwcgn7PuGDr6enRrbfeKsMwtGHDhn7nrV69Wi6Xy7sdPXrUxCoBAAAAAABwMQgL5uKxsbEKDQ1Va2urz3hra2u/D2fwp1NB3OHDh7V9+/Yzfp/XarXKarUGvCYAAAAAAABcvIJ6ZVx4eLjS0tJUXV3tHfN4PKqurlZWVlZA1z4VxH300Ud6/fXXNWbMmICuBwAAAAAAAAT1yjhJcjgcKigo0LRp05SRkaGysjJ1dnaqsLBQkrRo0SKNGzdOJSUlkr546ENTU5P35+bmZjU2NioqKkrJycmSpI6ODh08eNC7xqFDh9TY2KiYmBglJSWpp6dHt9xyixoaGvTSSy+pt7fXe4+6mJgYhYeHm3kKAAAAAAAAMEwEPYzLz89Xe3u71qxZI6fTqdTUVFVVVXkf6nDkyBGFhHx5AV9LS4umTp3q/b20tFSlpaXKzs5WTU2NJGn37t2aMWOGd47D4ZAkFRQUqKKiQs3Nzdq2bZskKTU11aeeN954Q9OnTw/AkQIAAAAAAGC4sxiGYQS7iKHI7XbLZrPJ5XKd8V5zAAAAp9A/DA18TgAAYDAG2kMM+6epAgAAAAAAAGYhjAMAAAAAAABMQhgHAAAAAAAAmIQwDgAAAAAAADAJYRwAAAAAAABgEsI4AAAAAAAAwCSEcQAAAAAAAIBJCOMAAAAAAAAAkxDGAQAAAAAAACYhjAMAAEDAlJeXy263KyIiQpmZmaqrq+t37r59+5SXlye73S6LxaKysrLT5uzcuVNz5sxRQkKCLBaLtm7detocwzC0Zs0aXX755YqMjFROTo4++ugjPx4VAADA4BHGAQAAICAqKyvlcDhUXFyshoYGpaSkKDc3V21tbX3OP3HihCZNmqS1a9cqPj6+zzmdnZ1KSUlReXl5v+s++uijevLJJ7Vx40a9++67GjlypHJzc3Xy5Em/HBcAAMD5sBiGYQS7iKHI7XbLZrPJ5XIpOjo62OUAAIAhYLj1D5mZmUpPT9f69eslSR6PR4mJiVqxYoVWrVp1xvfa7XYVFRWpqKio3zkWi0VbtmzRvHnzvGOGYSghIUH33nuvvv/970uSXC6X4uLiVFFRodtuu+2sdQ+3zwkAAPjHQHsIrowDAACA33V3d6u+vl45OTnesZCQEOXk5Ki2tjZg6x46dEhOp9NnXZvNpszMzH7X7erqktvt9tkAAAAChTAOAAAAfnfs2DH19vYqLi7OZzwuLk5OpzNg657a97msW1JSIpvN5t0SExMDVh8AAABhHAAAAIa11atXy+VyebejR48GuyQAAHARI4wDAACA38XGxio0NFStra0+462trf0+nMEfTu37XNa1Wq2Kjo722QAAAAKFMA4AAA
B+Fx4errS0NFVXV3vHPB6PqqurlZWVFbB1J06cqPj4eJ913W633n333YCuCwAAMFBhwS4AAAAAFyeHw6GCggJNmzZNGRkZKisrU2dnpwoLCyVJixYt0rhx41RSUiLpi4c+NDU1eX9ubm5WY2OjoqKilJycLEnq6OjQwYMHvWscOnRIjY2NiomJUVJSkiwWi4qKivTwww/ra1/7miZOnKgHHnhACQkJPk9dBQAACBbCOAAAAAREfn6+2tvbtWbNGjmdTqWmpqqqqsr7cIUjR44oJOTLL2q0tLRo6tSp3t9LS0tVWlqq7Oxs1dTUSJJ2796tGTNmeOc4HA5JUkFBgSoqKiRJ//Ef/6HOzk4tWbJEx48f1w033KCqqipFREQE+IgBAADOzmIYhhHsIoYit9stm80ml8vFfUUAAMCA0D8MDXxOAABgMAbaQ3DPOAAAAAAAAMAkhHEAAAAAAACASQjjAAAAAAAAAJMQxgEAAAAAAAAmIYwDAAAAAAAATEIYBwAAAAAAAJiEMA4AAAAAAAAwCWEcAAAAAAAAYBLCOAAAAAAAAMAkhHEAAAAAAACASQjjAAAAAAAAAJMQxgEAAAAAAAAmIYwDAAAAAAAATEIYBwAAAAAAAJiEMA4AAAAAAAAwCWEcAAAAAAAAYBLCOAAAAAAAAMAkhHEAAAAAAACASQjjAAAAAAAAAJMQxgEAAAAAAAAmIYwDAAAAAAAATEIYBwAAAAAAAJiEMA4AAAAAAAAwCWEcAAAAAAAAYBLCOAAAAAAAAMAkhHEAAAAAAACASQjjAAAAAAAAAJMQxgEAAAAAAAAmIYwDAAAAAAAATHJBhHHl5eWy2+2KiIhQZmam6urq+p27b98+5eXlyW63y2KxqKys7LQ5O3fu1Jw5c5SQkCCLxaKtW7eeNscwDK1Zs0aXX365IiMjlZOTo48++siPRwUAAAAAAAD4CnoYV1lZKYfDoeLiYjU0NCglJUW5ublqa2vrc/6JEyc0adIkrV27VvHx8X3O6ezsVEpKisrLy/td99FHH9WTTz6pjRs36t1339XIkSOVm5urkydP+uW4AAAAAAAAgL9lMQzDCGYBmZmZSk9P1/r16yVJHo9HiYmJWrFihVatWnXG99rtdhUVFamoqKjfORaLRVu2bNG8efO8Y4ZhKCEhQffee6++//3vS5JcLpfi4uJUUVGh22677ax1u91u2Ww2uVwuRUdHn/1AAQDAsEf/MDTwOQEAgMEYaA8R1Cvjuru7VV9fr5ycHO9YSEiIcnJyVFtbG7B1Dx06JKfT6bOuzWZTZmZmv+t2dXXJ7Xb7bAAAAAAAAMC5CGoYd+zYMfX29iouLs5nPC4uTk6nM2Drntr3uaxbUlIim83m3RITEwNWHwAAAAAAAC5OQb9n3FCxevVquVwu73b06NFglwQAAAAAAIAhJqhhXGxsrEJDQ9Xa2uoz3tra2u/DGfzh1L7PZV2r1aro6GifDQAAAAAAADgXQQ3jwsPDlZaWpurqau+Yx+NRdXW1srKyArbuxIkTFR8f77Ou2+3Wu+++G9B1AQAAAAAAMLyFBbsAh8OhgoICTZs2TRkZGSorK1NnZ6cKCwslSYsWLdK4ceNUUlIi6YuHPjQ1NXl/bm5uVmNjo6KiopScnCxJ6ujo0MGDB71rHDp0SI2NjYqJiVFSUpIsFouKior08MMP62tf+5omTpyoBx54QAkJCT5PXQUAAAAAAAD8KehhXH5+vtrb27VmzRo5nU6lpqaqqqrK+3CFI0eOKCTkywv4WlpaNHXqVO/vpaWlKi0tVXZ2tmpqaiRJu3fv1owZM7xzHA6HJKmgoEAVFRWSpP/4j/9QZ2enlixZouPHj+uGG25QVVWVIiIiAnzEAAAAAAAAGK4shmEYwS5iKHK73bLZbHK5XNw/DgAADAj9w9DA5wQAAAZjoD0ET1MFAAAAAAAATEIYBwAAAAAAAJiEMA4AAAAAAAAwSdAf4DBUnbrVntvtDnIlAABgqD
jVN3DL3gsbfR4AABiMgfZ6hHGD9Nlnn0mSEhMTg1wJAAAYaj777DPZbLZgl4F+0OcBAIDzcbZej6epDpLH41FLS4tGjRoli8US7HIuOG63W4mJiTp69ChPIQsCzn9wcf6Di/MfXJz/MzMMQ5999pkSEhIUEsLdQi5U9Hlnxt/z4OL8BxfnP7g4/8HF+T+7gfZ6XBk3SCEhIRo/fnywy7jgRUdH85c0iDj/wcX5Dy7Of3Bx/vvHFXEXPvq8geHveXBx/oOL8x9cnP/g4vyf2UB6Pf5JFgAAAAAAADAJYRwAAAAAAABgEsI4BITValVxcbGsVmuwSxmWOP/BxfkPLs5/cHH+gYsff8+Di/MfXJz/4OL8Bxfn3394gAMAAAAAAABgEq6MAwAAAAAAAExCGAcAAAAAAACYhDAOAAAAAAAAMAlhHAAAAAAAAGASwjgMyqeffqrbb79d0dHRGj16tL73ve+po6PjjO85efKkli1bpjFjxigqKkp5eXlqbW3tc+6f//xnjR8/XhaLRcePHw/AEQx9gfgM3nvvPS1cuFCJiYmKjIzU1VdfrSeeeCLQhzIklJeXy263KyIiQpmZmaqrqzvj/Oeff15XXXWVIiIi9PWvf12/+93vfF43DENr1qzR5ZdfrsjISOXk5Oijjz4K5CEMaf48/z09Pbrvvvv09a9/XSNHjlRCQoIWLVqklpaWQB/GkOXvP/9ftXTpUlksFpWVlfm5agDng14vuOjzzEWfF1z0ecFFnxckBjAIM2fONFJSUoxdu3YZb775ppGcnGwsXLjwjO9ZunSpkZiYaFRXVxu7d+82rrvuOuOb3/xmn3NvvvlmY9asWYYk4y9/+UsAjmDoC8Rn8POf/9y4++67jZqaGuOPf/yj8Ytf/MKIjIw0nnrqqUAfzgVt8+bNRnh4uLFp0yZj3759xp133mmMHj3aaG1t7XP+22+/bYSGhhqPPvqo0dTUZPzgBz8wRowYYbz//vveOWvXrjVsNpuxdetW47333jPmzp1rTJw40fj888/NOqwhw9/n//jx40ZOTo5RWVlpfPDBB0Ztba2RkZFhpKWlmXlYQ0Yg/vyf8pvf/MZISUkxEhISjMcffzzARwLgXNDrBRd9nnno84KLPi+46POChzAO56ypqcmQZPzhD3/wjv3+9783LBaL0dzc3Od7jh8/bowYMcJ4/vnnvWP79+83JBm1tbU+c3/6058a2dnZRnV1NQ1aPwL9GXzVXXfdZcyYMcN/xQ9BGRkZxrJly7y/9/b2GgkJCUZJSUmf82+99VZj9uzZPmOZmZnGv/7rvxqGYRgej8eIj483HnvsMe/rx48fN6xWq/Hcc88F4AiGNn+f/77U1dUZkozDhw/7p+iLSKDO/yeffGKMGzfO2Lt3rzFhwgSaNOACQq8XXPR55qLPCy76vOCizwsevqaKc1ZbW6vRo0dr2rRp3rGcnByFhITo3Xff7fM99fX16unpUU5OjnfsqquuUlJSkmpra71jTU1N+uEPf6hnnnlGISH88exPID+Dv+VyuRQTE+O/4oeY7u5u1dfX+5y3kJAQ5eTk9HveamtrfeZLUm5urnf+oUOH5HQ6febYbDZlZmae8bMYjgJx/vvicrlksVg0evRov9R9sQjU+fd4PLrjjju0cuVKTZkyJTDFAxg0er3gos8zD31ecNHnBRd9XnDxv4A4Z06nU2PHjvUZCwsLU0xMjJxOZ7/vCQ8PP+2/AOPi4rzv6erq0sKFC/XYY48pKSkpILVfLAL1Gfytd955R5WVlVqyZIlf6h6Kjh07pt7eXsXFxfmMn+m8OZ3OM84/9Z/nss/hKhDn/2+dPHlS9913nxYuXKjo6Gj/FH6RCNT5f+SRRxQWFqa7777b/0UDOG/0esFFn2ce+rzgos8LLvq84CKMg9eqVatksVjOuH3wwQcBW3/16tW6+uqr9d3vfjdga1zogv0ZfNXevXt18803q7i4WDfddJMpawJm6+np0a233i
rDMLRhw4ZglzMs1NfX64knnlBFRYUsFkuwywGGlWD3GcO91wv2+f8q+jwMB/R55qPPG7iwYBeAC8e9996rxYsXn3HOpEmTFB8fr7a2Np/x//u//9Onn36q+Pj4Pt8XHx+v7u5uHT9+3Odf7FpbW73v2b59u95//3298MILkr54CpEkxcbG6v7779dDDz00yCMbOoL9GZzS1NSkb3/721qyZIl+8IMfDOpYLhaxsbEKDQ097WlwfZ23U+Lj4884/9R/tra26vLLL/eZk5qa6sfqh75AnP9TTjVohw8f1vbt2/nX0j4E4vy/+eabamtr87kqpre3V/fee6/Kysr08ccf+/cgAHgFu88Y7r1esM//KfR5X6LPCy76vOCizwuy4N6yDkPRqZvK7t692zv2yiuvDOimsi+88IJ37IMPPvC5qezBgweN999/37tt2rTJkGS88847/T7NZbgK1GdgGIaxd+9eY+zYscbKlSsDdwBDTEZGhrF8+XLv7729vca4cePOeGPTf/qnf/IZy8rKOu3GvqWlpd7XXS4XN/bth7/Pv2EYRnd3tzFv3jxjypQpRltbW2AKv0j4+/wfO3bM57/r33//fSMhIcG47777jA8++CBwBwJgwOj1gos+z1z0ecFFnxdc9HnBQxiHQZk5c6YxdepU49133zXeeust42tf+5rP49Y/+eQT48orrzTeffdd79jSpUuNpKQkY/v27cbu3buNrKwsIysrq9813njjDZ6wdQaB+Azef/9947LLLjO++93vGn/605+823D/H7HNmzcbVqvVqKioMJqamowlS5YYo0ePNpxOp2EYhnHHHXcYq1at8s5/++23jbCwMKO0tNTYv3+/UVxc3Ocj70ePHm28+OKLxp49e4ybb76ZR973w9/nv7u725g7d64xfvx4o7Gx0efPeldXV1CO8UIWiD//f4unbAEXHnq94KLPMw99XnDR5wUXfV7wEMZhUP785z8bCxcuNKKioozo6GijsLDQ+Oyzz7yvHzp0yJBkvPHGG96xzz//3LjrrruMSy+91LjkkkuM+fPnG3/605/6XYMG7cwC8RkUFxcbkk7bJkyYYOKRXZieeuopIykpyQgPDzcyMjKMXbt2eV/Lzs42CgoKfOb/+te/Nq644gojPDzcmDJlivHyyy/7vO7xeIwHHnjAiIuLM6xWq/Htb3/bOHDggBmHMiT58/yf+rvR1/bVvy/4kr///P8tmjTgwkOvF1z0eeaizwsu+rzgos8LDoth/PVmDQAAAAAAAAACiqepAgAAAAAAACYhjAMAAAAAAABMQhgHAAAAAAAAmIQwDgAAAAAAADAJYRwAAAAAAABgEsI4AAAAAAAAwCSEcQAAAAAAAIBJCOMAAAAAAAAAkxDGAcAFwmKxaOvWrcEuAwAAAH5GnwfgqwjjAEDS4sWLZbFYTttmzpwZ7NIAAABwHujzAFxowoJdAABcKGbOnKmnn37aZ8xqtQapGgAAAPgLfR6ACwlXxgHAX1mtVsXHx/tsl156qaQvvlqwYcMGzZo1S5GRkZo0aZJeeOEFn/e///77+vu//3tFRkZqzJgxWrJkiTo6OnzmbNq0SVOmTJHVatXll1+u5cuX+7x+7NgxzZ8/X5dccom+9rWvadu2bYE9aAAAgGGAPg/AhYQwDgAG6IEHHlBeXp7ee+893X777brtttu0f/9+SVJnZ6dyc3N16aWX6g9/+IOef/55vf766z5N2IYNG7Rs2TItWbJE77//vrZt26bk5GSfNR566CHdeuut2rNnj/7xH/9Rt99+uz799FNTjxMAAGC4oc8DYCoDAGAUFBQYoaGhxsiRI322H//4x4ZhGIYkY+nSpT7vyczMNP7t3/7NMAzD+NnPfmZceumlRkdHh/f1l19+2QgJCTGcTqdhGIaRkJBg3H///f3WIMn4wQ9+4P29o6PDkGT8/ve/99txAgAADDf0eQAuNNwzDgD+asaMGdqwYYPPWExMjPfnrKwsn9
eysrLU2NgoSdq/f79SUlI0cuRI7+vXX3+9PB6PDhw4IIvFopaWFn37298+Yw3XXnut9+eRI0cqOjpabW1tgz0kAAAAiD4PwIWFMA4A/mrkyJGnfZ3AXyIjIwc0b8SIET6/WywWeTyeQJQEAAAwbNDnAbiQcM84ABigXbt2nfb71VdfLUm6+uqr9d5776mzs9P7+ttvv62QkBBdeeWVGjVqlOx2u6qrq02tGQAAAGdHnwfATFwZBwB/1dXVJafT6TMWFham2NhYSdLzzz+vadOm6YYbbtCvfvUr1dXV6ec//7kk6fbbb1dxcbEKCgr04IMPqr29XStWrNAdd9yhuLg4SdKDDz6opUuXauzYsZo1a5Y+++wzvf3221qxYoW5BwoAADDM0OcBuJAQxgHAX1VVVenyyy/3Gbvyyiv1wQcfSPriCVibN2/WXXfdpcsvv1zPPfecJk+eLEm65JJL9Morr+jf//3flZ6erksuuUR5eXlat26dd18FBQU6efKkHn/8cX3/+99XbGysbrnlFvMOEAAAYJiizwNwIbEYhmEEuwgAuNBZLBZt2bJF8+bNC3YpAAAA8CP6PABm455xAAAAAAAAgEkI4wAAAAAAAACT8DVVAAAAAAAAwCRcGQcAAAAAAACYhDAOAAAAAAAAMAlhHAAAAAAAAGASwjgAAAAAAADAJIRxAAAAAAAAgEkI4wAAAAAAAACTEMYBAAAAAAAAJiGMAwAAAAAAAEzy/wF2BqNzm3P5PgAAAABJRU5ErkJggg==",
|
| 934 |
+
"text/plain": [
|
| 935 |
+
"<Figure size 1500x500 with 2 Axes>"
|
| 936 |
+
]
|
| 937 |
+
},
|
| 938 |
+
"metadata": {},
|
| 939 |
+
"output_type": "display_data"
|
| 940 |
+
}
|
| 941 |
+
],
|
| 942 |
+
"source": [
|
| 943 |
+
"# Plot the cost over training and validation sets\n",
|
| 944 |
+
"fig,ax = plt.subplots(1,2,figsize=(15,5))\n",
|
| 945 |
+
"for i,key in enumerate(cost_paths.keys()):\n",
|
| 946 |
+
" ax_sub=ax[i%3]\n",
|
| 947 |
+
" ax_sub.plot(cost_paths[key])\n",
|
| 948 |
+
" ax_sub.set_title(key)\n",
|
| 949 |
+
" ax_sub.set_xlabel('Epoch')\n",
|
| 950 |
+
" ax_sub.set_ylabel('Loss')\n",
|
| 951 |
+
"plt.show()"
|
| 952 |
+
]
|
| 953 |
+
},
|
| 954 |
+
{
|
| 955 |
+
"cell_type": "code",
|
| 956 |
+
"execution_count": 22,
|
| 957 |
+
"metadata": {},
|
| 958 |
+
"outputs": [],
|
| 959 |
+
"source": [
|
| 960 |
+
"# Save the entire model\n",
|
| 961 |
+
"torch.save(model, os.getcwd() + 'recommender.pt')\n"
|
| 962 |
+
]
|
| 963 |
+
},
|
| 964 |
+
{
|
| 965 |
+
"cell_type": "code",
|
| 966 |
+
"execution_count": 23,
|
| 967 |
+
"metadata": {},
|
| 968 |
+
"outputs": [
|
| 969 |
+
{
|
| 970 |
+
"name": "stdout",
|
| 971 |
+
"output_type": "stream",
|
| 972 |
+
"text": [
|
| 973 |
+
"Predicted rating is 0.1\n"
|
| 974 |
+
]
|
| 975 |
+
}
|
| 976 |
+
],
|
| 977 |
+
"source": [
|
| 978 |
+
"def predict_rating(model,playlist_id,artist_id, device):\n",
|
| 979 |
+
" # Get predicted rating for a specific user-item pair from model\n",
|
| 980 |
+
" model = model.to(device)\n",
|
| 981 |
+
" with torch.no_grad():\n",
|
| 982 |
+
" model.eval()\n",
|
| 983 |
+
" X = torch.Tensor([playlist_id,artist_id]).long().view(1,-1)\n",
|
| 984 |
+
" X = X.to(device)\n",
|
| 985 |
+
" pred = model.forward(X)\n",
|
| 986 |
+
" return pred\n",
|
| 987 |
+
"\n",
|
| 988 |
+
"# Get predicted rating for a random user-item pair\n",
|
| 989 |
+
"rating = predict_rating(model,5,10,device)\n",
|
| 990 |
+
"print('Predicted rating is {:.1f}'.format(rating.detach().cpu().item()))"
|
| 991 |
+
]
|
| 992 |
+
},
|
| 993 |
+
{
|
| 994 |
+
"cell_type": "code",
|
| 995 |
+
"execution_count": 24,
|
| 996 |
+
"metadata": {},
|
| 997 |
+
"outputs": [],
|
| 998 |
+
"source": [
|
| 999 |
+
"def generate_recommendations(movies,X,model,playlist_id,device):\n",
|
| 1000 |
+
" # Get predicted ratings for every movie\n",
|
| 1001 |
+
" pred_ratings = []\n",
|
| 1002 |
+
" for movie in movies['artist_id'].tolist():\n",
|
| 1003 |
+
" pred = predict_rating(model,playlist_id,movie,device)\n",
|
| 1004 |
+
" pred_ratings.append(pred.detach().cpu().item())\n",
|
| 1005 |
+
" # Sort movies by predicted rating\n",
|
| 1006 |
+
" idxs = np.argsort(np.array(pred_ratings))[::-1]\n",
|
| 1007 |
+
" recs = movies.iloc[idxs]['artist_id'].values.tolist()\n",
|
| 1008 |
+
" # Filter out movies already watched by user\n",
|
| 1009 |
+
" movies_watched = X.loc[X['playlist_id']==playlist_id, 'artist_id'].tolist()\n",
|
| 1010 |
+
" recs = [rec for rec in recs if not rec in movies_watched]\n",
|
| 1011 |
+
" # Filter to top 10 recommendations\n",
|
| 1012 |
+
" recs = recs[:10]\n",
|
| 1013 |
+
" # Convert artist_ids to titles\n",
|
| 1014 |
+
" recs_names = []\n",
|
| 1015 |
+
" for rec in recs:\n",
|
| 1016 |
+
" recs_names.append(movies.loc[movies['artist_id']==rec,'artist_name'].values[0])\n",
|
| 1017 |
+
" return recs_names"
|
| 1018 |
+
]
|
| 1019 |
+
},
|
| 1020 |
+
{
|
| 1021 |
+
"cell_type": "code",
|
| 1022 |
+
"execution_count": 25,
|
| 1023 |
+
"metadata": {},
|
| 1024 |
+
"outputs": [
|
| 1025 |
+
{
|
| 1026 |
+
"ename": "FileNotFoundError",
|
| 1027 |
+
"evalue": "[Errno 2] No such file or directory: 'c:\\\\Users\\\\keese\\\\OneDrive - Duke University\\\\Desktop\\\\recommendation_module_project/raw/data\\\\artists.csv'",
|
| 1028 |
+
"output_type": "error",
|
| 1029 |
+
"traceback": [
|
| 1030 |
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
| 1031 |
+
"\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
|
| 1032 |
+
"Cell \u001b[1;32mIn[25], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# Get recommendations for a random user\u001b[39;00m\n\u001b[0;32m 2\u001b[0m playlist_id \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m5\u001b[39m\n\u001b[1;32m----> 3\u001b[0m movies \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetcwd\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m/raw/data\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43martists.csv\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 4\u001b[0m recs \u001b[38;5;241m=\u001b[39m generate_recommendations(movies,X,model,playlist_id,device)\n\u001b[0;32m 5\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i,rec \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(recs):\n",
|
| 1033 |
+
"File \u001b[1;32mc:\\Users\\keese\\anaconda3\\envs\\term_project\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:1026\u001b[0m, in \u001b[0;36mread_csv\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[0;32m 1013\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[0;32m 1014\u001b[0m dialect,\n\u001b[0;32m 1015\u001b[0m delimiter,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 1022\u001b[0m dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[0;32m 1023\u001b[0m )\n\u001b[0;32m 1024\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[1;32m-> 1026\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n",
|
| 1034 |
+
"File \u001b[1;32mc:\\Users\\keese\\anaconda3\\envs\\term_project\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:620\u001b[0m, in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m 617\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[0;32m 619\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[1;32m--> 620\u001b[0m parser \u001b[38;5;241m=\u001b[39m TextFileReader(filepath_or_buffer, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwds)\n\u001b[0;32m 622\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[0;32m 623\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n",
|
| 1035 |
+
"File \u001b[1;32mc:\\Users\\keese\\anaconda3\\envs\\term_project\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:1620\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m 1617\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m 1619\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m-> 1620\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n",
|
| 1036 |
+
"File \u001b[1;32mc:\\Users\\keese\\anaconda3\\envs\\term_project\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:1880\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[1;34m(self, f, engine)\u001b[0m\n\u001b[0;32m 1878\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[0;32m 1879\u001b[0m mode \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m-> 1880\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;241m=\u001b[39m \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 1881\u001b[0m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1882\u001b[0m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1883\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1884\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcompression\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1885\u001b[0m \u001b[43m \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmemory_map\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1886\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_text\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1887\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding_errors\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstrict\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1888\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstorage_options\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1889\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1890\u001b[0m 
\u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 1891\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles\u001b[38;5;241m.\u001b[39mhandle\n",
|
| 1037 |
+
"File \u001b[1;32mc:\\Users\\keese\\anaconda3\\envs\\term_project\\lib\\site-packages\\pandas\\io\\common.py:873\u001b[0m, in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m 868\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m 869\u001b[0m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[0;32m 870\u001b[0m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[0;32m 871\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mencoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mmode:\n\u001b[0;32m 872\u001b[0m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[1;32m--> 873\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[0;32m 874\u001b[0m \u001b[43m \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 875\u001b[0m \u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 876\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 877\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 878\u001b[0m \u001b[43m \u001b[49m\u001b[43mnewline\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 
879\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 880\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 881\u001b[0m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[0;32m 882\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n",
|
| 1038 |
+
"\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'c:\\\\Users\\\\keese\\\\OneDrive - Duke University\\\\Desktop\\\\recommendation_module_project/raw/data\\\\artists.csv'"
|
| 1039 |
+
]
|
| 1040 |
+
}
|
| 1041 |
+
],
|
| 1042 |
+
"source": [
|
| 1043 |
+
"# Get recommendations for a random user\n",
|
| 1044 |
+
"playlist_id = 5\n",
|
| 1045 |
+
"movies = pd.read_csv(os.path.join(os.getcwd() + '/raw/data','artists.csv'))\n",
|
| 1046 |
+
"recs = generate_recommendations(movies,X,model,playlist_id,device)\n",
|
| 1047 |
+
"for i,rec in enumerate(recs):\n",
|
| 1048 |
+
" print('Recommendation {}: {}'.format(i,rec))"
|
| 1049 |
+
]
|
| 1050 |
+
}
|
| 1051 |
+
],
|
| 1052 |
+
"metadata": {
|
| 1053 |
+
"colab": {
|
| 1054 |
+
"machine_shape": "hm",
|
| 1055 |
+
"provenance": []
|
| 1056 |
+
},
|
| 1057 |
+
"kernelspec": {
|
| 1058 |
+
"display_name": "Python 3",
|
| 1059 |
+
"name": "python3"
|
| 1060 |
+
},
|
| 1061 |
+
"language_info": {
|
| 1062 |
+
"codemirror_mode": {
|
| 1063 |
+
"name": "ipython",
|
| 1064 |
+
"version": 3
|
| 1065 |
+
},
|
| 1066 |
+
"file_extension": ".py",
|
| 1067 |
+
"mimetype": "text/x-python",
|
| 1068 |
+
"name": "python",
|
| 1069 |
+
"nbconvert_exporter": "python",
|
| 1070 |
+
"pygments_lexer": "ipython3",
|
| 1071 |
+
"version": "3.9.19"
|
| 1072 |
+
}
|
| 1073 |
+
},
|
| 1074 |
+
"nbformat": 4,
|
| 1075 |
+
"nbformat_minor": 0
|
| 1076 |
+
}
|
recommendation_module_project/model_(2).ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
recommendation_module_project/model_(2).py
ADDED
|
@@ -0,0 +1,632 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""model (2).ipynb
|
| 3 |
+
|
| 4 |
+
Automatically generated by Colab.
|
| 5 |
+
|
| 6 |
+
Original file is located at
|
| 7 |
+
https://colab.research.google.com/drive/1suM8thtI4fHajXQDsGPBRodH_76G2iEI
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import os
|
| 11 |
+
import urllib.request
|
| 12 |
+
import zipfile
|
| 13 |
+
import json
|
| 14 |
+
import pandas as pd
|
| 15 |
+
import time
|
| 16 |
+
import torch
|
| 17 |
+
import numpy as np
|
| 18 |
+
import pandas as pd
|
| 19 |
+
import torch.nn as nn
|
| 20 |
+
import torch.nn.functional as F
|
| 21 |
+
import torch.optim as optim
|
| 22 |
+
from torch.utils.data import DataLoader, TensorDataset
|
| 23 |
+
from sklearn.model_selection import train_test_split
|
| 24 |
+
import matplotlib.pyplot as plt
|
| 25 |
+
from sklearn.preprocessing import LabelEncoder
|
| 26 |
+
|
| 27 |
+
from google.colab import drive
|
| 28 |
+
drive.mount('/content/drive')
|
| 29 |
+
|
| 30 |
+
# prompt: copy a file from another directory to current directory in python code and create folders if needed
|
| 31 |
+
|
| 32 |
+
import shutil
|
| 33 |
+
import os
|
| 34 |
+
|
| 35 |
+
def copy_file(src, dst):
    """
    Copies a file from src to dst, creating any necessary directories.

    Args:
        src: The path to the source file.
        dst: The path to the destination file.

    Raises:
        OSError: Propagated from ``os.makedirs`` or ``shutil.copy2`` (for
            example when ``src`` does not exist).
    """
    # A bare filename has an empty dirname and os.makedirs('') raises, so only
    # create the parent directory when there actually is one. exist_ok=True
    # replaces the separate exists() check, which was open to a race.
    dst_dir = os.path.dirname(dst)
    if dst_dir:
        os.makedirs(dst_dir, exist_ok=True)

    # Copy the file.
    shutil.copy2(src, dst)
|
| 50 |
+
|
| 51 |
+
# Stage the dataset archive from the mounted Drive into the local raw-data folder.
copy_file(
    '/content/drive/MyDrive/rec_data/spotify_million_playlist_dataset.zip',
    os.getcwd() + '/data/raw/spotify_million_playlist_dataset.zip',
)
|
| 52 |
+
|
| 53 |
+
def unzip_archive(filepath, dir_path):
    """
    Extracts every member of a zip archive into a directory.

    Args:
        filepath: The archive to read. Passed to ``zipfile.ZipFile`` unchanged,
            so a path string, a path-like object or a binary file object all
            work.
        dir_path: The directory to extract the archive members into.
    """
    # The archive is opened read-only and closed again by the context manager.
    with zipfile.ZipFile(filepath, 'r') as zip_ref:
        zip_ref.extractall(dir_path)
|
| 56 |
+
|
| 57 |
+
# Unpack the staged archive next to it, under data/raw/playlists.
unzip_archive(
    os.getcwd() + '/data/raw/spotify_million_playlist_dataset.zip',
    os.getcwd() + '/data/raw/playlists',
)
|
| 58 |
+
|
| 59 |
+
import shutil
|
| 60 |
+
|
| 61 |
+
def make_dir(directory):
    """
    Creates an empty directory, deleting whatever tree already exists there.

    WARNING: this is destructive. If ``directory`` already exists, it and all
    of its contents are removed before it is recreated.

    Args:
        directory: The path of the directory to (re)create. Missing parent
            directories are created as well.
    """
    # Remove the previous contents so the directory always starts out empty.
    if os.path.exists(directory):
        shutil.rmtree(directory)
    os.makedirs(directory)
|
| 67 |
+
|
| 68 |
+
# Start from an empty data/raw/data folder (make_dir wipes any existing one).
directory = os.getcwd() + '/data/raw/data'
make_dir(directory)
|
| 70 |
+
|
| 71 |
+
# Columns kept from each flattened playlist/track row, in output order.
cols = [
    'name', 'pid', 'num_followers',
    'pos', 'artist_name', 'track_name', 'album_name',
]
|
| 80 |
+
|
| 81 |
+
# Flatten the raw playlist JSON files into one row per (playlist, track) and
# write them to parquet in shards of 50 source files, so the full dataset is
# never held in memory at once.
directory = os.getcwd() + '/data/raw/playlists/data'
index = 0
shard_frames = []  # per-file frames collected since the last parquet flush

# Loop through all files in the directory
for filename in os.listdir(directory):
    full_path = os.path.join(directory, filename)

    # Only regular files whose name ends in '.json' are processed; endswith
    # avoids matching names that merely contain '.json' (e.g. 'x.json.bak').
    if not os.path.isfile(full_path) or not filename.endswith('.json'):
        continue

    index += 1

    # Progress line, rewritten in place. The denominator is hard-coded to 1000
    # (presumably the number of files in the full dataset — TODO confirm).
    print(f'\r{filename}\t{index}/1000\t{((index/1000)*100):.1f}%', end='')

    # Read as UTF-8 explicitly instead of relying on the platform default.
    with open(full_path, 'r', encoding='utf-8') as file:
        json_data = json.load(file)

    # One row per playlist, then one row per track within each playlist.
    temp = pd.DataFrame(json_data['playlists'])
    expanded_df = temp.explode('tracks').reset_index(drop=True)

    # Normalize the JSON data
    json_normalized = pd.json_normalize(expanded_df['tracks'])

    # Concatenate the original DataFrame with the normalized JSON data
    result = pd.concat([expanded_df.drop(columns=['tracks']), json_normalized], axis=1)

    # Collect frames in a list and concatenate once per shard; growing a
    # DataFrame with pd.concat on every file re-copies all earlier rows.
    shard_frames.append(result[cols])

    if index % 50 == 0:
        pd.concat(shard_frames, axis=0, ignore_index=True).to_parquet(
            f'{os.getcwd()}/data/raw/data/playlists_{index % 1000}.parquet'
        )
        shard_frames = []

    # Processing stops after 200 files; later files are not read.
    if index % 200 == 0:
        break

# Flush a trailing partial shard so its rows are not silently dropped when the
# number of processed files is not a multiple of 50.
if shard_frames:
    pd.concat(shard_frames, axis=0, ignore_index=True).to_parquet(
        f'{os.getcwd()}/data/raw/data/playlists_{index % 1000}.parquet'
    )
    shard_frames = []
|
| 119 |
+
|
| 120 |
+
import pyarrow.parquet as pq
|
| 121 |
+
|
| 122 |
+
def read_parquet_folder(folder_path):
    """Load every ``.parquet`` file directly inside *folder_path* into one frame.

    Files are read in sorted name order so that the row order of the result
    does not depend on the arbitrary order returned by ``os.listdir``.

    Args:
        folder_path: directory to scan (not recursive).

    Returns:
        A single DataFrame with a fresh 0..n-1 index.

    Raises:
        ValueError: if the folder contains no ``.parquet`` file.
    """
    parquet_files = sorted(
        name for name in os.listdir(folder_path) if name.endswith('.parquet')
    )
    if not parquet_files:
        # pd.concat([]) would raise a ValueError as well, but without saying
        # which folder was empty.
        raise ValueError(f'No .parquet files found in {folder_path}')

    dataframes = [
        pd.read_parquet(os.path.join(folder_path, name)) for name in parquet_files
    ]
    return pd.concat(dataframes, ignore_index=True)
|
| 131 |
+
|
| 132 |
+
# Read all parquet chunks back into one DataFrame.
folder_path = os.getcwd() + '/data/raw/data'
df = read_parquet_folder(folder_path)

# Folder the id-mapping CSVs are written to; make_dir is expected to create it.
directory = os.getcwd() + '/data/raw/mappings'
make_dir(directory)
|
| 137 |
+
|
| 138 |
+
def create_ids(df, col, name):
    """Add an integer id column for the distinct values of ``df[col]``.

    Ids are assigned in order of first appearance, starting at 0. The new
    column is named ``f'{name}_id'`` and is added to *df* in place. The
    distinct (id, value) pairs are also written to
    ``<cwd>/data/raw/mappings/<name>.csv``.

    Args:
        df: frame to annotate (mutated).
        col: name of the column whose values get ids.
        name: prefix for the id column and stem of the CSV file.

    Returns:
        The same *df* object, now carrying the id column.
    """
    id_col = f'{name}_id'

    # Distinct values in order of first appearance -> consecutive ids.
    distinct_values = df[col].unique()
    value_to_id = dict(zip(distinct_values, range(len(distinct_values))))

    df[id_col] = df[col].map(value_to_id)

    # Persist the lookup table so ids can be translated back later.
    mapping = df[[id_col, col]].drop_duplicates()
    mapping.to_csv(os.getcwd() + f'/data/raw/mappings/{name}.csv')

    return df
|
| 147 |
+
|
| 148 |
+
# Only the playlist id is created through create_ids; the other calls are disabled.
# df = create_ids(df, 'artist_name', 'artist')
df = create_ids(df, 'pid', 'playlist')
# df = create_ids(df, 'track_name', 'track')
# df = create_ids(df, 'album_name', 'album')

# Per row: number of distinct track names sharing its (playlist, artist, album).
song_groups = df.groupby(['pid','artist_name','album_name'])
df['song_count'] = song_groups['track_name'].transform('nunique')

# Playlist length taken as the highest track position plus one.
df['playlist_songs'] = df.groupby(['pid'])['pos'].transform('max')
df['playlist_songs'] += 1

# Combined "artist::album" key used as the item to recommend.
df['artist_album'] = df[['artist_name', 'album_name']].agg('::'.join, axis=1)

# Map every distinct combined key to a consecutive id (order of first appearance).
value_to_id = {val: i for i, val in enumerate(df['artist_album'].unique())}
df['artist_album_id'] = df['artist_album'].map(value_to_id)

# Persist the item lookup table, one row per distinct (item, track) combination.
artist_album_columns = ['artist_album_id', 'artist_album', 'artist_name', 'album_name', 'track_name']
df[artist_album_columns].drop_duplicates().to_csv(os.getcwd() + '/data/raw/mappings/artist_album.csv')

# df = df.groupby(['playlist_id','artist_album','artist_album_id','playlist_songs']).agg({
# 'song_count': 'sum',
# 'track_name': '|'.join,
# 'track_name': '|'.join,
# }).reset_index()
# NOTE(review): song_count already holds a per-row group count, so summing it
# over the rows of the same (playlist, item) group adds that count once per
# row -- confirm this is the intended weighting.
df['song_count'] = df.groupby(['playlist_id','artist_album_id'])['song_count'].transform('sum')

# Integer-encode the track names.
encoder = LabelEncoder()
encoder.fit(df['track_name'])
df['track_id'] = encoder.transform(df['track_name'])

# df['artist_count'] = df.groupby(['playlist_id','artist_id'])['song_id'].transform('nunique')
# df['album_count'] = df.groupby(['playlist_id','artist_id','album_id'])['song_id'].transform('nunique')
# df['song_count'] = df.groupby(['artist_id'])['song_id'].transform('count')

# df['artist_percent'] = df['artist_count'] / df['playlist_songs']
df['song_percent'] = df['song_count'] / df['playlist_songs']
# df['album_percent'] = df['album_count'] / df['playlist_songs']

import numpy as np

# Squash the ratio through a logistic function to obtain the training target.
df['song_percent'] = 1 / (1 + np.exp(-df['song_percent']))

# One row per distinct (playlist, item, target) triple.
artists = df.loc[:,['playlist_id','artist_album_id','song_percent']].drop_duplicates()
artists.head()

# Model inputs are the two id columns; the target is the squashed share.
X = artists.loc[:,['playlist_id','artist_album_id',]]
y = artists.loc[:,'song_percent']

# Hold out 20% of the rows for validation (fixed seed).
X_train, X_val, y_train, y_val = train_test_split(X,y,random_state=0, test_size=0.2)
|
| 201 |
+
|
| 202 |
+
def prep_dataloaders(X_train,y_train,X_val,y_val,batch_size):
    """Wrap the train/validation splits in PyTorch DataLoaders.

    Features are converted to int64 tensors and targets to float32 tensors.

    Args:
        X_train, y_train: training features and targets (array-likes).
        X_val, y_val: validation features and targets (array-likes).
        batch_size: mini-batch size used by both loaders.

    Returns:
        ``(trainloader, valloader)``; only the training loader shuffles.
    """
    def _as_dataset(features, targets):
        # Pair the integer id features with their float targets.
        ids = torch.from_numpy(np.array(features)).long()
        scores = torch.from_numpy(np.array(targets)).float()
        return TensorDataset(ids, scores)

    trainset = _as_dataset(X_train, y_train)
    valset = _as_dataset(X_val, y_val)

    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)
    valloader = torch.utils.data.DataLoader(valset, batch_size=batch_size, shuffle=False)
    return trainloader, valloader
|
| 214 |
+
|
| 215 |
+
# Mini-batch size shared by the training and validation loaders.
batchsize = 64
trainloader, valloader = prep_dataloaders(
    X_train, y_train, X_val, y_val, batchsize
)
|
| 217 |
+
|
| 218 |
+
class NNColabFiltering(nn.Module):
    """Two-layer network over concatenated user and item embeddings.

    ``forward`` expects an integer tensor whose column 0 holds user indices
    and column 1 holds item indices, and returns one prediction per row with
    shape ``(batch, 1)``, scaled into ``rating_range``.
    """

    def __init__(self, n_playlists, n_artists, embedding_dim_users, embedding_dim_items, n_activations, rating_range):
        super().__init__()
        self.user_embeddings = nn.Embedding(n_playlists, embedding_dim_users)
        self.item_embeddings = nn.Embedding(n_artists, embedding_dim_items)
        # The hidden layer consumes both embeddings side by side.
        concat_dim = embedding_dim_users + embedding_dim_items
        self.fc1 = nn.Linear(concat_dim, n_activations)
        self.fc2 = nn.Linear(n_activations, 1)
        # (low, high) bounds of the predicted value.
        self.rating_range = rating_range

    def forward(self, X):
        user_vecs = self.user_embeddings(X[:, 0])
        item_vecs = self.item_embeddings(X[:, 1])
        hidden = F.relu(self.fc1(torch.cat((user_vecs, item_vecs), dim=1)))
        logits = self.fc2(hidden)
        # Sigmoid gives (0, 1); stretch and shift it into [low, high].
        low, high = self.rating_range[0], self.rating_range[1]
        return torch.sigmoid(logits) * (high - low) + low
|
| 241 |
+
|
| 242 |
+
class PMFRecommender(nn.Module):
    """Matrix-factorisation model: embedding dot product plus user and item biases.

    ``forward`` expects an integer tensor with user indices in column 0 and
    item indices in column 1, and returns predictions of shape ``(batch, 1)``
    scaled into ``rating_range``.
    """

    def __init__(self, n_users, n_items, embedding_dim, rating_range):
        super().__init__()
        self.user_embeddings = nn.Embedding(n_users, embedding_dim)
        self.user_bias = nn.Embedding(n_users, 1)
        self.item_embeddings = nn.Embedding(n_items, embedding_dim)
        self.item_bias = nn.Embedding(n_items, 1)
        # (low, high) bounds of the predicted value.
        self.rating_range = rating_range

    def forward(self, X):
        users, items = X[:, 0], X[:, 1]
        # Row-wise dot product, kept as a column so the biases add elementwise.
        interaction = (self.user_embeddings(users) * self.item_embeddings(items)).sum(dim=1, keepdim=True)
        logits = interaction + self.user_bias(users) + self.item_bias(items)
        low, high = self.rating_range[0], self.rating_range[1]
        return torch.sigmoid(logits) * (high - low) + low
|
| 262 |
+
|
| 263 |
+
class NNHybridFiltering(nn.Module):
    """Two-layer network over concatenated user, item and genre embeddings.

    ``forward`` expects an integer tensor with user, item and genre indices in
    columns 0, 1 and 2, and returns predictions of shape ``(batch, 1)`` scaled
    into ``rating_range``.
    """

    def __init__(self, n_users, n_items, n_genres, embdim_users, embdim_items, embdim_genres, n_activations, rating_range):
        super().__init__()
        self.user_embeddings = nn.Embedding(n_users, embdim_users)
        self.item_embeddings = nn.Embedding(n_items, embdim_items)
        self.genre_embeddings = nn.Embedding(n_genres, embdim_genres)
        # The hidden layer consumes the three embeddings side by side.
        concat_dim = embdim_users + embdim_items + embdim_genres
        self.fc1 = nn.Linear(concat_dim, n_activations)
        self.fc2 = nn.Linear(n_activations, 1)
        # (low, high) bounds of the predicted value.
        self.rating_range = rating_range

    def forward(self, X):
        parts = (
            self.user_embeddings(X[:, 0]),
            self.item_embeddings(X[:, 1]),
            self.genre_embeddings(X[:, 2]),
        )
        hidden = F.relu(self.fc1(torch.cat(parts, dim=1)))
        logits = self.fc2(hidden)
        low, high = self.rating_range[0], self.rating_range[1]
        return torch.sigmoid(logits) * (high - low) + low
|
| 288 |
+
|
| 289 |
+
class EfficientHybridFiltering(nn.Module):
    """Factorisation model whose user-item product is gated by projected genre factors.

    ``forward`` expects an integer tensor with user, item and genre indices in
    columns 0, 1 and 2, and returns a 1-D tensor of predictions (one per row)
    scaled into ``rating_range``.
    """

    def __init__(self, n_users, n_items, n_genres, embdim, n_factors, rating_range):
        super().__init__()
        self.user_factors = nn.Embedding(n_users, n_factors)
        self.item_factors = nn.Embedding(n_items, n_factors)
        self.genre_embeddings = nn.Embedding(n_genres, embdim)
        self.user_bias = nn.Embedding(n_users, 1)
        self.item_bias = nn.Embedding(n_items, 1)
        # Projects genre embeddings into the same space as the latent factors.
        self.genre_projection = nn.Linear(embdim, n_factors)
        self.global_bias = nn.Parameter(torch.zeros(1))
        # (low, high) bounds of the predicted value.
        self.rating_range = rating_range

    def forward(self, X):
        users = X[:, 0]
        items = X[:, 1]
        genres = X[:, 2]

        genre_factors = self.genre_projection(self.genre_embeddings(genres))
        # Three-way elementwise product summed over the factor dimension.
        interaction = (self.user_factors(users) * self.item_factors(items) * genre_factors).sum(1)
        bias = self.global_bias + self.user_bias(users).squeeze() + self.item_bias(items).squeeze()

        low, high = self.rating_range[0], self.rating_range[1]
        return torch.sigmoid(interaction + bias) * (high - low) + low
|
| 314 |
+
|
| 315 |
+
def train_model(model, criterion, optimizer, dataloaders, device, num_epochs=5, scheduler=None):
    """Run a training and a validation pass over *dataloaders* for each epoch.

    Args:
        model: module to optimise; it is moved to *device* first.
        criterion: loss, called as ``criterion(outputs, labels)`` with the
            model output flattened to 1-D.
        optimizer: optimiser stepped after every training batch.
        dataloaders: mapping with ``'train'`` and ``'val'`` loaders yielding
            ``(inputs, labels)`` batches.
        device: device the model and every batch are moved to.
        num_epochs: number of epochs to run.
        scheduler: optional learning-rate scheduler, stepped once per epoch
            after the training phase.

    Returns:
        Dict with ``'train'`` and ``'val'`` lists holding one value per epoch:
        the batch-size-weighted mean of ``sqrt(batch loss)``.
    """
    model = model.to(device)  # Send model to GPU if available
    since = time.time()

    costpaths = {'train': [], 'val': []}

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            n_batches = len(dataloaders[phase])

            for index, (inputs, labels) in enumerate(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Zero the weight gradients
                optimizer.zero_grad()

                # Track gradients only while training
                with torch.set_grad_enabled(is_train):
                    # Call the module itself rather than .forward() so that
                    # registered hooks run as well.
                    outputs = model(inputs).view(-1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Accumulate sqrt(batch loss) weighted by the batch size
                running_loss += np.sqrt(loss.item()) * labels.size(0)
                print(f'\r{running_loss} {index} {(index / n_batches)*100:.2f}%', end='')

            # Step along learning rate scheduler when in train
            if is_train and (scheduler is not None):
                scheduler.step()

            # Average over the number of samples and record it for this epoch
            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            costpaths[phase].append(epoch_loss)
            print('\n{} loss: {:.4f}'.format(phase, epoch_loss))

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    return costpaths
|
| 375 |
+
|
| 376 |
+
# Train the model
dataloaders = {'train': trainloader, 'val': valloader}

# Embedding table sizes: largest id seen plus one.
n_users = X.loc[:, 'playlist_id'].max() + 1
n_items = X.loc[:, 'artist_album_id'].max() + 1
model = NNColabFiltering(
    n_users,
    n_items,
    embedding_dim_users=50,
    embedding_dim_items=50,
    n_activations=100,
    rating_range=[0., 1.],
)

# Optimisation settings
criterion = nn.MSELoss()
lr = 0.001
n_epochs = 10
wd = 1e-3
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

cost_paths = train_model(model, criterion, optimizer, dataloaders, device, n_epochs, scheduler=None)

# Plot the per-epoch cost for the training and validation sets side by side
fig, ax = plt.subplots(1, 2, figsize=(15, 5))
for i, key in enumerate(cost_paths.keys()):
    ax_sub = ax[i % 3]
    ax_sub.plot(cost_paths[key])
    ax_sub.set_title(key)
    ax_sub.set_xlabel('Epoch')
    ax_sub.set_ylabel('Loss')
plt.show()

# Save the entire model object (not just its state_dict)
torch.save(model, os.getcwd() + '/recommender.pt')

# Notebook-style inspection of the rows of playlist 5 (no effect in a script)
df[df['playlist_id'] == 5]
|
| 404 |
+
|
| 405 |
+
def predict_rating(model,userId,movieId, device):
    """Return the model's prediction for a single (user, item) pair.

    Args:
        model: module taking an integer tensor of shape ``(1, 2)`` holding
            ``[userId, movieId]``; it is moved to *device* and put in eval mode.
        userId: integer user index.
        movieId: integer item index.
        device: device to run the prediction on.

    Returns:
        Whatever tensor the model returns for that one-row input.
    """
    model = model.to(device)
    model.eval()
    with torch.no_grad():
        # Build the ids directly as int64. Going through torch.Tensor (float32)
        # and then .long() silently rounds ids above 2**24.
        X = torch.tensor([[userId, movieId]], dtype=torch.long, device=device)
        pred = model(X)
    return pred
|
| 414 |
+
|
| 415 |
+
# Predicted rating for one fixed pair: user (playlist) 5 and item 10
rating = predict_rating(model, 5, 10, device)
print('Predicted rating is {:.1f}'.format(rating.detach().cpu().item()))
|
| 418 |
+
|
| 419 |
+
def generate_recommendations(movies,X,model,userId,device):
    """Return display names of the 10 best-scoring items the user does not have yet.

    Args:
        movies: item table with an ``'artist_album_id'`` column and a name
            column (``'title'`` if present, otherwise ``'artist_album'``).
        X: interaction table with ``'playlist_id'`` and ``'artist_album_id'``
            columns, used to find the items the user already has.
        model: trained model passed to ``predict_rating``.
        userId: user (playlist) index to recommend for.
        device: device the predictions run on.

    Returns:
        List of up to 10 item names, best prediction first.
    """
    # Score each distinct item once; the table may hold several rows per item.
    item_ids = movies['artist_album_id'].drop_duplicates().tolist()
    pred_ratings = [
        predict_rating(model, userId, item, device).detach().cpu().item()
        for item in item_ids
    ]

    # Highest predicted rating first
    order = np.argsort(np.array(pred_ratings))[::-1]

    # Drop items already in the user's playlist (set gives O(1) membership)
    already_there = set(X.loc[X['playlist_id'] == userId, 'artist_album_id'].tolist())
    recs = [item_ids[i] for i in order if item_ids[i] not in already_there][:10]

    # The mapping table built in this file has no 'title' column; fall back to
    # the combined 'artist_album' name when it is missing.
    name_col = 'title' if 'title' in movies.columns else 'artist_album'
    names = movies.drop_duplicates(subset='artist_album_id').set_index('artist_album_id')[name_col]
    return [names.loc[rec] for rec in recs]
|
| 438 |
+
|
| 439 |
+
import torch
|
| 440 |
+
import torch.nn.functional as F
|
| 441 |
+
import numpy as np
|
| 442 |
+
|
| 443 |
+
def generate_recommendations_efficient(movies, user_data, model, userId, device, top_n=10, batch_size=1024):
    """Score all items for one user in batches and return the best unseen ones.

    Args:
        movies: item table with ``'artist_album_id'`` and ``'artist_album'``
            columns; only the first row per id is used.
        user_data: interaction table with ``'playlist_id'`` and
            ``'artist_album_id'`` columns.
        model: module taking an int64 tensor of ``[user, item]`` rows.
        userId: user (playlist) index to recommend for.
        device: device the model and the inputs are placed on.
        top_n: maximum number of recommendations.
        batch_size: number of items scored per forward pass.

    Returns:
        List of at most *top_n* ``'artist_album'`` names, best prediction first.
    """
    model = model.to(device)
    model.eval()

    # One entry per item id so that no item can be recommended twice.
    catalog = movies.drop_duplicates(subset='artist_album_id')
    item_ids = catalog['artist_album_id'].to_numpy()
    if top_n <= 0 or len(item_ids) == 0:
        return []

    all_movie_ids = torch.tensor(item_ids, dtype=torch.long, device=device)
    user_ids = torch.full((len(all_movie_ids),), userId, dtype=torch.long, device=device)
    all_predictions = torch.zeros(len(all_movie_ids), device=device)

    # Generate predictions in batches
    with torch.no_grad():
        for start in range(0, len(all_movie_ids), batch_size):
            stop = start + batch_size
            input_tensor = torch.stack([user_ids[start:stop], all_movie_ids[start:stop]], dim=1)
            # reshape(-1) flattens (batch, 1) and (batch,) outputs alike
            all_predictions[start:stop] = model(input_tensor).reshape(-1)

    predictions = all_predictions.cpu().numpy()

    # Positions of the items the user does not have yet
    seen = user_data.loc[user_data['playlist_id'] == userId, 'artist_album_id'].to_numpy()
    candidates = np.flatnonzero(~np.isin(item_ids, seen))

    # Best first, keeping the ranking when translating ids to names
    ranked = candidates[np.argsort(predictions[candidates])[::-1][:top_n]]
    return catalog['artist_album'].to_numpy()[ranked].tolist()
|
| 482 |
+
|
| 483 |
+
# Example usage
userId = 5 # The user you want recommendations for

# Load the item lookup table and keep one row per distinct item description
mapping_dir = os.getcwd() + '/data/raw/mappings'
movies = pd.read_csv(os.path.join(mapping_dir, 'artist_album.csv'))
movies = movies[['artist_album_id', 'artist_album', 'artist_name', 'album_name']].drop_duplicates()

recommendations = generate_recommendations_efficient(movies, X, model, userId, device)

print("Recommendations for user", userId)
for rec in recommendations:
    print(rec)

# Notebook-style inspection of the rows of playlist 5 (no effect in a script)
df[df['playlist_id'] == 5]
|
| 494 |
+
|
| 495 |
+
import torch
|
| 496 |
+
import torch.nn.functional as F
|
| 497 |
+
|
| 498 |
+
def generate_recommendations_for_new_user(model, item_id, n_similar_items=50, top_n=10):
    """Recommend items for a user known only through one item of interest.

    The embeddings of the items most similar to *item_id* (cosine similarity)
    are averaged and fed to the model's dense layers in place of a user
    embedding, so the model must have ``item_embeddings``, ``fc1``, ``fc2``
    and ``rating_range`` attributes, and its user and item embeddings must
    have the same width.

    Args:
        model: trained model as described above.
        item_id: index of the item the user has shown interest in.
        n_similar_items: how many neighbouring items to average.
        top_n: number of recommendations to return.

    Returns:
        ``(top_items, top_scores)`` as NumPy arrays, best score first.
    """
    model.eval()
    device = next(model.parameters()).device
    low, high = model.rating_range[0], model.rating_range[1]

    # Nothing below needs gradients, so skip building an autograd graph.
    with torch.no_grad():
        all_item_embeddings = model.item_embeddings.weight
        n_items = model.item_embeddings.num_embeddings
        item_embedding = all_item_embeddings[item_id].unsqueeze(0)

        # Compute similarity with all items
        similarity = F.cosine_similarity(item_embedding, all_item_embeddings)

        # Exclude the query item explicitly. Skipping "rank 0" is not reliable:
        # a parallel embedding ties with it and a zero embedding scores 0.
        similarity[item_id] = float('-inf')
        k = min(n_similar_items, n_items - 1)
        if k > 0:
            neighbours = torch.topk(similarity, k).indices
            virtual_user_embedding = all_item_embeddings[neighbours].mean(dim=0, keepdim=True)
        else:
            # No other item to average over: use the query item's own embedding.
            virtual_user_embedding = item_embedding

        all_items = torch.arange(n_items, device=device)

        # Generate predictions in batches
        batch_size = 1000
        all_predictions = []
        for i in range(0, n_items, batch_size):
            batch_items = all_items[i:i+batch_size]
            batch_users = virtual_user_embedding.expand(len(batch_items), -1)
            batch_items_emb = model.item_embeddings(batch_items)

            # Same computation as the model's forward pass, with the virtual
            # user embedding in place of a looked-up one.
            embeddings = torch.cat([batch_users, batch_items_emb], dim=1)
            preds = model.fc2(F.relu(model.fc1(embeddings)))
            preds = torch.sigmoid(preds) * (high - low) + low
            all_predictions.append(preds)

        predictions = torch.cat(all_predictions).reshape(-1)

        # Get top N recommendations
        top_indices = torch.argsort(predictions, descending=True)[:top_n]
        top_items = all_items[top_indices].cpu().numpy()
        top_scores = predictions[top_indices].cpu().numpy()

    return top_items, top_scores
|
| 548 |
+
|
| 549 |
+
# Example usage
item_id = 5 # The item the new user has shown interest in

top_items, top_scores = generate_recommendations_for_new_user(model, item_id)

print("Recommendations based on item", item_id)
# Items and scores come back aligned, best score first
for item, score in zip(top_items, top_scores):
    print(f"Item ID: {item}, Predicted Rating: {score:.2f}")
|
| 557 |
+
|
| 558 |
+
def generate_recommendations(movies,X,model,userId,encoder,device):
    """Return album names of the 10 best-scoring items the user does not have yet.

    Args:
        movies: item table with ``'artist_album_id'`` and ``'album_name'``
            columns.
        X: interaction table with ``'playlist_id'`` and ``'artist_album_id'``
            columns, used to find the items the user already has.
        model: trained model passed to ``predict_rating``.
        userId: user (playlist) index to recommend for.
        encoder: unused; kept so existing call sites keep working. The
            ``predict_rating`` defined in this file takes only
            ``(model, userId, movieId, device)``.
        device: device the predictions run on.

    Returns:
        List of up to 10 album names, best prediction first.
    """
    # Score each distinct item once; the table may hold several rows per item.
    item_ids = movies['artist_album_id'].drop_duplicates().tolist()
    pred_ratings = []
    for movie in item_ids:
        pred = predict_rating(model, userId, movie, device)
        pred_ratings.append(pred.detach().cpu().item())
        print(f"\r{len(pred_ratings)}\t{len(item_ids)}", end='')

    # Highest predicted rating first
    order = np.argsort(np.array(pred_ratings))[::-1]

    # Drop items already in the user's playlist (set gives O(1) membership)
    already_there = set(X.loc[X['playlist_id'] == userId, 'artist_album_id'].tolist())
    recs = [item_ids[i] for i in order if item_ids[i] not in already_there][:10]

    # Translate ids to album names using the first row of each item
    names = movies.drop_duplicates(subset='artist_album_id').set_index('artist_album_id')['album_name']
    return [names.loc[rec] for rec in recs]
|
| 579 |
+
|
| 580 |
+
import torch
|
| 581 |
+
import torch.nn.functional as F
|
| 582 |
+
|
| 583 |
+
def generate_rapid_recommendations(model, user_id, item_ids, genre_ids, top_n=10, batch_size=10000):
    """Score the given items for one user in batches and return the best ones.

    Args:
        model: module with at least one parameter (its device is used),
            taking an int64 tensor of ``[user, item, genre]`` rows.
        user_id: user index repeated for every item.
        item_ids: sequence of integer item indices.
        genre_ids: sequence of integer genre indices, aligned with *item_ids*.
        top_n: number of recommendations to return.
        batch_size: number of items scored per forward pass.

    Returns:
        ``(top_items, top_scores)`` as NumPy arrays, best score first.
    """
    model.eval()
    device = next(model.parameters()).device

    # Input columns, all placed on the model's device
    num_items = len(item_ids)
    items = torch.tensor(item_ids, dtype=torch.long, device=device)
    genres = torch.tensor(genre_ids, dtype=torch.long, device=device)
    users = torch.full((num_items,), user_id, dtype=torch.long, device=device)

    # Filled slice by slice as the batches are scored
    all_predictions = torch.zeros(num_items, device=device)

    with torch.no_grad():
        for start in range(0, num_items, batch_size):
            window = slice(start, start + batch_size)
            X = torch.stack([users[window], items[window], genres[window]], dim=1)
            all_predictions[window] = model(X).squeeze()

    # Highest predictions first, cut to top_n
    ranked = torch.argsort(all_predictions, descending=True)[:top_n]
    top_items = items[ranked].cpu().numpy()
    top_scores = all_predictions[ranked].cpu().numpy()
    return top_items, top_scores
|
| 613 |
+
|
| 614 |
+
# Example usage
user_id = 5 # The user you want recommendations for
# Score every distinct item id in the data. list(df) would yield the column
# labels of the frame, not item ids.
item_ids = df['artist_album_id'].unique().tolist()
# One genre id per item; all items are treated as genre 0 for this example
genre_ids = [0] * len(item_ids)

# Generate recommendations
top_items, top_scores = generate_rapid_recommendations(model, user_id, item_ids, genre_ids)

# Print results
for item, score in zip(top_items, top_scores):
    print(f"Item ID: {item}, Predicted Rating: {score:.2f}")

# Get recommendations for one fixed user (playlist 5)
userId = 5
movies = pd.read_csv(os.path.join(os.getcwd() + '/data/raw/mappings','artist_album.csv'))
recs = generate_recommendations(movies,X,model,userId,encoder,device)
for i,rec in enumerate(recs):
    print('Recommendation {}: {}'.format(i,rec))
|
| 632 |
+
|
recommendation_module_project/recommender.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7c8e2b8fc581d039c84e3e0c9983b17a6fa328563c253a03b139d49dc87f3e9
|
| 3 |
+
size 120583728
|