From d345d3eb9f00af9f336ae80d127085a97614930d Mon Sep 17 00:00:00 2001 From: Susan Li Date: Mon, 17 Jan 2022 00:24:53 -0500 Subject: [PATCH] Add notebook --- BPR_Music_RecSys.ipynb | 1239 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1239 insertions(+) create mode 100644 BPR_Music_RecSys.ipynb diff --git a/BPR_Music_RecSys.ipynb b/BPR_Music_RecSys.ipynb new file mode 100644 index 0000000..8bf356c --- /dev/null +++ b/BPR_Music_RecSys.ipynb @@ -0,0 +1,1239 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "e08f25dd", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import pandas as pd\n", + "import numpy as np\n", + "import scipy.sparse as sparse\n", + "from scipy.sparse.linalg import spsolve\n", + "import random\n", + "\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "\n", + "import implicit\n", + "\n", + "# Load the data\n", + "raw_data = pd.read_table('data/usersha1-artmbid-artname-plays.tsv')\n", + "raw_data = raw_data.drop(raw_data.columns[1], axis=1)\n", + "raw_data.columns = ['user', 'artist', 'plays']" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "52d78ae2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userartistplays
000000c289a1829a808ac09c00daf10bc3c4e223bdie Ärzte1099
100000c289a1829a808ac09c00daf10bc3c4e223bmelissa etheridge897
200000c289a1829a808ac09c00daf10bc3c4e223belvenking717
300000c289a1829a808ac09c00daf10bc3c4e223bjuliette & the licks706
400000c289a1829a808ac09c00daf10bc3c4e223bred hot chili peppers691
\n", + "
" + ], + "text/plain": [ + " user artist plays\n", + "0 00000c289a1829a808ac09c00daf10bc3c4e223b die Ärzte 1099\n", + "1 00000c289a1829a808ac09c00daf10bc3c4e223b melissa etheridge 897\n", + "2 00000c289a1829a808ac09c00daf10bc3c4e223b elvenking 717\n", + "3 00000c289a1829a808ac09c00daf10bc3c4e223b juliette & the licks 706\n", + "4 00000c289a1829a808ac09c00daf10bc3c4e223b red hot chili peppers 691" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d2bd5fc5", + "metadata": {}, + "outputs": [], + "source": [ + "raw_data1 = raw_data[0:2000000]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "58320658", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total number of users in the data: 40913\n", + "Total number of artists in the data: 110820\n" + ] + } + ], + "source": [ + "print(f'Total number of users in the data: {raw_data1.user.nunique()}')\n", + "print(f'Total number of artists in the data: {raw_data1.artist.nunique()}')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "ffcd2158", + "metadata": {}, + "outputs": [], + "source": [ + "data = raw_data1.dropna()\n", + "data = data.copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "79cc77d8", + "metadata": {}, + "outputs": [], + "source": [ + "data['user'] = data['user'].astype('category')\n", + "data['artist'] = data['artist'].astype('category')\n", + "data['user_id'] = data['user'].cat.codes\n", + "data['artist_id'] = data['artist'].cat.codes" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "5912ec13", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userartistplaysuser_idartist_id
000000c289a1829a808ac09c00daf10bc3c4e223bdie Ärzte1099030264
100000c289a1829a808ac09c00daf10bc3c4e223bmelissa etheridge897066512
\n", + "
" + ], + "text/plain": [ + " user artist plays \\\n", + "0 00000c289a1829a808ac09c00daf10bc3c4e223b die Ärzte 1099 \n", + "1 00000c289a1829a808ac09c00daf10bc3c4e223b melissa etheridge 897 \n", + "\n", + " user_id artist_id \n", + "0 0 30264 \n", + "1 0 66512 " + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "0c90e21b", + "metadata": {}, + "outputs": [], + "source": [ + "sparse_item_user = sparse.csr_matrix((data['plays'].astype(float), (data['artist_id'], data['user_id'])))\n", + "sparse_user_item = sparse.csr_matrix((data['plays'].astype(float), (data['user_id'], data['artist_id'])))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "3ec92ab1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<110820x40913 sparse matrix of type ''\n", + "\twith 1999967 stored elements in Compressed Sparse Row format>" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sparse_item_user" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "165220e6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<40913x110820 sparse matrix of type ''\n", + "\twith 1999967 stored elements in Compressed Sparse Row format>" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sparse_user_item" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "a6a0a966", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "99.95588936009682" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "matrix_size = sparse_user_item.shape[0]*sparse_user_item.shape[1]\n", + "num_purchases = len(sparse_user_item.nonzero()[0])\n", + "sparsity = 100*(1 - (num_purchases / matrix_size))\n", + 
"sparsity" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "f2eb765a", + "metadata": {}, + "outputs": [], + "source": [ + "import random\n", + "\n", + "def make_train(ratings, pct_test = 0.2):\n", + " \n", + " test_set = ratings.copy() # Make a copy of the original set to be the test set. \n", + " test_set[test_set != 0] = 1 # Store the test set as a binary preference matrix\n", + " \n", + " training_set = ratings.copy() # Make a copy of the original data we can alter as our training set. \n", + " \n", + " nonzero_inds = training_set.nonzero() # Find the indices in the ratings data where an interaction exists\n", + " nonzero_pairs = list(zip(nonzero_inds[0], nonzero_inds[1])) # Zip the row and column indices into a list of (item, user) pairs\n", + "\n", + " \n", + " random.seed(0) # Set the random seed to zero for reproducibility\n", + " \n", + " num_samples = int(np.ceil(pct_test*len(nonzero_pairs))) # Round the number of samples needed up to the next integer\n", + " samples = random.sample(nonzero_pairs, num_samples) # Sample num_samples item-user pairs at random, without replacement\n", + "\n", + " item_inds = [index[0] for index in samples] # Get the item row indices\n", + "\n", + " user_inds = [index[1] for index in samples] # Get the user column indices\n", + "\n", + " \n", + " training_set[item_inds, user_inds] = 0 # Set all of the randomly chosen item-user pairs to zero\n", + " training_set.eliminate_zeros() # Get rid of zeros in sparse array storage after update to save space\n", + " \n", + " return training_set, test_set, list(set(user_inds))" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "2f0c5ec2", + "metadata": {}, + "outputs": [], + "source": [ + "product_train, product_test, product_users_altered = make_train(sparse_item_user, pct_test = 0.05)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "772579e9", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [
+ "WARNING:root:Intel MKL BLAS detected. Its highly recommend to set the environment variable 'export MKL_NUM_THREADS=1' to disable its internal multithreading\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1578cae65f7441e4a76cf9c5cad9092b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/40 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userartistplaysuser_idartist_id
400000c289a1829a808ac09c00daf10bc3c4e223bred hot chili peppers691080876
1422000429493d9716b66b02180d208d09b5b89fbe64red hot chili peppers2342980876
21390007e26aafcfc0b6dcb87d7041583fbb7cced88ared hot chili peppers1594480876
3284000b0bb32f149504e1df3cce85b6bfd20cef3dd0red hot chili peppers466880876
3322000b2ee840cbda56e0f41c8f248c4fb7ee275db3red hot chili peppers876980876
..................
19989831d2c0053f7e585caa02ccbb3a96b708c078e9300red hot chili peppers10244089280876
19994181d2d697dbe47814d1de4064508b32ec00a1ec767red hot chili peppers3844090180876
19996421d2db9f945283c3f48ab685a84a78429a63a3f19red hot chili peppers664090580876
19998231d2e1fa030318413f194f741c674b6032094a0cered hot chili peppers3894090980876
19998891d2e583e63cbd9cfc526b53f90f4fd438271336ered hot chili peppers724091080876
\n", + "

5693 rows × 5 columns

\n", + "" + ], + "text/plain": [ + " user artist \\\n", + "4 00000c289a1829a808ac09c00daf10bc3c4e223b red hot chili peppers \n", + "1422 000429493d9716b66b02180d208d09b5b89fbe64 red hot chili peppers \n", + "2139 0007e26aafcfc0b6dcb87d7041583fbb7cced88a red hot chili peppers \n", + "3284 000b0bb32f149504e1df3cce85b6bfd20cef3dd0 red hot chili peppers \n", + "3322 000b2ee840cbda56e0f41c8f248c4fb7ee275db3 red hot chili peppers \n", + "... ... ... \n", + "1998983 1d2c0053f7e585caa02ccbb3a96b708c078e9300 red hot chili peppers \n", + "1999418 1d2d697dbe47814d1de4064508b32ec00a1ec767 red hot chili peppers \n", + "1999642 1d2db9f945283c3f48ab685a84a78429a63a3f19 red hot chili peppers \n", + "1999823 1d2e1fa030318413f194f741c674b6032094a0ce red hot chili peppers \n", + "1999889 1d2e583e63cbd9cfc526b53f90f4fd438271336e red hot chili peppers \n", + "\n", + " plays user_id artist_id \n", + "4 691 0 80876 \n", + "1422 234 29 80876 \n", + "2139 159 44 80876 \n", + "3284 46 68 80876 \n", + "3322 87 69 80876 \n", + "... ... ... ... 
\n", + "1998983 1024 40892 80876 \n", + "1999418 384 40901 80876 \n", + "1999642 66 40905 80876 \n", + "1999823 389 40909 80876 \n", + "1999889 72 40910 80876 \n", + "\n", + "[5693 rows x 5 columns]" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data['artist'] == 'red hot chili peppers']" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "2e4294bb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "red hot chili peppers\n", + "muse\n", + "nirvana\n", + "coldplay\n", + "queen\n", + "the killers\n", + "foo fighters\n", + "placebo\n", + "pink floyd\n", + "the beatles\n" + ] + } + ], + "source": [ + "artist_id = 80876\n", + "n_similar = 10 # getting the top ten similar items\n", + "\n", + "# Use implicit to get similar items.\n", + "similar = model.similar_items(artist_id, n_similar)\n", + "# Print the names of our most similar artists\n", + "for artist in similar:\n", + " idx, score = artist\n", + " print (data.artist.loc[data.artist_id == idx].iloc[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "0d37cf46", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userartistplaysuser_idartist_id
000000c289a1829a808ac09c00daf10bc3c4e223bdie Ärzte1099030264
2943000a1585c5f65532a9c9187a882892982d345a5cdie Ärzte1486130264
3787000cb6427411006fe9a6193d3c4f59efed53fbefdie Ärzte77830264
62950014ffc91d3a5b59cce9bceaf22ef0d72e5711b8die Ärzte8812830264
13513003059a886782e4d7936da913d3f064f637d0b2bdie Ärzte527430264
\n", + "
" + ], + "text/plain": [ + " user artist plays user_id \\\n", + "0 00000c289a1829a808ac09c00daf10bc3c4e223b die Ärzte 1099 0 \n", + "2943 000a1585c5f65532a9c9187a882892982d345a5c die Ärzte 148 61 \n", + "3787 000cb6427411006fe9a6193d3c4f59efed53fbef die Ärzte 7 78 \n", + "6295 0014ffc91d3a5b59cce9bceaf22ef0d72e5711b8 die Ärzte 88 128 \n", + "13513 003059a886782e4d7936da913d3f064f637d0b2b die Ärzte 5 274 \n", + "\n", + " artist_id \n", + "0 30264 \n", + "2943 30264 \n", + "3787 30264 \n", + "6295 30264 \n", + "13513 30264 " + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data['artist'] == 'die Ärzte'].head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "edb749cc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "die Ärzte\n", + "guano apes\n", + "mando diao\n", + "soundtrack\n", + "apocalyptica\n", + "him\n", + "limp bizkit\n", + "[unknown]\n", + "bloodhound gang\n", + "nightwish\n" + ] + } + ], + "source": [ + "# Find the 10 most similar to die Ärzte\n", + "artist_id = 30264\n", + "n_similar = 10 # getting the top ten similar items\n", + "\n", + "# Use implicit to get similar items.\n", + "similar = model.similar_items(artist_id, n_similar)\n", + "# Print the names of our most similar artists\n", + "for artist in similar:\n", + " idx, score = artist\n", + " print (data.artist.loc[data.artist_id == idx].iloc[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "076e43ad", + "metadata": {}, + "outputs": [], + "source": [ + "data['rank'] = data.groupby(['user_id'])['plays'].rank(ascending = False)\n", + "\n", + "# filtering for their first choice\n", + "data_1 = data[data['rank'] == 1]" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "336f236b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userartistplaysuser_idartist_idrank
000000c289a1829a808ac09c00daf10bc3c4e223bdie Ärzte10990302641.0
25686005d521c9f8b1acfc13b7a4cc4b39085edfc786adie Ärzte933523302641.0
29112006aaaaf386fdbb0aea3f2bf9019e346a7294b6adie Ärzte946595302641.0
30440006f93b4213be13020a3819e0d0a86dcf97b58dedie Ärzte1375622302641.0
5393700c465e5b33365ab91cc5cf161590a38044954afdie Ärzte29241094302641.0
\n", + "
" + ], + "text/plain": [ + " user artist plays user_id \\\n", + "0 00000c289a1829a808ac09c00daf10bc3c4e223b die Ärzte 1099 0 \n", + "25686 005d521c9f8b1acfc13b7a4cc4b39085edfc786a die Ärzte 933 523 \n", + "29112 006aaaaf386fdbb0aea3f2bf9019e346a7294b6a die Ärzte 946 595 \n", + "30440 006f93b4213be13020a3819e0d0a86dcf97b58de die Ärzte 1375 622 \n", + "53937 00c465e5b33365ab91cc5cf161590a38044954af die Ärzte 2924 1094 \n", + "\n", + " artist_id rank \n", + "0 30264 1.0 \n", + "25686 30264 1.0 \n", + "29112 30264 1.0 \n", + "30440 30264 1.0 \n", + "53937 30264 1.0 " + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_1[data_1['artist_id'] == 30264].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "1e957969", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userartistplaysuser_idartist_idrank
5393700c465e5b33365ab91cc5cf161590a38044954afdie Ärzte29241094302641.0
5393800c465e5b33365ab91cc5cf161590a38044954afequilibrium19361094364912.0
5393900c465e5b33365ab91cc5cf161590a38044954afensiferum17821094363433.0
5394000c465e5b33365ab91cc5cf161590a38044954afsystem of a down11671094925244.0
5394100c465e5b33365ab91cc5cf161590a38044954afsdp10421094854065.0
5394200c465e5b33365ab91cc5cf161590a38044954afdeichkind10131094290166.0
5394300c465e5b33365ab91cc5cf161590a38044954afknorkator9201094571577.0
5394400c465e5b33365ab91cc5cf161590a38044954aftyp:t.u.r.b.o.91110941030298.0
5394500c465e5b33365ab91cc5cf161590a38044954afserj tankian8131094859979.0
5394600c465e5b33365ab91cc5cf161590a38044954afrise against77910948204010.0
\n", + "
" + ], + "text/plain": [ + " user artist plays \\\n", + "53937 00c465e5b33365ab91cc5cf161590a38044954af die Ärzte 2924 \n", + "53938 00c465e5b33365ab91cc5cf161590a38044954af equilibrium 1936 \n", + "53939 00c465e5b33365ab91cc5cf161590a38044954af ensiferum 1782 \n", + "53940 00c465e5b33365ab91cc5cf161590a38044954af system of a down 1167 \n", + "53941 00c465e5b33365ab91cc5cf161590a38044954af sdp 1042 \n", + "53942 00c465e5b33365ab91cc5cf161590a38044954af deichkind 1013 \n", + "53943 00c465e5b33365ab91cc5cf161590a38044954af knorkator 920 \n", + "53944 00c465e5b33365ab91cc5cf161590a38044954af typ:t.u.r.b.o. 911 \n", + "53945 00c465e5b33365ab91cc5cf161590a38044954af serj tankian 813 \n", + "53946 00c465e5b33365ab91cc5cf161590a38044954af rise against 779 \n", + "\n", + " user_id artist_id rank \n", + "53937 1094 30264 1.0 \n", + "53938 1094 36491 2.0 \n", + "53939 1094 36343 3.0 \n", + "53940 1094 92524 4.0 \n", + "53941 1094 85406 5.0 \n", + "53942 1094 29016 6.0 \n", + "53943 1094 57157 7.0 \n", + "53944 1094 103029 8.0 \n", + "53945 1094 85997 9.0 \n", + "53946 1094 82040 10.0 " + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data['user_id'] == 1094].head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "78235cbb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " artist score\n", + "0 tyler bates 1.337788\n", + "1 jan hegenberg 1.193714\n", + "2 five finger death punch 1.190098\n", + "3 slagsmålsklubben 1.178774\n", + "4 daniel licht 1.163203\n", + "5 kaizers orchestra 1.141305\n", + "6 eav 1.137476\n", + "7 fightstar 1.131872\n", + "8 flyleaf 1.128522\n", + "9 volbeat 1.127203\n", + "10 saliva 1.124229\n", + "11 freedom call 1.123379\n", + "12 jennifer rostock 1.118862\n", + "13 flobots 1.117408\n", + "14 skillet 1.115119\n", + "15 machinae supremacy 1.112002\n", + "16 eisbrecher 1.111799\n", + "17 aiden 1.107183\n", + "18 
asp 1.102318\n", + "19 drowning pool 1.102212\n" + ] + } + ], + "source": [ + "user_id = 1094\n", + "\n", + "# Use the implicit recommender.\n", + "recommended = model.recommend(user_id, sparse_user_item,N = 20,filter_already_liked_items = False)\n", + "\n", + "artists = []\n", + "scores = []\n", + "\n", + "# Get artist names from ids\n", + "for item in recommended:\n", + " idx, score = item\n", + " artists.append(data.artist.loc[data.artist_id == idx].iloc[0])\n", + " scores.append(score)\n", + "\n", + "# Create a dataframe of artist names and scores\n", + "recommendations = pd.DataFrame({'artist': artists, 'score': scores})\n", + "\n", + "print (recommendations)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8bd6eee9", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}