-
Notifications
You must be signed in to change notification settings - Fork 4.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ef24180
commit 6cce29f
Showing
5 changed files
with
342 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
#!/usr/bin/env python | ||
# coding: utf-8 | ||
|
||
# In[ ]: | ||
|
||
|
||
from util import m_normal, learning_rate, get_lambda | ||
from classes import ret | ||
import random as random | ||
import numpy as np | ||
import math | ||
def bpr_update(users, movies):
    """Run one stochastic BPR gradient pass over every user.

    For each user with at least one training movie, one positive movie is
    drawn from the user's training set and one candidate negative from the
    whole catalogue.  If the candidate is not in the user's training set, the
    user factor and both movie factors take a single gradient-ascent step on
    the BPR objective with L2 regularisation.

    Args:
        users: mapping of user key -> object exposing ``factor`` and
            ``movies_train``.
        movies: mapping of movie id -> object exposing ``factor``.

    Returns:
        None.  The ``factor`` attributes are reassigned on the objects held
        in ``users`` and ``movies``.

    Raises:
        KeyError: if a sampled training movie id is missing from ``movies``.
    """
    lr = learning_rate()
    lam = get_lambda()
    # random.sample() requires a sequence on Python 3.11+, so the catalogue
    # ids are materialised once instead of passing a dict view per user.
    movie_ids = list(movies.keys())

    for key in users:
        u = users[key]
        if not u.movies_train:
            continue

        rand_pos = random.sample(list(u.movies_train.keys()), 1)[0]
        rand_neg = random.sample(movie_ids, 1)[0]
        if rand_neg in u.movies_train:
            # The candidate negative is a movie the user has in training;
            # no update is made for this user in this pass.
            continue

        Vu = u.factor
        Vi = movies[rand_pos].factor
        Vj = movies[rand_neg].factor
        firstterm = calculate_first_term(Vu, Vi, Vj)

        # Every gradient below is evaluated at the pre-update factors: the
        # right-hand sides build new arrays, so Vu/Vi/Vj stay untouched.
        # The L2 penalty contributes -lam * V, pulling each factor to zero.

        # USER FACTOR
        u.factor = Vu + lr * (firstterm * (Vi - Vj) - lam * Vu)

        # ITEM POSITIVE FACTOR
        movies[rand_pos].factor = Vi + lr * (firstterm * Vu - lam * Vi)

        # ITEM NEGATIVE FACTOR
        movies[rand_neg].factor = Vj + lr * (-firstterm * Vu - lam * Vj)
|
||
def calculate_first_term(Vu, Vi, Vj):
    """Return e^{-x} / (1 + e^{-x}) where x = Vu.Vi - Vu.Vj.

    The exponent is capped at 500 so that ``math.exp`` cannot overflow.
    """
    neg_x = -(np.dot(Vu, Vi) - np.dot(Vu, Vj))
    exp_term = math.exp(min(neg_x, 500))
    return exp_term / (1 + exp_term)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#!/usr/bin/env python | ||
# coding: utf-8 | ||
|
||
# In[ ]: | ||
|
||
|
||
from util import random_vector | ||
class user:
    """A user record: id, three rating dictionaries and a latent factor."""

    def __init__(self, userid):
        self.userid = userid
        # Start with empty train / test / all dictionaries.
        self.movies_train, self.movies_test, self.movies_all = {}, {}, {}
        # Initial latent factor comes from util.random_vector().
        self.factor = random_vector()
|
||
class movie:
    """A movie record: id, optional metadata and a latent factor."""

    def __init__(self, movieid, rating=0, title=None, genres=None):
        self.movieid, self.rating = movieid, rating
        self.title, self.genres = title, genres
        # Initial latent factor comes from util.random_vector().
        self.factor = random_vector()
|
||
class ret:
    """Plain result holder; every field starts at its default value."""

    def __init__(self):
        self.userid = self.movieid = None
        self.isuser = True
        # A fresh list per instance, never shared between holders.
        self.retvalue = list()
|
||
class usermovie:
    """A (user, movie, rating) triple with unset ids and a zero rating."""

    def __init__(self):
        self.userid = self.movieid = None
        self.rating = 0
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
#!/usr/bin/env python | ||
# coding: utf-8 | ||
|
||
# In[ ]: | ||
|
||
|
||
from classes import user | ||
from classes import movie | ||
from numpy import random | ||
from util import min_rating, random_vector, num_users | ||
from random import seed | ||
import pandas as pd | ||
import numpy as np | ||
def read_ratings(filename):
    """Load a ratings CSV and build per-user train/test/all rating dicts.

    The file is read with explicit column names (``user_id``, ``movie_id``,
    ``rating``, ``timestamp``), so its first line is treated as data.  Rows
    whose ``user_id`` is not below ``num_users()`` are dropped.

    First pass: each rating that is at least ``min_rating()`` is assigned to
    the user's training dict with probability 0.7 and to the test dict
    otherwise; users are created on first use.
    Second pass: every remaining rating (regardless of value) is recorded in
    ``movies_all`` for users that were created in the first pass.

    Both the stdlib and NumPy generators are seeded with 42 before loading.

    Args:
        filename: path or buffer accepted by ``pandas.read_csv``.

    Returns:
        dict mapping ``int`` user id -> ``user`` object.
    """
    seed(42)
    np.random.seed(42)
    r_cols = ['user_id', 'movie_id', 'rating', 'timestamp']
    ratings = pd.read_csv(filename, sep=',', names=r_cols, encoding='latin-1')

    ratings['user_id'] = ratings['user_id'].astype(int)
    ratings['movie_id'] = ratings['movie_id'].astype(int)
    ratings['rating'] = ratings['rating'].astype(float)

    ratings = ratings[ratings['user_id'] < num_users()]

    # Loop-invariant threshold: looked up once instead of once per row.
    min_movie_rating = min_rating()
    users = dict()

    # Iterating the three columns in lockstep avoids the per-row Series that
    # DataFrame.iterrows() would construct.
    for uid, mid, value in zip(ratings['user_id'], ratings['movie_id'],
                               ratings['rating']):
        userid = int(uid)
        movieid = int(mid)
        rating1 = float(value)
        if rating1 < min_movie_rating:
            continue

        # Draw the split decision before any user is created so the random
        # stream is consumed in the same order as before.
        in_train = random.random() < 0.7

        user1 = users.get(userid)
        if user1 is None:
            user1 = user(userid)
            # The constructor already assigns a factor; this second draw is
            # kept so that the seeded train/test split stays reproducible.
            user1.factor = random_vector()
            users[userid] = user1

        if in_train:
            user1.movies_train[movieid] = rating1
        else:
            user1.movies_test[movieid] = rating1

    # Second pass must stay separate: a low rating can precede the row that
    # first creates its user, and it still has to land in movies_all.
    for uid, mid, value in zip(ratings['user_id'], ratings['movie_id'],
                               ratings['rating']):
        user1 = users.get(int(uid))
        if user1 is not None:
            user1.movies_all[int(mid)] = float(value)

    return users
|
||
def read_movies(filename):
    """Load the movie CSV and return a dict of ``movie`` objects by id.

    The file is read with explicit column names (``movie_id``, ``title``,
    ``genres``); only ``movie_id`` is used.  Each movie is created with
    rating 0 and then given a freshly drawn latent factor.
    """
    columns = ['movie_id', 'title', 'genres']
    frame = pd.read_csv(filename, sep=",", encoding='latin-1', names=columns)

    catalogue = {}
    for _, record in frame.iterrows():
        movie_key = record['movie_id']
        entry = movie(movie_key, 0)
        entry.factor = random_vector()
        catalogue[movie_key] = entry
    return catalogue
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
#!/usr/bin/env python | ||
# coding: utf-8 | ||
|
||
# In[ ]: | ||
|
||
|
||
from classes import usermovie | ||
import numpy as np | ||
from sklearn.metrics import mean_squared_error | ||
from math import sqrt | ||
|
||
def _rescale_to_five(values):
    """Min-max scale ``values`` onto the 0-5 range and return a new list."""
    if not values:
        return []
    lowest = min(values)
    # Computed once up front rather than once per element.
    span = max(values) - lowest
    if span == 0:
        # No spread to scale by; every value is pinned to the top of the
        # range rather than dividing by zero.
        return [5.0 for _ in values]
    return [(x - lowest) / span * 5 for x in values]


def hit_rate(users, movies):
    """Evaluate the factor model: top-10 hit count and two RMSE figures.

    Every (user, movie) pair is scored with the dot product of the two
    factors.  A user counts as a hit when at least one of their 10
    highest-scored movies is in their ``movies_test``; all movies are ranked,
    including ones present in the user's other rating dicts.

    Scores paired with test ratings, and scores paired with ``movies_all``
    ratings, are each min-max rescaled to 0-5 before the RMSE is taken.

    Args:
        users: mapping of user key -> object exposing ``userid``, ``factor``,
            ``movies_test`` and ``movies_all``.
        movies: mapping of movie key -> object exposing ``movieid`` and
            ``factor``.

    Returns:
        Tuple ``(hits, denom, rms, rmsall)``: number of users with a hit,
        number of users evaluated, RMSE over test ratings, RMSE over all
        ratings.  An RMSE is ``nan`` when there are no ratings to compare.
    """
    hits = 0
    denom = 0
    actual, predicted = [], []
    actualall, predictedall = [], []

    for u1 in users:
        u = users[u1]
        userid = u.userid
        if userid not in users:
            continue
        denom += 1
        ranked_user = users[userid]
        ufactor = ranked_user.factor

        scored = []
        for m1 in movies:
            m = movies[m1]
            dotp = np.dot(ufactor, m.factor)
            if m.movieid in u.movies_all:
                actualall.append(u.movies_all[m.movieid])
                predictedall.append(float(dotp))
            if m.movieid in u.movies_test:
                actual.append(u.movies_test[m.movieid])
                predicted.append(dotp)
            scored.append((dotp, m.movieid))

        # Stable sort on the score only, highest first; ties keep the
        # iteration order of ``movies``.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        if any(movieid in ranked_user.movies_test
               for _, movieid in scored[:10]):
            hits += 1

    predicted = _rescale_to_five(predicted)
    predictedall = _rescale_to_five(predictedall)

    nan = float('nan')
    rms = sqrt(mean_squared_error(actual, predicted)) if actual else nan
    rmsall = (sqrt(mean_squared_error(actualall, predictedall))
              if actualall else nan)

    return hits, denom, rms, rmsall
|
||
def _rating_key(ratings, movieid):
    """Return the key under which ``movieid`` is stored in ``ratings``.

    The id is tried as given, then as ``int`` and as ``str``, because the
    rating dicts and the movie objects may hold ids of different types.
    Returns ``None`` when no spelling matches.
    """
    for candidate in (movieid, int(movieid), str(movieid)):
        if candidate in ratings:
            return candidate
    return None


def hit_rate_SVD(users, movies, svd):
    """Evaluate ``svd``: top-10 hit count and two RMSE figures.

    Every (user, movie) pair is scored with element 3 of
    ``svd.predict(int(userid), int(movieid))``.  A user counts as a hit when
    at least one of their 10 highest-scored movies is in their
    ``movies_test``; all movies are ranked.

    Args:
        users: mapping of user key -> object exposing ``userid``,
            ``movies_test`` and ``movies_all``.
        movies: mapping of movie key -> object exposing ``movieid``.
        svd: object with a ``predict(uid, iid)`` method whose result is
            indexable at position 3.

    Returns:
        Tuple ``(hits, denom, rms, rmsall)``: number of users with a hit,
        number of users evaluated, RMSE over test ratings, RMSE over all
        ratings.  An RMSE is ``nan`` when there are no ratings to compare.
    """
    hits = 0
    denom = 0
    actual, predicted = [], []
    actualall, predictedall = [], []

    for u1 in users:
        u = users[u1]
        userid = u.userid
        if userid not in users:
            continue
        denom += 1
        ranked_user = users[userid]

        scored = []
        for m1 in movies:
            m = movies[m1]
            dotp = float(svd.predict(int(userid), int(m.movieid))[3])

            all_key = _rating_key(u.movies_all, m.movieid)
            if all_key is not None:
                actualall.append(u.movies_all[all_key])
                predictedall.append(dotp)

            # Index with the key that actually matched; using the raw id
            # fails when it matched only through its int/str spelling.
            test_key = _rating_key(u.movies_test, m.movieid)
            if test_key is not None:
                actual.append(u.movies_test[test_key])
                predicted.append(dotp)

            scored.append((dotp, m.movieid))

        # Stable sort on the score only, highest first.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        if any(_rating_key(ranked_user.movies_test, movieid) is not None
               for _, movieid in scored[:10]):
            hits += 1

    nan = float('nan')
    rms = sqrt(mean_squared_error(actual, predicted)) if actual else nan
    rmsall = (sqrt(mean_squared_error(actualall, predictedall))
              if actualall else nan)

    return hits, denom, rms, rmsall
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
#!/usr/bin/env python | ||
# coding: utf-8 | ||
|
||
# In[ ]: | ||
|
||
|
||
from numpy import random | ||
import numpy as np | ||
|
||
def num_users():
    """Return the user-count setting (one million)."""
    return 10 ** 6
|
||
def dimension():
    """Return the dimension setting used when building vectors and matrices."""
    size = 50
    return size
|
||
def min_rating():
    """Return the minimum-rating setting."""
    threshold = 4
    return threshold
|
||
def learning_rate():
    """Return the learning-rate setting."""
    step_size = 1
    return step_size
|
||
def get_lambda():
    """Return the lambda setting."""
    weight = 0.1
    return weight
|
||
def random_vector():
    """Draw one vector of length ``dimension()`` from a zero-mean
    multivariate normal whose covariance is ``cov_matrix()``."""
    zero_mean = np.zeros(dimension())
    return random.multivariate_normal(zero_mean, cov_matrix())
|
||
def cov_matrix():
    """Return a ``dimension()`` x ``dimension()`` float matrix with 0.1 on
    the diagonal and zeros elsewhere."""
    size = dimension()
    matrix = np.zeros((size, size), dtype=float)
    np.fill_diagonal(matrix, 0.1)
    return matrix
|
||
def m_normal(mean):
    """Draw one sample from a multivariate normal centred on ``mean`` with
    covariance ``cov_matrix()``."""
    return random.multivariate_normal(mean=mean, cov=cov_matrix())
|