This commit is contained in:
arunjose696 2021-11-08 14:34:44 +03:00 committed by GitHub
commit 460783014c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -44,13 +44,13 @@ def filter_triplets(tp, min_uc=5, min_sc=0):
# Only keep the triplets for items which were clicked on by at least min_sc users.
if min_sc > 0:
itemcount = get_count(tp, 'movieId')
tp = tp[tp['movieId'].isin(itemcount.index[itemcount >= min_sc])]
tp = tp[tp['movieId'].isin(itemcount.index[itemcount["size"] >= min_sc])]
# Only keep the triplets for users who clicked on at least min_uc items
# After doing this, some of the items will have fewer than min_sc users, but they should only be a small proportion
if min_uc > 0:
usercount = get_count(tp, 'userId')
tp = tp[tp['userId'].isin(usercount.index[usercount >= min_uc])]
tp = tp[tp['userId'].isin(usercount.index[usercount["size"] >= min_uc])]
# Update both usercount and itemcount after filtering
usercount, itemcount = get_count(tp, 'userId'), get_count(tp, 'movieId')