Source code for ucas_dm.prediction_algorithms.collaborate_based_algo
from .surprise_base_algo import SurpriseBaseAlgo
from surprise.prediction_algorithms import KNNBasic
import pandas as pd
import math
from surprise import Dataset, Reader
[docs]class CollaborateBasedAlgo(SurpriseBaseAlgo):
"""
Collaborative filtering algorithm.
"""
[docs] def __init__(self, sim_func='cosine', user_based=True, k=1):
"""
:param sim_func: similarity function: 'cosine','msd','pearson','pearson_baseline'
:param user_based: True--> user-user filtering strategy;False--> item-item filtering strategy
:param k: The (max) number of neighbors to take into account for aggregation
"""
super().__init__()
self._user_based = user_based
self._sim_func = sim_func
self._k = k
[docs] def train(self, train_set):
# News recommendation is a typical case that use users' implicit feedback to give recommendations, train set
# only contains binary or unary data (1 for seen, 0 for unseen). According to some papers, normalizing user
# vectors to unit vectors will increase the accuracy of recommending with binary data.
if self._surprise_model is None:
self._surprise_model = self._init_surprise_model()
train_set = pd.DataFrame(train_set)
train_set.columns = ['user_id', 'item_id']
self._user_log = train_set.copy()
train_set = train_set.drop_duplicates()
groups = train_set.groupby(['user_id'])
id_to_group_size = {}
for user_id, group in groups:
id_to_group_size[user_id] = group.shape[0]
train_set['rate'] = 1
train_set['rate'] = train_set.apply(lambda row: 1 / math.sqrt(id_to_group_size[row['user_id']]), axis=1)
reader = Reader(rating_scale=(0, 1))
train_s = Dataset.load_from_df(train_set, reader)
''' train surprise-framework based model '''
self._surprise_model.fit(train_s.build_full_trainset())
return self
[docs] def _init_surprise_model(self):
sim_options = {'name': self._sim_func, 'user_based': self._user_based}
return KNNBasic(k=self._k, sim_options=sim_options)
[docs] def to_dict(self):
"""
See :meth:`BaseAlgo.to_dict <base_algo.BaseAlgo.to_dict>` for more details.
"""
return {'type': 'Collaborative filtering', 'user_based': self._user_based, 'sim_fun': self._sim_func,
'k': self._k}