diff --git a/moiveRe/DeepFMcopy.py b/moiveRe/DeepFMcopy.py
new file mode 100644
index 0000000..acf9e8d
--- /dev/null
+++ b/moiveRe/DeepFMcopy.py
@@ -0,0 +1,77 @@
+import django
+import os
+import torch
+
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "moiveRe.settings")
+django.setup()
+
+
+
+
+
+
+
+
+
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder, MinMaxScaler
+import torch
+from deepctr_torch.models import DeepFM
+from deepctr_torch.inputs import SparseFeat, DenseFeat, get_feature_names
+
+# 生成示例数据
+np.random.seed(2024)
+data_size = 10000
+
+# 假设有 3 个离散特征 C1, C2, C3
+sparse_features = ['C' + str(i) for i in range(1,4)]
+# 假设有 2 个连续特征 I1, I2
+dense_features = ['I' + str(i) for i in range(1,3)]
+
+data = {feat: np.random.choice([0, 1, 2, 3, 4], data_size) for feat in sparse_features}
+data.update({feat: np.random.rand(data_size) for feat in dense_features})
+data['label'] = np.random.randint(0, 2, data_size)
+
+# 转换为 DataFrame
+df = pd.DataFrame(data)
+print(df)
+
+
+#预处理部分:
+# 对离散特征进行标签编码,转为数值的意思
+for feat in sparse_features:
+ lbe = LabelEncoder()
+ df[feat] = lbe.fit_transform(df[feat])
+# 对连续特征进行归一化
+mms = MinMaxScaler()
+df[dense_features] = mms.fit_transform(df[dense_features])
+
+train, test = train_test_split(df, test_size=0.2)
+
+# deepctr模型输入
+fixlen_feature_columns = [SparseFeat(feat, df[feat].nunique(), embedding_dim=4) for feat in sparse_features] + [DenseFeat(feat, 1,) for feat in dense_features]
+
+
+
+
+# DeepFM 模型,开导!
+model = DeepFM(fixlen_feature_columns, fixlen_feature_columns, task='binary') # 重复传入相同的列~ 这里因为是根据CTR来对召回进行排序所以就是binary任务
+
+# 编译模型
+model.compile("adam", "binary_crossentropy",
+ metrics=["binary_crossentropy"], ) #优化器选择balabala,这里直接看库里给的文档
+
+# 训练模型
+train_model_input = {name: train[name].values for name in get_feature_names(fixlen_feature_columns)}
+test_model_input = {name: test[name].values for name in get_feature_names(fixlen_feature_columns)}
+label_distribution = train['label'].value_counts()
+print('label_dis:',label_distribution)
+history = model.fit(train_model_input, train['label'].values,
+ batch_size=50, epochs=4, verbose=2, validation_split=0, ) #fit开始train
+
+
+
+
+
diff --git a/moiveRe/DeepFm.py b/moiveRe/DeepFm.py
index f89a2ae..e69de29 100644
--- a/moiveRe/DeepFm.py
+++ b/moiveRe/DeepFm.py
@@ -1,50 +0,0 @@
-import numpy as np
-import pandas as pd
-import torch
-import torch.nn as nn
-import torch.optim as optim
-from sklearn.model_selection import train_test_split
-from deepctr.models import DeepFM
-from deepctr.feature_column import SparseFeat, get_feature_names
-
-# 假设有一个包含广告特征和点击情况的数据集
-data = {
- 'user_id': [1, 2, 3, 4, 5],
- 'ad_id': [101, 102, 103, 104, 105],
- 'age': [25, 30, 35, 40, 45],
- 'gender': ['M', 'F', 'M', 'F', 'M'],
- 'click': [1, 0, 1, 0, 1]
-}
-df = pd.DataFrame(data)
-
-# 定义特征列
-sparse_features = ['user_id', 'ad_id', 'gender']
-dense_features = ['age']
-
-# 将特征列转换为SparseFeat和DenseFeat类型
-feature_columns = [SparseFeat(feat, vocabulary_size=df[feat].nunique(), embedding_dim=4)
- for feat in sparse_features] + [DenseFeat(feat, 1) for feat in dense_features]
-
-# 划分训练集和测试集
-train, test = train_test_split(df, test_size=0.2)
-
-# 创建模型
-model = DeepFM(feature_columns, feature_columns, task='binary')
-
-# 编译模型
-model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'])
-
-# 将数据转换为模型所需的输入格式
-train_model_input = {name: train[name] for name in sparse_features + dense_features}
-test_model_input = {name: test[name] for name in sparse_features + dense_features}
-train_target = train['click'].values
-test_target = test['click'].values
-
-# 训练模型
-history = model.fit(train_model_input, train_target, batch_size=256, epochs=10, verbose=2, validation_split=0.2)
-
-# 预测
-preds = model.predict(test_model_input, batch_size=256)
-
-# 输出预测结果
-print("预测点击率:", preds)
diff --git a/moiveRe/UserExperienceUpdate.py b/moiveRe/UserExperienceUpdate.py
new file mode 100644
index 0000000..701fbd7
--- /dev/null
+++ b/moiveRe/UserExperienceUpdate.py
@@ -0,0 +1,26 @@
+
+
+import django
+from django.conf import settings
+import os
+
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "moiveRe.settings") # 替换 "yourproject.settings" 为你的项目的 settings 模块路径
+django.setup()
+
+from moiveReApp.models import UserExperience
+
+
+# 兴趣标签集合
+experience_tags = [
+ 'NewtoGame','Middle','Experienced'
+]
+
+# 将标签添加到数据库中
+for tag_name in experience_tags:
+ tag, created = UserExperience.objects.get_or_create(name=tag_name)
+
+ # 如果标签是新创建的,则打印一条消息
+ if created:
+ print(f'Created tag: {tag_name}')
+ else:
+ print(f'Tag already exists: {tag_name}')
\ No newline at end of file
diff --git a/moiveRe/moiveReApp/models.py b/moiveRe/moiveReApp/models.py
index 79c5322..a4dfab5 100644
--- a/moiveRe/moiveReApp/models.py
+++ b/moiveRe/moiveReApp/models.py
@@ -68,10 +68,19 @@ class Userinterests(models.Model):
def __str__(self):
return self.name
+class UserExperience(models.Model):
+ name = models.CharField(max_length=100, unique=True)
+
+ def __str__(self):
+ return self.name
+
+
+
class UserProfile(models.Model):
# 与用户关联的一对一字段,这里假设你使用了Django内置的User模型
user = models.OneToOneField(User, on_delete=models.CASCADE)
+ userExperience = models.ManyToManyField(UserExperience, related_name='user_experience')
interests = models.ManyToManyField(Userinterests, related_name='interested_users')
liked_items = models.ManyToManyField(Question, blank=True, related_name='liked_users')
click_items = models.ManyToManyField(Question, blank=True, related_name='clicked_users')
diff --git a/moiveRe/moiveReApp/templates/moiveReApp/deepfmrec.html b/moiveRe/moiveReApp/templates/moiveReApp/deepfmrec.html
new file mode 100644
index 0000000..048f2d5
--- /dev/null
+++ b/moiveRe/moiveReApp/templates/moiveReApp/deepfmrec.html
@@ -0,0 +1,229 @@
+
+
+
+
+
+
+ Skyrim Mod - 随机Mod推荐
+
+
+
+
+
+
+
+ 综合推荐
+
+
+
+
+
+
+
diff --git a/moiveRe/moiveReApp/templates/moiveReApp/index.html b/moiveRe/moiveReApp/templates/moiveReApp/index.html
index 4c61a13..b38ad6e 100644
--- a/moiveRe/moiveReApp/templates/moiveReApp/index.html
+++ b/moiveRe/moiveReApp/templates/moiveReApp/index.html
@@ -187,8 +187,8 @@ Skyrim Mod
Welcome, {{ user.username }}!
{% endif %}
-
- UserCF Sort
+
+ 个性❤推荐
diff --git a/moiveRe/moiveReApp/urls.py b/moiveRe/moiveReApp/urls.py
index 5c945f5..9f5f56d 100644
--- a/moiveRe/moiveReApp/urls.py
+++ b/moiveRe/moiveReApp/urls.py
@@ -7,7 +7,7 @@
from django.urls import include, path
from django.views.static import serve
from .views import question_detail
-from .views import register, user_login,random_questions,like_question,usercf,itemcf,bertcall
+from .views import register, user_login,random_questions,like_question,usercf,itemcf,bertcall,deepfmrec
app_name = "moiveReApp"
urlpatterns = [
@@ -18,6 +18,7 @@
path('usercf/', usercf, name='usercf'),
path('itemcf/', itemcf, name='itemcf'),
path('bertcall/', bertcall, name='bertcall'),
+ path('deepfmrec/', deepfmrec, name='deepfmrec'),
     path('like/<int:question_id>/', like_question, name='like_question'),
     path('<int:question_id>/ratings/', question_detail, name='question_detail'),
diff --git a/moiveRe/moiveReApp/views.py b/moiveRe/moiveReApp/views.py
index 8449ffd..0c5bfe9 100644
--- a/moiveRe/moiveReApp/views.py
+++ b/moiveRe/moiveReApp/views.py
@@ -14,6 +14,13 @@
from sklearn.metrics.pairwise import cosine_similarity
import math
from collections import Counter
+import torch
+from transformers import DistilBertTokenizer, DistilBertModel
+import pandas as pd
+from sklearn.preprocessing import LabelEncoder, MinMaxScaler
+import torch
+from deepctr_torch.models import DeepFM
+from deepctr_torch.inputs import SparseFeat, DenseFeat, get_feature_names
class IndexView(generic.ListView):
template_name = "moiveReApp/index.html"
@@ -416,10 +423,10 @@ def itemcf(request):
#print(user_similarity_matrix)
# 生成推荐列表
recommendations = itemcf_.generate_recommendations(current_user_id, item_similarity_matrix, user_item_matrix, user_dict, item_dict, k=30)
- # 获取推荐的电影信息
+ # 获取推荐
recommended_movies = Question.objects.filter(id__in=recommendations[:8]) #只展示前ki个
- # 在模板中渲染推荐结果
+
return render(request, 'moiveReApp/itemcf.html', {'recommended_movies': recommended_movies})
@@ -449,4 +456,351 @@ def bertcall(request):
recommended_movies = Question.objects.filter(id__in=recommendations[:8]) #只展示前ki个
# 在模板中渲染推荐结果
- return render(request, 'moiveReApp/bertcall.html', {'recommended_movies': recommended_movies})
\ No newline at end of file
+ return render(request, 'moiveReApp/bertcall.html', {'recommended_movies': recommended_movies})
+
+
+
+
+
+
+
+#多路召回 + DeepFM 推荐
+
+
+
+class itemcf_deepfm():
+
+ def build_user_item_matrix(question_list):
+ """
+ 根据物品-用户倒排表构建用户-物品交互矩阵
+
+ 参数:
+ - question_list: QuerySet,包含所有问题的列表
+
+ 返回:
+ - user_item_matrix: 2D numpy数组,表示用户-物品交互矩阵
+ - item_dict: 字典,将物品ID映射到索引
+ - user_dict: 字典,将用户ID映射到索引
+ """
+
+ # 获取所有用户的ID和物品的ID
+ all_users = set()
+ all_items = set()
+
+ for question in question_list:
+ item_id = question.id
+ liked_users = question.liked_by.all()
+
+ all_users.update(user.id for user in liked_users)
+ all_items.add(item_id)
+
+ # 创建物品-用户的倒排表
+ item_user_dict = {}
+ for question in question_list:
+ item_id = question.id
+ liked_users = set(user.id for user in question.liked_by.all())
+ item_user_dict[item_id] = liked_users
+
+ # 创建物品相似性矩阵
+ num_items = len(all_items)
+ num_users = len(all_users)
+
+ # 建立物品ID到索引的映射
+ item_dict = {item_id: index for index, item_id in enumerate(all_items)}
+
+ # 建立用户ID到索引的映射
+ user_dict = {user_id: index for index, user_id in enumerate(all_users)}
+
+ # 初始化用户-物品交互矩阵
+ user_item_matrix = np.zeros((num_users, num_items))
+
+ for item_id, liked_users in item_user_dict.items():
+ item_index = item_dict[item_id]
+
+ for user_id in liked_users:
+ user_index = user_dict[user_id]
+ user_item_matrix[user_index, item_index] = 1 # 表示用户喜欢该物品
+
+ return user_item_matrix, item_dict, user_dict #转置一下就是倒排。。
+
+ def calculate_user_similarity(user_item_matrix):
+ # 计算物品相似度矩阵
+ item_similarity_matrix = cosine_similarity(user_item_matrix.T) #转置才是物品-用户表
+ np.fill_diagonal(item_similarity_matrix, 0)
+ return item_similarity_matrix
+
+ def generate_recommendations(target_user, item_similarity_matrix, user_item_matrix, user_dict, item_dict, k):
+
+
+ # 获取目标用户的索引
+ target_user_index = user_dict.get(target_user)
+ if target_user_index is None:
+ raise ValueError(f"目标用户 '{target_user}' 不存在。")
+
+ # 获取目标用户(未)交互的物品
+ target_user_interactions = user_item_matrix[target_user_index]
+
+ # 初始化推荐列表
+ recommendations = []
+
+
+ # 遍历每个物品,计算目标用户对未交互物品的感兴趣程度
+ for item_id, item_index in item_dict.items():
+ if not target_user_interactions[item_index]:
+ # 目标用户未交互过该物品
+ interest_score = 0
+ # 获取与目标物品最相似的前K个物品的索引
+ similar_item_indices = np.argsort(item_similarity_matrix[item_index])[::-1][:k]
+
+ # 计算目标用户对该物品的感兴趣程度
+ for similar_item_index in similar_item_indices:
+ if user_item_matrix[target_user_index, similar_item_index] == 1:
+ # 用户u交互过该物品,此即为N(u)∩S(j,k)
+ similarity_score = item_similarity_matrix[item_index, similar_item_index]
+ interest_score += similarity_score
+
+ recommendations.append((item_id, interest_score))
+
+ # 根据感兴趣程度排序推荐列表
+ recommendations.sort(key=lambda x: x[1], reverse=True)
+
+
+ # 返回排序后的物品ID列表
+ return recommendations
+
+class hotitem():
+
+
+ def recall_hot_item(question_list):
+ sorted_question_list = sorted(question_list, key=lambda q: q.likes, reverse=True)
+ recommendations = [(question.id, question.likes) for question in sorted_question_list]
+ return recommendations
+
+class newitem():
+ @staticmethod
+ def recall_new_item(question_list):
+ sorted_question_list = sorted(question_list, key=lambda q: q.pub_date, reverse=True)
+ recommendations = [(question.id, 1) for question in sorted_question_list] #得分为1 不带偏见的推荐新物品~
+ return recommendations
+
+class U2TAG2I():
+
+
+
+ def compute_interest_score(user_profile, questions):
+ interest_scores = {}
+ user_interests = set(user_profile.interests.all())
+ user_interests_names = [interest.name for interest in user_interests]
+ #print(user_interests_names)
+ for question in questions:
+ if question.category in user_interests_names:
+ interest_scores[question.id] = 0.3
+ else:
+ interest_scores[question.id] = 0
+
+ return interest_scores
+
+ def compute_similarity(user_profile, questions):
+ tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
+ model = DistilBertModel.from_pretrained('distilbert-base-uncased')
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model.to(device)
+
+ user_interests_str = ' '.join([interest.name for interest in user_profile.interests.all()])
+
+ question_details = [question.detail for question in questions]
+ num_questions = len(question_details)
+ question_ids = [question.id for question in questions]
+ question_score = []
+ #print(question_ids)
+ question_details.append(user_interests_str)
+ texts = question_details
+
+
+
+ inputs = tokenizer(texts, return_tensors='pt', padding=True, truncation=True)
+
+ # 将张量移动到GPU上
+ input_ids = inputs['input_ids'].to(device)
+ attention_mask = inputs['attention_mask'].to(device)
+ # 使用DistilBERT模型进行推理
+ with torch.no_grad():
+ outputs = model(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state
+
+ # 将文本表示向量转移到CPU上
+ text_embeddings = outputs.mean(dim=1).cpu().numpy()
+
+ # 计算相似度
+ for i in range(num_questions):
+ similarity = cosine_similarity([text_embeddings[i]], [text_embeddings[-1]])[0][0]
+ question_score.append(similarity*0.7)
+
+ # 使用 zip() 函数合并两个列表成元组列表
+ combined = list(zip(question_ids, question_score))
+ # 将元组列表转换为字典
+ interest_score = dict(combined)
+
+ return interest_score
+
+ def generate_recommendations(score1,score2):
+ result_dict = {}
+ for key in score1:
+ if key in score2:
+ result_dict[key] = score1[key] + score2[key]
+
+ sorted_items = sorted(result_dict.items(), key=lambda x: x[1], reverse=True)
+ top_k = sorted_items
+
+ return top_k
+
+
+
+def deepfmrec(request):
+ current_user_id = request.user.id # 假设你使用了Django的认证系统
+ question_list = Question.objects.all()
+ user_profile = UserProfile.get_user_profile(current_user_id)
+
+ #召回路
+ #1.协同过滤itemcf
+ user_item_matrix, item_dict, user_dict = itemcf_deepfm.build_user_item_matrix(question_list)
+ item_similarity_matrix = itemcf_deepfm.calculate_user_similarity(user_item_matrix)
+ r1 = itemcf_deepfm.generate_recommendations(current_user_id, item_similarity_matrix, user_item_matrix, user_dict, item_dict, k=30)
+ liked_items = list(user_profile.liked_items.values_list('id', flat=True))
+
+ #2.热门物品
+ r2 = hotitem.recall_hot_item(question_list)
+ r2 = [item for item in r2 if item[0] not in liked_items]
+
+ #3.新出物品
+ r3 = newitem.recall_new_item(question_list)
+ r3 = [item for item in r3 if item[0] not in liked_items]
+
+    #4.U2TAG2I
+ a = U2TAG2I.compute_interest_score(user_profile, questions=question_list)
+ b = U2TAG2I.compute_similarity(user_profile, questions=question_list)
+
+ r4 = U2TAG2I.generate_recommendations(a, b)
+ r4 = [item for item in r4 if item[0] not in liked_items]
+
+ #汇总4路召回
+
+ # 获取每个类别前 10 个物品的 ID,去除重复项
+ top_10_ids = list(
+ set([item[0] for item in r1[:10]] + [item[0] for item in r2[:10]] + [item[0] for item in r3[:10]] + [item[0] for item in
+ r4[:10]]))
+ # print(top_10_ids)
+ # 根据 top_10_ids 从 r1, r2, r3, r4 中找到对应的得分
+ result = []
+ for item_id in top_10_ids:
+ score1 = next((item[1] for item in r1 if item[0] == item_id), None)
+ score2 = next((item[1] for item in r2 if item[0] == item_id), None)
+ score3 = next((item[1] for item in r3 if item[0] == item_id), None)
+ score4 = next((item[1] for item in r4 if item[0] == item_id), None)
+ result.append((item_id, score1, score2, score3, score4))
+ df_origin = pd.DataFrame(result, columns=['id', 'feature1', 'feature2', 'feature3', 'feature4'])
+ df = pd.DataFrame(result, columns=['id', 'feature1', 'feature2', 'feature3', 'feature4'])
+
+ #进入模型预测:
+
+ rec_id = df['id']
+ df.drop(columns=['id'], inplace=True) #id不是特征
+ # 进行预测任务,不需要标签列
+ # 对特征进行预处理,包括离散特征进行标签编码,连续特征进行归一化
+
+ # 对离散特征进行标签编码
+ sparse_features = ['feature2', 'feature3'] # 假设这些是离散特征的列名
+ for feat in sparse_features:
+ lbe = LabelEncoder()
+ df[feat] = lbe.fit_transform(df[feat])
+
+ # 对连续特征进行归一化
+ dense_features = ['feature1', 'feature4'] # 假设这些是连续特征的列名
+ mms = MinMaxScaler()
+ df[dense_features] = mms.fit_transform(df[dense_features])
+ # deepctr模型输入
+ fixlen_feature_columns = [SparseFeat(feat, df[feat].nunique(), embedding_dim=4) for feat in sparse_features] + [
+ DenseFeat(feat, 1, ) for feat in dense_features]
+
+ # DeepFM 模型
+ model = DeepFM(fixlen_feature_columns, fixlen_feature_columns, task='binary')
+ # 编译模型
+ model.compile("adam", "binary_crossentropy", metrics=["binary_crossentropy"])
+ # 获取模型输入
+ model_input = {name: df[name].values for name in get_feature_names(fixlen_feature_columns)}
+ # 进行预测
+ pred_ans = model.predict(model_input, batch_size=1)
+ pred_list = [(id, score) for id, score in zip(rec_id, pred_ans.flatten())]
+ # 假设 pred_list 是一个包含预测结果的列表,每个元素是一个元组 (id, score)
+ pred_list_sorted = sorted(pred_list, key=lambda x: x[1], reverse=True)
+
+ # 输出排序后的预测结果列表
+ #print(pred_list_sorted)
+
+ # 获取推荐
+ recommendations =[item_id for item_id, _ in pred_list_sorted]
+ recommended_all = Question.objects.filter(id__in=recommendations)
+ ki=8
+ recommended_movies = Question.objects.filter(id__in=recommendations[:ki]) # 只展示前ki个
+ print('前者:',recommended_movies)
+
+
+ #根据用户反馈实时训练更新===================================================================
+ # 获取当前用户喜欢或点击的物品的ID列表
+ liked_or_clicked_items_ids = list(user_profile.liked_items.values_list('id', flat=True))
+ liked_or_clicked_items_ids.extend(list(user_profile.click_items.values_list('id', flat=True)))
+ liked_or_clicked_items_ids = set(liked_or_clicked_items_ids)
+
+ #与推荐的物品取交集
+
+ # 将被当前用户点击或喜欢的物品的ID放入列表中
+ like_or_click_items = [movie.id for movie in recommended_all if movie.id in liked_or_clicked_items_ids]
+ recommend_but_not_click_or_like = [movie.id for movie in recommended_all[ki:] if movie.id not in liked_or_clicked_items_ids]
+ # 获取 like_or_click_items 对应的行号
+ row_indices_true = df_origin[df_origin['id'].isin(like_or_click_items)].index
+ row_indices_false =df_origin[df_origin['id'].isin(recommend_but_not_click_or_like)].index
+
+ train_data_true= df.loc[row_indices_true, ['feature1', 'feature2', 'feature3', 'feature4']]
+ train_data_false = df.loc[row_indices_false, ['feature1', 'feature2', 'feature3', 'feature4']]
+ train_data_false['label'] = 0
+ train_data_true['label'] = 1
+
+ train_data = pd.concat([train_data_true, train_data_false], ignore_index=True)
+
+ #print(train_data)
+ train_threshold=1
+ # 检查 train_data 中的标签分布情况
+ label_distribution = train_data['label'].value_counts()
+ #print('label_dis:',label_distribution)
+
+ if len(train_data) >= train_threshold:
+ #训练输入
+ fixlen_feature_columns = [SparseFeat(feat, train_data[feat].nunique(), embedding_dim=4) for feat in sparse_features] + [
+ DenseFeat(feat, 1, ) for feat in dense_features]
+
+ train_model_input = {name: train_data[name].values for name in get_feature_names(fixlen_feature_columns)}
+ #print('input:',train_model_input)
+
+ history = model.fit(train_model_input, train_data['label'].values,
+ batch_size=len(train_data), epochs=4, verbose=2, validation_split=0.0, )
+
+ #训练后预测
+ model_input = {name: df[name].values for name in get_feature_names(fixlen_feature_columns)}
+ pred_ans = model.predict(model_input, batch_size=1)
+ pred_list = [(id, score) for id, score in zip(rec_id, pred_ans.flatten())]
+ pred_list_sorted = sorted(pred_list, key=lambda x: x[1], reverse=True)
+ recommendations = [item_id for item_id, _ in pred_list_sorted]
+ recommended_movies1 = []
+
+ for question_id in recommendations:
+ q =Question.objects.filter(id=question_id)
+ recommended_movies1.append(q)
+ from itertools import chain
+ recommended_movies = list(chain.from_iterable(recommended_movies1))
+
+
+ return render(request, 'moiveReApp/deepfmrec.html', {'recommended_movies': recommended_movies})
+ return render(request, 'moiveReApp/deepfmrec.html', {'recommended_movies': recommended_movies})
+
+
+
+
diff --git a/moiveRe/recommendationrecall.py b/moiveRe/recommendationrecall.py
index 22028df..4e1ccde 100644
--- a/moiveRe/recommendationrecall.py
+++ b/moiveRe/recommendationrecall.py
@@ -20,7 +20,7 @@
from django.contrib.auth.models import User
-user = User.objects.get(username='gly233')
+user = User.objects.get(username='gege')
user_profile = UserProfile.get_user_profile(user)
question_list = Question.objects.all()
@@ -129,28 +129,34 @@ def generate_recommendations(target_user, item_similarity_matrix, user_item_matr
user_item_matrix, item_dict, user_dict = itemcf.build_user_item_matrix(question_list)
item_similarity_matrix = itemcf.calculate_user_similarity(user_item_matrix)
-r1 = itemcf.generate_recommendations(user.id, item_similarity_matrix, user_item_matrix, user_dict, item_dict, k=30)[:10]
+r1 = itemcf.generate_recommendations(user.id, item_similarity_matrix, user_item_matrix, user_dict, item_dict, k=30)
+
+# 获取用户已经喜欢的物品的列表
+liked_items = list(user_profile.liked_items.values_list('id', flat=True))
+print(liked_items)
+#print(len(liked_items),liked_items)
class hotitem():
- def recall_hot_item(question_list,k):
+ def recall_hot_item(question_list):
sorted_question_list = sorted(question_list, key=lambda q: q.likes, reverse=True)
- recommendations = [question.id for question in sorted_question_list]
- return recommendations[:k]
+ recommendations = [(question.id, question.likes) for question in sorted_question_list]
+ return recommendations
-r2 = hotitem.recall_hot_item(question_list,10)
+r2 = hotitem.recall_hot_item(question_list)
+r2 = [item for item in r2 if item[0] not in liked_items]
class newitem():
-
- def recall_new_item(question_list,k):
+ @staticmethod
+ def recall_new_item(question_list):
sorted_question_list = sorted(question_list, key=lambda q: q.pub_date, reverse=True)
- recommendations = [question.id for question in sorted_question_list]
- return recommendations[:k]
-
-r3 = newitem.recall_new_item(question_list,10)
+ recommendations = [(question.id, 1) for question in sorted_question_list] #得分为1 不带偏见的推荐新物品~
+ return recommendations
+r3 = newitem.recall_new_item(question_list)
+r3 = [item for item in r3 if item[0] not in liked_items]
class U2TAG2I():
@@ -211,14 +217,15 @@ def compute_similarity(user_profile, questions):
return interest_score
- def generate_recommendations(score1,score2,k):
+ def generate_recommendations(score1,score2):
result_dict = {}
for key in score1:
if key in score2:
result_dict[key] = score1[key] + score2[key]
sorted_items = sorted(result_dict.items(), key=lambda x: x[1], reverse=True)
- top_k = sorted_items[:k]
+ top_k = sorted_items
+
return top_k
@@ -226,15 +233,36 @@ def generate_recommendations(score1,score2,k):
a=U2TAG2I.compute_interest_score(user_profile,questions=question_list)
b=U2TAG2I.compute_similarity(user_profile,questions=question_list)
-r4 = U2TAG2I.generate_recommendations(a,b,10)
-
+r4 = U2TAG2I.generate_recommendations(a,b)
+r4 = [item for item in r4 if item[0] not in liked_items]
+'''
print(r1)
print(r2)
print(r3)
print(r4)
-
+'''
+
+# 获取每个类别前 10 个物品的 ID,去除重复项
+top_10_ids = list(set([item[0] for item in r1[:10]] + [item[0] for item in r2[:10]] + [item[0] for item in r3[:10]] + [item[0] for item in r4[:10]]))
+#print(top_10_ids)
+# 根据 top_10_ids 从 r1, r2, r3, r4 中找到对应的得分
+result = []
+for item_id in top_10_ids:
+ score1 = next((item[1] for item in r1 if item[0] == item_id), None)
+ score2 = next((item[1] for item in r2 if item[0] == item_id), None)
+ score3 = next((item[1] for item in r3 if item[0] == item_id), None)
+ score4 = next((item[1] for item in r4 if item[0] == item_id), None)
+ result.append((item_id, score1, score2, score3, score4))
+
+
+import pandas as pd
+df = pd.DataFrame(result, columns=['id', 'feature1', 'feature2', 'feature3', 'feature4'])
+
+# 输出 DataFrame
+print(df)
+df.to_csv('test_data.csv', index=False)
diff --git a/moiveRe/sortbyDeepFM.py b/moiveRe/sortbyDeepFM.py
new file mode 100644
index 0000000..d589cef
--- /dev/null
+++ b/moiveRe/sortbyDeepFM.py
@@ -0,0 +1,72 @@
+import django
+import os
+import torch
+
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "moiveRe.settings")
+django.setup()
+
+
+
+
+
+
+
+
+
+import pandas as pd
+import numpy as np
+from sklearn.preprocessing import LabelEncoder, MinMaxScaler
+import torch
+from deepctr_torch.models import DeepFM
+from deepctr_torch.inputs import SparseFeat, DenseFeat, get_feature_names
+
+# 假设你已经准备好了包含特征的 DataFrame,其中 'id' 列是不参与预测的特征列,其余列是特征
+# 你可以通过删除 'id' 列来获得特征数据
+df = pd.read_csv('test_data.csv') # 假设你的数据集保存为 CSV 文件
+rec_id = df['id']
+# 假设 'id' 列是索引,不是特征,因此将其从 DataFrame 中移除
+df.drop(columns=['id'], inplace=True)
+# 进行预测任务,不需要标签列
+# 你需要对特征进行预处理,包括离散特征进行标签编码,连续特征进行归一化
+
+# 对离散特征进行标签编码
+sparse_features = ['feature2','feature3'] # 假设这些是离散特征的列名
+for feat in sparse_features:
+ lbe = LabelEncoder()
+ df[feat] = lbe.fit_transform(df[feat])
+
+# 对连续特征进行归一化
+dense_features = ['feature1','feature4'] # 假设这些是连续特征的列名
+mms = MinMaxScaler()
+df[dense_features] = mms.fit_transform(df[dense_features])
+print(df)
+# deepctr模型输入
+fixlen_feature_columns = [SparseFeat(feat, df[feat].nunique(), embedding_dim=4) for feat in sparse_features] + [DenseFeat(feat, 1,) for feat in dense_features]
+
+# DeepFM 模型
+model = DeepFM(fixlen_feature_columns, fixlen_feature_columns, task='binary')
+
+# 编译模型
+model.compile("adam", "binary_crossentropy", metrics=["binary_crossentropy"])
+
+# 获取模型输入
+model_input = {name: df[name].values for name in get_feature_names(fixlen_feature_columns)}
+
+# 进行预测
+pred_ans = model.predict(model_input, batch_size=1)
+pred_list = [(id, score) for id, score in zip(rec_id, pred_ans.flatten())]
+
+# 假设 pred_list 是一个包含预测结果的列表,每个元素是一个元组 (id, score)
+pred_list_sorted = sorted(pred_list, key=lambda x: x[1], reverse=True)
+
+# 输出排序后的预测结果列表
+print(pred_list_sorted)
+
+
+
+
+
+
+
+
+