Skip to content

Commit

Permalink
多路召回+DeepFM实时更新
Browse files Browse the repository at this point in the history
  • Loading branch information
BlackTea-c committed Mar 17, 2024
1 parent a90aaa7 commit 33b7f65
Show file tree
Hide file tree
Showing 10 changed files with 819 additions and 73 deletions.
77 changes: 77 additions & 0 deletions moiveRe/DeepFMcopy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import django
import os
import torch

# Point Django at this project's settings module (only if the environment does
# not already name one) and initialise the framework.
# NOTE(review): nothing else in this script visibly imports Django models or
# settings - confirm whether this bootstrap is actually required here.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "moiveRe.settings")
django.setup()









import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import torch
from deepctr_torch.models import DeepFM
from deepctr_torch.inputs import SparseFeat, DenseFeat, get_feature_names

# Build a small synthetic dataset; the seed is fixed so every run draws the
# same values.
np.random.seed(2024)
data_size = 10000

# Three categorical (sparse) columns: C1, C2, C3.
sparse_features = [f'C{i}' for i in range(1, 4)]
# Two continuous (dense) columns: I1, I2.
dense_features = [f'I{i}' for i in range(1, 3)]

# Columns are drawn in a fixed order (categorical, then continuous, then the
# label) so the seeded random stream produces the same table each time.
data = {}
for feat in sparse_features:
    # Each categorical value is one of five category ids, 0..4.
    data[feat] = np.random.choice([0, 1, 2, 3, 4], data_size)
for feat in dense_features:
    # Uniform floats in [0, 1).
    data[feat] = np.random.rand(data_size)
# Binary 0/1 target column.
data['label'] = np.random.randint(0, 2, data_size)

# Assemble the columns into a DataFrame and show it.
df = pd.DataFrame(data)
print(df)


# Preprocessing.
# Label-encode every categorical column so its values become contiguous
# integer ids. The encoder is fitted on the full table so that the train and
# test splits share a single id mapping.
for feat in sparse_features:
    lbe = LabelEncoder()
    df[feat] = lbe.fit_transform(df[feat])

# Split BEFORE scaling: fitting the scaler on the whole table would leak the
# held-out rows' min/max into the training features.
train, test = train_test_split(df, test_size=0.2)
# Work on independent copies so the column assignments below do not write
# into views of `df`.
train = train.copy()
test = test.copy()

# Fit the min-max scaler on the training rows only, then apply that same
# scaling to the test rows.
mms = MinMaxScaler()
train[dense_features] = mms.fit_transform(train[dense_features])
test[dense_features] = mms.transform(test[dense_features])

# Feature-column descriptions for DeepCTR: one 4-dimensional embedding per
# categorical feature (vocabulary size = number of distinct values in `df`),
# and one width-1 input per continuous feature.
sparse_columns = [
    SparseFeat(feat, df[feat].nunique(), embedding_dim=4)
    for feat in sparse_features
]
dense_columns = [DenseFeat(feat, 1) for feat in dense_features]
fixlen_feature_columns = sparse_columns + dense_columns

# Build DeepFM. The same column list is deliberately passed for both
# feature-column arguments. The task is binary because the recalled
# candidates are ranked by predicted click-through rate.
model = DeepFM(fixlen_feature_columns, fixlen_feature_columns, task='binary')

# Compile with the Adam optimizer and binary cross-entropy as both the loss
# and the reported metric (choices taken from the library documentation).
model.compile("adam", "binary_crossentropy", metrics=["binary_crossentropy"])

# The model consumes a dict mapping each feature name to a value array.
feature_names = get_feature_names(fixlen_feature_columns)
train_model_input = {name: train[name].values for name in feature_names}
# NOTE(review): test_model_input is built but not used later in this script.
test_model_input = {name: test[name].values for name in feature_names}

# Show how the two label classes are balanced in the training split.
label_distribution = train['label'].value_counts()
print('label_dis:', label_distribution)

# Train on the training split only; no rows are held out for validation.
history = model.fit(
    train_model_input,
    train['label'].values,
    batch_size=50,
    epochs=4,
    verbose=2,
    validation_split=0,
)





50 changes: 0 additions & 50 deletions moiveRe/DeepFm.py
Original file line number Diff line number Diff line change
@@ -1,50 +0,0 @@
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from deepctr.models import DeepFM
from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names

# NOTE(review): the torch imports above are not used anywhere in this script,
# and the model comes from `deepctr` rather than `deepctr_torch` - confirm
# which backend is intended.

# Toy dataset of ad features with a 0/1 click label.
data = {
    'user_id': [1, 2, 3, 4, 5],
    'ad_id': [101, 102, 103, 104, 105],
    'age': [25, 30, 35, 40, 45],
    'gender': ['M', 'F', 'M', 'F', 'M'],
    'click': [1, 0, 1, 0, 1]
}
df = pd.DataFrame(data)

# Categorical columns get embeddings; numeric columns are fed in directly.
sparse_features = ['user_id', 'ad_id', 'gender']
dense_features = ['age']

# Re-code every categorical column as integer ids 0..n-1. The vocabulary size
# below is the number of distinct values, so raw ids such as 101..105 or
# strings such as 'M'/'F' are not valid embedding indices.
for feat in sparse_features:
    df[feat] = pd.factorize(df[feat])[0]

# Describe each feature to DeepCTR: a 4-dimensional embedding per categorical
# column and a width-1 input per numeric column.
feature_columns = [SparseFeat(feat, vocabulary_size=df[feat].nunique(), embedding_dim=4)
                   for feat in sparse_features] + [DenseFeat(feat, 1) for feat in dense_features]

# Hold out 20% of the rows for testing.
train, test = train_test_split(df, test_size=0.2)

# Build the model; the same columns are passed for both feature-column
# arguments.
model = DeepFM(feature_columns, feature_columns, task='binary')

# Adam optimizer with binary cross-entropy as both loss and reported metric.
model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'])

# The model consumes a dict mapping each feature name to its column of values.
train_model_input = {name: train[name] for name in sparse_features + dense_features}
test_model_input = {name: test[name] for name in sparse_features + dense_features}
train_target = train['click'].values
test_target = test['click'].values

# Train, holding out 20% of the training rows for validation.
history = model.fit(train_model_input, train_target, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

# Predict click probability for the held-out rows.
preds = model.predict(test_model_input, batch_size=256)

# Print the predictions.
print("预测点击率:", preds)
26 changes: 26 additions & 0 deletions moiveRe/UserExperienceUpdate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@


import django
from django.conf import settings
import os

# Configure Django before touching the ORM: name the project's settings module
# (only if the environment does not already name one) and initialise it.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "moiveRe.settings")
django.setup()

# This import comes after django.setup() so the app's models can be loaded.
from moiveReApp.models import UserExperience


# Experience-level tag names to seed into the database.
experience_tags = ['NewtoGame', 'Middle', 'Experienced']

# Make sure each tag has a row, and report per tag whether it was created by
# this run or was already present.
for tag_name in experience_tags:
    tag, created = UserExperience.objects.get_or_create(name=tag_name)

    if not created:
        print(f'Tag already exists: {tag_name}')
        continue
    print(f'Created tag: {tag_name}')
9 changes: 9 additions & 0 deletions moiveRe/moiveReApp/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,19 @@ class Userinterests(models.Model):
def __str__(self):
return self.name

class UserExperience(models.Model):
    """A named user-experience tag; each name may appear only once."""

    # Tag text: at most 100 characters, declared unique on the field.
    name = models.CharField(max_length=100, unique=True)

    def __str__(self) -> str:
        """Return the tag's name as its string form."""
        return self.name




class UserProfile(models.Model):
# 与用户关联的一对一字段,这里假设你使用了Django内置的User模型
user = models.OneToOneField(User, on_delete=models.CASCADE)
userExperience = models.ManyToManyField(UserExperience, related_name='user_experience')
interests = models.ManyToManyField(Userinterests, related_name='interested_users')
liked_items = models.ManyToManyField(Question, blank=True, related_name='liked_users')
click_items = models.ManyToManyField(Question, blank=True, related_name='clicked_users')
Expand Down
Loading

0 comments on commit 33b7f65

Please sign in to comment.