-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
a90aaa7
commit 33b7f65
Showing
10 changed files
with
819 additions
and
73 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
"""Train a DeepFM click-through-rate model on synthetic data as a smoke test.

Features and labels are drawn at random, so the model is not expected to learn
anything useful; the script only exercises the deepctr_torch pipeline
(preprocess -> build feature columns -> fit -> evaluate) end to end.
"""
import os

import django
import numpy as np
import pandas as pd
import torch
from deepctr_torch.inputs import DenseFeat, SparseFeat, get_feature_names
from deepctr_torch.models import DeepFM
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

# Point Django at the project settings and initialise it before anything else
# in the script runs.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "moiveRe.settings")
django.setup()

# Generate reproducible example data.
np.random.seed(2024)
data_size = 10000

# Three categorical (sparse) features: C1, C2, C3.
sparse_features = ['C' + str(i) for i in range(1, 4)]
# Two continuous (dense) features: I1, I2.
dense_features = ['I' + str(i) for i in range(1, 3)]

data = {feat: np.random.choice([0, 1, 2, 3, 4], data_size) for feat in sparse_features}
data.update({feat: np.random.rand(data_size) for feat in dense_features})
data['label'] = np.random.randint(0, 2, data_size)

# Convert to a DataFrame.
df = pd.DataFrame(data)
print(df)

# Preprocessing.
# Label-encode every sparse feature so its values are contiguous integer ids.
for feat in sparse_features:
    lbe = LabelEncoder()
    df[feat] = lbe.fit_transform(df[feat])
# Scale the dense features into [0, 1].
mms = MinMaxScaler()
df[dense_features] = mms.fit_transform(df[dense_features])

train, test = train_test_split(df, test_size=0.2)

# Feature-column descriptions consumed by deepctr_torch.
fixlen_feature_columns = (
    [SparseFeat(feat, df[feat].nunique(), embedding_dim=4) for feat in sparse_features]
    + [DenseFeat(feat, 1) for feat in dense_features]
)
feature_names = get_feature_names(fixlen_feature_columns)

# The same columns feed both the linear part and the deep part of DeepFM.
# The task is binary because candidates are ranked by predicted click
# probability.
model = DeepFM(fixlen_feature_columns, fixlen_feature_columns, task='binary')

# Compile the model (optimizer, loss, and the metric reported per epoch).
model.compile(
    "adam",
    "binary_crossentropy",
    metrics=["binary_crossentropy"],
)

# Build the name -> array inputs expected by fit()/evaluate().
train_model_input = {name: train[name].values for name in feature_names}
test_model_input = {name: test[name].values for name in feature_names}
label_distribution = train['label'].value_counts()
print('label_dis:', label_distribution)

# Train on the training split only (no internal validation split).
history = model.fit(
    train_model_input,
    train['label'].values,
    batch_size=50,
    epochs=4,
    verbose=2,
    validation_split=0,
)

# Score the held-out split so the train/test split above is actually used.
test_metrics = model.evaluate(test_model_input, test['label'].values, batch_size=256)
print('test_metrics:', test_metrics)
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,50 +0,0 @@ | ||
"""Minimal DeepFM click-prediction example on a five-row toy ad dataset."""
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from deepctr.models import DeepFM
# DenseFeat is required for the dense 'age' column built below.
from deepctr.feature_column import DenseFeat, SparseFeat, get_feature_names

# NOTE(review): `deepctr` appears to be the TensorFlow implementation, so the
# torch imports above look unused here -- confirm which backend is intended.

# Toy dataset: ad/user features plus whether the ad was clicked.
data = {
    'user_id': [1, 2, 3, 4, 5],
    'ad_id': [101, 102, 103, 104, 105],
    'age': [25, 30, 35, 40, 45],
    'gender': ['M', 'F', 'M', 'F', 'M'],
    'click': [1, 0, 1, 0, 1]
}
df = pd.DataFrame(data)

# Feature groups.
sparse_features = ['user_id', 'ad_id', 'gender']
dense_features = ['age']

# Each embedding table is sized by nunique(), so every sparse value must be an
# integer code in [0, nunique). Raw ids (1..5, 101..105) and the 'M'/'F'
# strings are therefore remapped to 0-based codes first.
for feat in sparse_features:
    df[feat] = pd.factorize(df[feat])[0]

# Describe the columns as SparseFeat / DenseFeat objects.
feature_columns = [SparseFeat(feat, vocabulary_size=df[feat].nunique(), embedding_dim=4)
                   for feat in sparse_features] + [DenseFeat(feat, 1) for feat in dense_features]

# Split into training and test sets.
train, test = train_test_split(df, test_size=0.2)

# Build the model; the same columns feed the linear and the deep parts.
model = DeepFM(feature_columns, feature_columns, task='binary')

# Compile the model.
model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'])

# Convert the data into the name -> NumPy array mapping the model expects.
# Arrays (not pandas Series) are passed, presumably required by Keras when
# validation_split is used -- TODO confirm.
feature_names = get_feature_names(feature_columns)
train_model_input = {name: train[name].values for name in feature_names}
test_model_input = {name: test[name].values for name in feature_names}
train_target = train['click'].values
test_target = test['click'].values

# Train the model.
history = model.fit(train_model_input, train_target, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

# Predict click probabilities for the held-out rows.
preds = model.predict(test_model_input, batch_size=256)

# Print the predictions.
print("预测点击率:", preds)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
|
||
|
||
import os

import django
from django.conf import settings

# Django has to be configured and initialised before any model can be imported.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "moiveRe.settings")
django.setup()

from moiveReApp.models import UserExperience

# Experience-level tags to seed.
experience_tags = ['NewtoGame', 'Middle', 'Experienced']

# Ensure a UserExperience row exists for every tag and report what happened.
for level_name in experience_tags:
    _, is_new = UserExperience.objects.get_or_create(name=level_name)
    if not is_new:
        print(f'Tag already exists: {level_name}')
        continue
    print(f'Created tag: {level_name}')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.