forked from shuita2333/Calorific_value_calculation
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdemo.py
121 lines (105 loc) · 5.51 KB
/
demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from Preprocessing.read_file_to_list import get_user_data_list, get_news_data_list, get_emotion_data_list
from calculation_of_category.News_number_calculate import count_exceeding_messages
from calculation_of_category.Topic_sensitivity_calculation import classify_string
from calculation_of_category.User_engagement_calculate import count_messages_in_timeframe
import os
import openpyxl
import matplotlib.pyplot as plt
from calculation_of_category.cemotion_read_calculate import sum_emotion
'''
新主函数,使用预处理生成的情感数据进行分析
'''
# Module-level configuration read by run() and by the __main__ entry point.
time_subsection = 20  # number of time intervals to report
time_num = 1  # length of each reported/plotted interval (must be an integer multiple of time_sum)
time_sum = 0.5  # length of one calculation step (fixed value)
# comment_data_location = r'data\test\comment_sum\\'  # topic data path (unused, kept for reference)
title = '居民拒改昵称被移出业主群后起诉管理员'  # topic name; also used as the data file stem
hotspots_boundary = 10  # adjustment threshold for the hot-event probability (upper limit 60)
log_total = 10  # log value used in the total-comment-count formula
log_maxH = 10  # log value used in the busiest-hour comment-count formula
par_U_dc = 1  # weight parameter for the total comment count
par_U_c = 1  # weight parameter for the busiest-hour comment count
par_N = 5  # divisor parameter for media attention (must not be zero)
def run(comment_data_name, time_subsection, time_sum, time_num):
    """Compute per-interval heat metrics for one topic, then export and plot them.

    For every reported interval the function combines user engagement (U),
    media attention (N), topic sensitivity (heat_level) and the absolute
    emotion score (F) into a heat value ``T = heat_level * (U + N) * F`` and
    maps it to a hot-event probability.  One row per interval is written to
    ``data/analysis/<comment_data_name>_analysis.xlsx``; a dual-axis line chart
    is saved to ``data/analysis/<comment_data_name>_trend.png`` and shown.

    Relies on the module-level settings ``hotspots_boundary``, ``log_total``,
    ``log_maxH``, ``par_U_dc``, ``par_U_c`` and ``par_N``.

    Args:
        comment_data_name: Topic name; used as the stem of the input workbook
            under ``data/test/comment_sum`` and as the entry name under
            ``data/test/comment``.
        time_subsection: Number of intervals to report.
        time_sum: Length of one calculation step.
        time_num: Length of one reported interval (expected to be an integer
            multiple of ``time_sum``).

    Returns:
        None.  Results are delivered through the saved workbook, the saved
        image and the displayed figure.
    """
    wb = openpyxl.Workbook()
    ws = wb.active
    # Header row for the per-interval results.
    ws.append(
        ['Filename', 'Time Interval', '热度值', '热点事件概率', '用户关注度', '用户关注增长率', '总评论数量化值',
         '小时最评论数量化值',
         '媒体关注度',
         '话题敏感度', '用户情感值', '情感倾向'])

    # Build every path once with os.path.join instead of hard-coded backslashes
    # so the script also works where '\\' is not the path separator.
    comment_sum_path = os.path.join('data', 'test', 'comment_sum', comment_data_name + '.xlsx')
    news_path = os.path.join('data', 'test', 'comment', comment_data_name)
    analysis_dir = os.path.join('data', 'analysis')
    # Saving below fails if the output directory is missing, so create it up front.
    os.makedirs(analysis_dir, exist_ok=True)

    time_intervals = []
    T_values = []
    hot_events_probability = []

    # Number of calculation steps that make up one reported interval.
    factor = int(time_num / time_sum)
    for i in range(1, time_subsection * factor + 1):
        # Time at the end of the current calculation step.
        current_time = i * time_sum
        # Only steps that land exactly on a reported-interval boundary are evaluated.
        if current_time % time_num != 0:
            continue

        # Split the fractional time value into hour / minute / second parts for labels.
        hours = int(current_time)
        minutes_with_seconds = (current_time - hours) * 60
        minutes = int(minutes_with_seconds)
        seconds = int((minutes_with_seconds - minutes) * 60)
        formatted_time_short = f"{hours}:{minutes}"
        formatted_time = f"{hours}:{minutes}:{seconds}"
        time_intervals.append(formatted_time_short)

        # NOTE(review): the input files are re-read on every interval, as in the
        # original; hoisting is only safe if the helpers do not mutate the lists.
        user_data_list = get_user_data_list(comment_sum_path)
        U, increace_count, total_m, max_m = count_messages_in_timeframe(
            user_data_list, log_total, log_maxH, par_U_dc, par_U_c, i, current_time)

        if U == -1:
            # U == -1 is treated as "topic has no data": heat is zero.  The
            # remaining metrics get explicit defaults so the row written below
            # never hits unbound names or stale values from an earlier interval.
            T = 0
            N = 0
            heat_level = 0
            F = 0
            f = 0
            print(f"========!!!{comment_data_name}-话题为空!!!========")
        else:
            news_data_list = get_news_data_list(news_path)
            N = count_exceeding_messages(news_data_list, par_N, current_time)
            heat_level = classify_string(comment_data_name, 'dataset')
            emotion_data_list = get_emotion_data_list(comment_sum_path)
            F = sum_emotion(emotion_data_list, current_time) * 10
            # f keeps the sign of the emotion score (a zero score counts as
            # negative); F keeps only its magnitude.
            f = 1 if F > 0 else -1
            F = abs(F)
            T = heat_level * (U + N) * F

        # Map the heat value to a hot-event probability: used as-is below the
        # boundary, linearly rescaled from [boundary, 60) onto [boundary, 100),
        # and fixed at 99 from 60 upwards.
        if T < hotspots_boundary:
            hot_event = T
        elif T < 60:
            hot_event = hotspots_boundary + (T - hotspots_boundary) * (100 - hotspots_boundary) / (
                60 - hotspots_boundary)
        else:
            hot_event = 99

        print(f"========{comment_data_name} --- 第{i/factor}时间段 {formatted_time}h时间计算完成========")

        # Record this interval in the worksheet and in the series used for plotting.
        ws.append(
            [comment_data_name, formatted_time, T, hot_event, U, increace_count, total_m, max_m, N, heat_level, F, f])
        T_values.append(T)
        hot_events_probability.append(hot_event)

    # Persist the workbook.
    wb.save(os.path.join(analysis_dir, f"{comment_data_name}_analysis.xlsx"))

    # Line chart: heat value on the left axis, hot-event probability on the right.
    plt.rcParams['font.family'] = 'SimHei'  # font selected so the Chinese labels render
    fig, ax1 = plt.subplots()
    ax1.plot(time_intervals, T_values, 'g-')
    ax1.set_xlabel('时间间隔(小时)')
    ax1.set_ylabel('热度值', color='g')
    ax1.tick_params('y', colors='g')

    ax2 = ax1.twinx()
    ax2.plot(time_intervals, hot_events_probability, 'b-')
    ax2.set_ylabel('热点事件概率', color='b')
    ax2.tick_params('y', colors='b')
    # Label every probability point with its value, one decimal place.
    for label_x, probability in zip(time_intervals, hot_events_probability):
        ax2.annotate(f'{probability:.1f}', (label_x, probability), textcoords="offset points",
                     xytext=(0, 10), ha='center', color='b')

    plt.title(f'{comment_data_name}中,每{time_num}h分段的热度值')
    plt.savefig(os.path.join(analysis_dir, f"{comment_data_name}_trend.png"))
    plt.show()
if __name__ == "__main__":
    # Script entry point: analyse the configured topic with the module-level settings.
    T = run(
        comment_data_name=title,
        time_subsection=time_subsection,
        time_sum=time_sum,
        time_num=time_num,
    )