-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsources.py
61 lines (51 loc) · 1.7 KB
/
sources.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from util.sanitizer import Sanitizer
from util.source_reader import Source
from os import path
import constant
# Metadata for the main training set: the sample source plus the names of
# the sentiment and text columns.
samples = dict(
    source=Source(
        name="Samples Main",
        the_path=path.join(path.dirname(__file__), "samples"),
        log=True,  # logging enabled for this source (original note: it is noisy)
    ),
    sentiment_col="Sentiment",
    text_col="Text",
)
# Prepare the main training samples for sentiment analysis, telling the
# source which columns hold the label and the text and passing
# Sanitizer.sanitize as the f_sanitize callable.
samples["source"].prepare_for_sentiment_analysis(
    sentiment_col=samples["sentiment_col"],
    text_col=samples["text_col"],
    f_sanitize=Sanitizer.sanitize,
)
# Separate source for the "neutral-sample" path under the samples directory,
# created with logging switched off.
source_neutral = Source(
    name="samples neutral",
    the_path=path.join(path.dirname(__file__), "samples", "neutral-sample"),
    log=False,
)
# import numpy as np
# import collections
# dataframe = samples["source"].dataframes
# print(samples["source"].sentiment_values, collections.Counter(dataframe[0]))
# Merge any additional training samples into the main source.
# Each entry provides the "path" to load and, optionally, a "filter_equal"
# pair [column, value]: only rows whose `column` equals `value` are kept.
other_samples = [
    {
        "path": path.join(path.dirname(__file__), "samples", "neutral-sample"),
        "filter_equal": [samples["sentiment_col"], "neutral"],
    },
]

# No emptiness guard is needed: iterating an empty list runs zero times.
# NOTE(review): nesting reconstructed — normalization and the join are assumed
# to apply to every sample, whether or not it has a "filter_equal"; confirm.
for sample in other_samples:
    tmp = Source(name="Dummy", the_path=sample["path"], log=False)
    tmp.fetch().flatten()
    if "filter_equal" in sample:
        l_filter = sample["filter_equal"]
        # Keep only the rows where column l_filter[0] equals l_filter[1]
        # (presumably tmp.dataframes is a pandas DataFrame here — confirm).
        tmp.dataframes = tmp.dataframes[tmp.dataframes[l_filter[0]] == l_filter[1]]
    tmp.normalize_for_sentiment_analysis(
        sentiment_col=samples["sentiment_col"],
        text_col=samples["text_col"],
    )
    samples["source"].join_for_sentimental_analysis(tmp)
    # Drop the temporary inside the loop so it never lingers in the module
    # namespace and is never deleted while unbound.
    del tmp
# print(samples["source"].sentiment_values, collections.Counter(dataframe[0]))