-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathModel.js
147 lines (123 loc) · 3.13 KB
/
Model.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import Vocabulary from './Vocabulary'
/**
 * Check whether a value is a plain object (e.g. one written as `{}`).
 *
 * The prototype is inspected instead of `value.constructor` so that objects
 * carrying an own "constructor" key are still recognised, and null/undefined
 * are rejected up front because `Object.getPrototypeOf` throws a TypeError
 * for them.
 *
 * @param {*} value
 * @return {boolean}
 */
function isPlainObject(value) {
  return (
    value !== null &&
    typeof value === 'object' &&
    Object.getPrototypeOf(value) === Object.prototype
  )
}

/**
 * Container for the n-gram settings, the vocabulary and the training data.
 *
 * @param {Object} [config]
 * @param {int} [config.nGramMin=1] - Minimum n-gram size
 * @param {int} [config.nGramMax=1] - Maximum n-gram size
 * @param {(Array|Set|false)} [config.vocabulary=[]] - Terms mapped to indexes in the model data entries, set to false to store terms directly in the data entries
 * @param {Object} [config.data={}] - Key-value store containing all training data
 * @throws {Error} If config or config.data is not an object literal, or if the
 *   n-gram sizes are not integers >= 1 with nGramMin <= nGramMax
 * @constructor
 */
class Model {
  constructor(config = {}) {
    if (!isPlainObject(config)) {
      throw new Error('config must be an object literal')
    }

    // Defaults first, so any key present in config (even with an undefined
    // value) overrides them and is then validated below.
    config = {
      nGramMin: 1,
      nGramMax: 1,
      vocabulary: [],
      data: {},
      ...config
    }

    if (!Number.isInteger(config.nGramMin)) {
      throw new Error('Config value nGramMin must be an integer')
    }

    if (!Number.isInteger(config.nGramMax)) {
      throw new Error('Config value nGramMax must be an integer')
    }

    if (config.nGramMin < 1) {
      throw new Error('Config value nGramMin must be at least 1')
    }

    if (config.nGramMax < 1) {
      throw new Error('Config value nGramMax must be at least 1')
    }

    if (config.nGramMax < config.nGramMin) {
      throw new Error('Invalid nGramMin/nGramMax combination in config')
    }

    // Anything that is neither `false` nor already a Vocabulary is handed to
    // the Vocabulary constructor.
    if (
      config.vocabulary !== false &&
      !(config.vocabulary instanceof Vocabulary)
    ) {
      config.vocabulary = new Vocabulary(config.vocabulary)
    }

    if (!isPlainObject(config.data)) {
      throw new Error('Config value data must be an object literal')
    }

    this._nGramMin = config.nGramMin
    this._nGramMax = config.nGramMax
    this._vocabulary = config.vocabulary
    // Shallow copy: the top-level store is detached from the caller's object,
    // nested values are still shared.
    this._data = { ...config.data }
  }

  /**
   * Minimum n-gram size
   *
   * Not cross-checked against nGramMax here, so both bounds can be updated
   * in either order.
   *
   * @type {int}
   * @throws {Error} When assigned a non-integer or a value below 1
   */
  get nGramMin() {
    return this._nGramMin
  }

  set nGramMin(size) {
    if (!Number.isInteger(size)) {
      throw new Error('nGramMin must be an integer')
    }

    if (size < 1) {
      throw new Error('nGramMin must be at least 1')
    }

    this._nGramMin = size
  }

  /**
   * Maximum n-gram size
   *
   * Not cross-checked against nGramMin here, so both bounds can be updated
   * in either order.
   *
   * @type {int}
   * @throws {Error} When assigned a non-integer or a value below 1
   */
  get nGramMax() {
    return this._nGramMax
  }

  set nGramMax(size) {
    if (!Number.isInteger(size)) {
      throw new Error('nGramMax must be an integer')
    }

    if (size < 1) {
      throw new Error('nGramMax must be at least 1')
    }

    this._nGramMax = size
  }

  /**
   * Vocabulary instance, or false when no vocabulary is used
   *
   * Assigning anything other than false or a Vocabulary instance passes the
   * value to the Vocabulary constructor.
   *
   * @type {(Vocabulary|false)}
   */
  get vocabulary() {
    return this._vocabulary
  }

  set vocabulary(vocabulary) {
    if (vocabulary !== false && !(vocabulary instanceof Vocabulary)) {
      vocabulary = new Vocabulary(vocabulary)
    }

    this._vocabulary = vocabulary
  }

  /**
   * Model data
   *
   * The getter returns the internal store itself (not a copy); the setter
   * stores a shallow copy of the assigned object.
   *
   * @type {Object}
   * @throws {Error} When assigned anything other than an object literal
   */
  get data() {
    return this._data
  }

  set data(data) {
    if (!isPlainObject(data)) {
      throw new Error('data must be an object literal')
    }

    this._data = { ...data }
  }

  /**
   * Return the model in its current state as an object literal, including the
   * configured n-gram min/max values, the vocabulary as an array (if any,
   * otherwise false), and an object literal with all the training data
   *
   * The returned `data` is the internal store itself, not a copy.
   *
   * @return {Object}
   */
  serialize() {
    // A model without a vocabulary holds `false`, which has no `terms` to
    // iterate; emit `false` so the output can be fed back into the
    // constructor unchanged.
    const vocabulary =
      this._vocabulary === false ? false : Array.from(this._vocabulary.terms)

    return {
      nGramMin: this._nGramMin,
      nGramMax: this._nGramMax,
      vocabulary,
      data: this._data
    }
  }
}
export default Model