-
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathindex.js
62 lines (54 loc) · 1.3 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
/**
* @typedef {import('nlcst').Root} Root
* @typedef {import('nlcst').Word} Word
*/
import {toString} from 'nlcst-to-string'
// @ts-expect-error: untyped.
import posjs from 'pos'
import {SKIP, visit} from 'unist-util-visit'
// Single tagger instance: constructed once when this module is loaded and
// reused for every sentence that the transform below processes.
const tagger = new posjs.Tagger()
/**
 * Plugin that adds part-of-speech (POS) tags to words.
 *
 * The returned transform sets `data.partOfSpeech` on every `WordNode` that is
 * a direct child of a `SentenceNode`.
 *
 * @returns
 *   Transform.
 */
export default function retextPos() {
  /**
   * Tag the words of each sentence in `tree`, in place.
   *
   * @param {Root} tree
   *   Tree.
   * @returns {undefined}
   *   Nothing.
   */
  return function (tree) {
    visit(tree, 'SentenceNode', (sentence) => {
      /** @type {Array<Word>} */
      const words = []
      /** @type {Array<string>} */
      const texts = []

      // Gather the sentence’s direct word children together with their text;
      // the two lists stay aligned by position.
      for (const child of sentence.children) {
        if (child.type !== 'WordNode') continue
        words.push(child)
        texts.push(toString(child))
      }

      // Nothing to tag: still skip the sentence’s children.
      if (words.length === 0) return SKIP

      // The whole sentence is tagged in one call; the tag itself is read from
      // index `1` of each result entry.
      const tags = tagger.tag(texts)

      for (let position = 0; position < tags.length; position += 1) {
        const word = words[position]

        // Keep an existing `data` object (and its other fields) if present.
        if (!word.data) {
          word.data = {}
        }

        word.data.partOfSpeech = tags[position][1]
      }

      // Don’t enter sentences.
      return SKIP
    })
  }
}