-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathcitron-extract
executable file
·81 lines (63 loc) · 1.84 KB
/
citron-extract
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python3
# Copyright 2021 BBC
# Authors: Chris Newell <[email protected]>
#
# License: Apache-2.0
"""
This script runs Citron on the command line.
"""
import argparse
import logging
import json
import sys
from citron.citron import Citron
from citron.logger import logger
def main():
    """Run Citron from the command line.

    Parses the command-line options, constructs a ``Citron`` instance from
    ``--model-path``, obtains the input text from ``--input-file`` (or from
    stdin when that option is omitted), calls ``Citron.extract`` on the text
    and serialises the result as indented JSON to ``--output-file`` (or to
    stdout when that option is omitted).

    The ``-v`` flag sets the Citron logger to ``DEBUG`` level.
    """
    parser = argparse.ArgumentParser(
        description="Extract quotes from text",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "-v",
        action="store_true",
        default=False,
        help="Verbose mode",
    )
    parser.add_argument(
        "--model-path",
        metavar="model_path",
        type=str,
        required=True,
        help="Path to Citron model directory",
    )
    parser.add_argument(
        "--input-file",
        metavar="input_file",
        type=str,
        help="Optional: Otherwise read from stdin",
    )
    parser.add_argument(
        "--output-file",
        metavar="output_file",
        type=str,
        help="Optional: Otherwise write to stdout",
    )
    args = parser.parse_args()

    if args.v:
        logger.setLevel(logging.DEBUG)

    citron = Citron(args.model_path)

    if args.input_file is None:
        # Every line read from stdin is prefixed with a single space.
        # Joining once avoids repeated string concatenation on large inputs.
        text = "".join(" " + line for line in sys.stdin)
    else:
        with open(args.input_file, encoding="utf-8") as infile:
            text = infile.read()

    results = citron.extract(text)
    # ensure_ascii=False keeps non-ASCII characters readable in the output.
    output = json.dumps(results, indent=4, sort_keys=False, ensure_ascii=False)

    if args.output_file is None:
        print(output)
    else:
        with open(args.output_file, "w", encoding="utf-8") as outfile:
            # Terminate the file with a newline, matching what print() emits.
            outfile.write(output + "\n")
# Run the command-line entry point only when executed as a script,
# not when this file is imported as a module.
if __name__ == "__main__":
    main()