-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmaker.go
126 lines (114 loc) · 4.18 KB
/
maker.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
// Functions that build data such as fake HTTP responses, collection metadata, document error reports, and new documents and collections.
package siw
import (
"bufio"
"bytes"
"io/ioutil"
"log"
"net/http"
"time"
)
// MakeMockResponse builds a stand-in HTTP response for cases where
// transport.RoundTrip fails.
//
// The response always carries status "422 Unprocessable Entity" over
// HTTP/1.0. body becomes the readable response body and determines
// ContentLength; req is attached as the originating request.
func MakeMockResponse(req *http.Request, body string) *http.Response {
	payload := bytes.NewBufferString(body)

	mock := new(http.Response)
	mock.Status = "422 Unprocessable Entity"
	mock.StatusCode = http.StatusUnprocessableEntity
	mock.Proto = "HTTP/1.0"
	mock.ProtoMajor, mock.ProtoMinor = 1, 0
	// NopCloser gives the in-memory buffer the Close method Body requires.
	mock.Body = ioutil.NopCloser(payload)
	mock.ContentLength = int64(len(body))
	mock.Request = req
	return mock
}
// MakeNewDocument assembles a Document from textset and sends it on dChan.
//
// Every entry of textset is stored as one sentence, and the tokens that Cut
// returns for that entry are appended to the document's words. id, label and
// hresp are stored on the document unchanged. The time spent building is
// recorded in dBuildTime before the send.
//
// Nothing is returned; the finished Document is delivered only through dChan.
func MakeNewDocument(textset []string, id int, label string, dChan chan Document, hresp *HttpResponse) {
	started := time.Now()

	var built Document
	built.id = id
	built.label = label
	built.httpres = hresp

	for i := range textset {
		line := textset[i]
		for _, word := range Cut(line) {
			built.words = append(built.words, word)
		}
		built.sentences = append(built.sentences, line)
	}

	built.dBuildTime = time.Since(started)
	dChan <- built
}
// MakeNewCollection builds a Collection from the responses produced by
// Transporter(idx).
//
// For each response the body is scanned line by line, a Document is built
// from those lines via MakeNewDocument (labelled with the response URL and
// numbered from 1 in iteration order), and a pointer to it is appended to
// coll.DocList. The total elapsed time is stored in coll.BuildTime.
//
// Scanner and body-close errors are logged and do not abort the build; the
// affected document keeps whatever lines were read before the error.
func MakeNewCollection(idx *Indexer) (coll Collection) {
	t0 := time.Now()
	doC := make(chan Document)
	count := 0

	for _, r := range Transporter(idx) {
		count++

		// Start from an empty line set for every response; sharing one slice
		// across iterations would leak earlier responses' text into each
		// later document.
		var tset []string
		if body := r.response.Body; body != nil {
			scanner := bufio.NewScanner(body)
			for scanner.Scan() {
				tset = append(tset, scanner.Text())
			}
			// Scan stops silently on read errors and over-long lines, so the
			// error has to be checked explicitly.
			if err := scanner.Err(); err != nil {
				log.Printf("reading body of %s: %v", r.url, err)
			}
			// The body has been consumed; close it to release the connection.
			if err := body.Close(); err != nil {
				log.Printf("closing body of %s: %v", r.url, err)
			}
		}

		// MakeNewDocument reports its result only through the channel, so it
		// runs in a goroutine and the result is received immediately.
		go MakeNewDocument(tset, count, r.url, doC, r)
		doc := <-doC
		coll.DocList = append(coll.DocList, &doc)
	}

	coll.BuildTime = time.Since(t0)
	return coll
}
// MakeCollectionVis logs summary metadata about coll through the standard
// logger: build time, number of documents, total words and sentences, how
// many documents were retrieved or not, and the retrieval success percentage.
//
// A document counts as unretrieved when its httpres.err is non-nil. For an
// empty collection the success percentage is reported as 0.
func MakeCollectionVis(coll *Collection) {
	size := len(coll.DocList)
	totalWords := 0
	totalSentences := 0
	totalUnretrieved := 0
	for _, doc := range coll.DocList {
		totalWords += len(doc.words)
		totalSentences += len(doc.sentences)
		if doc.httpres.err != nil {
			totalUnretrieved++
		}
	}
	totalRetrieved := size - totalUnretrieved

	// With no documents the ratio would be 0/0, which logs as NaN; report 0
	// instead.
	successPercent := 0.0
	if size > 0 {
		successPercent = float64(totalRetrieved) / float64(size) * 100
	}

	log.Printf(
		"\nCollection build time = %v \n Collection size (# of documents) = %d\n Total words = %d \n Total Sentences = %d\n Total Unretrieved = %d \n Total Retrieved = %d \n Success = %f percent \n \n",
		coll.BuildTime,
		size,
		totalWords,
		totalSentences,
		totalUnretrieved,
		totalRetrieved,
		successPercent,
	)
}
// MakeDocumentVis currently returns a copy of the Collection that coll points
// to and logs nothing; the per-document logging below is disabled.
// TODO: format error output in a more Go-like style
func MakeDocumentVis(coll *Collection) Collection {
	snapshot := *coll
	return snapshot
	// Disabled per-document logging, kept for reference. It uses the field
	// names docList and cBuildTime, whereas the active code in this file uses
	// DocList and BuildTime.
	/*
		for _, dval := range coll.docList {
			log.Printf("\nCollection Build Time = %v \n DocBuildTime: %v \n DocId: %d \n DocLabel: %s \n DocWords: %d \n DocSentences: %d\n DocError %v \n DocStatus: %s \n DocStatusCode: %d \n DocProtocol: %s \n DocHeader: %v\n\n", coll.cBuildTime, dval.dBuildTime, dval.id, dval.label, len(dval.words), len(dval.sentences), dval.httpres.err, dval.httpres.response.Status, dval.httpres.response.StatusCode, dval.httpres.response.Proto, dval.httpres.response.Header)
		}
	*/
}
// MakeDocErrorsVis logs, through the standard logger, the metadata of every
// document in coll whose request failed (httpres.err is non-nil).
//
// Two entries are written per failed document: one with the document's own
// fields and one with the details held in httpres.asyncErr. Documents without
// an error are skipped.
// TODO: format error output in a more Go-like style
func MakeDocErrorsVis(coll *Collection) {
	for _, doc := range coll.DocList {
		res := doc.httpres
		if res.err == nil {
			continue
		}

		// Document-level summary.
		log.Printf(
			"\nCollection Build Time = %v, \n DocId: %d \n DocBuildTime: %v, \n DocLabel: %s \n DocWords: %d \n DocSentences: %d\n DocError %v\n",
			coll.BuildTime,
			doc.id,
			doc.dBuildTime,
			doc.label,
			len(doc.words),
			len(doc.sentences),
			res.err,
		)

		// Details recorded for the failed request.
		log.Printf(
			"\nAsyncError: %v \n AsyncMessage: %s \n AsyncUrl: %s \n AsyncCode: %d \n AsyncErrorRequestURL: %v \n AsyncErrorRequestProto: %v \n AsyncErrorRequestProtoMajor: %v \n\n",
			res.asyncErr.Error,
			res.asyncErr.Message,
			res.asyncErr.Url,
			res.asyncErr.Code,
			res.asyncErr.errRequest.URL,
			res.asyncErr.errRequest.Proto,
			res.asyncErr.errRequest.ProtoMajor,
		)
	}
}