-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathQrySopScore.java
185 lines (153 loc) · 6.08 KB
/
QrySopScore.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
/**
* Copyright (c) 2017, Carnegie Mellon University. All Rights Reserved.
*/
import java.io.*;
import java.lang.IllegalArgumentException;
/**
 * The SCORE operator for all retrieval models.
 *
 * <p>The operator wraps a single argument (read from {@code args.get(0)}) and
 * converts its match information into a document score. Which formula is used
 * depends on the runtime type of the {@link RetrievalModel} passed to
 * {@link #getScore(RetrievalModel)}.
 */
public class QrySopScore extends QrySop {

  /**
   * Indicates whether the query has a match.
   * @param r The retrieval model that determines what is a match
   * @return True if the query matches, otherwise false.
   */
  public boolean docIteratorHasMatch (RetrievalModel r) {
    return this.docIteratorHasMatchFirst (r);
  }

  /**
   * Get a score for the document that docIteratorHasMatch matched.
   * Dispatches on the concrete retrieval model type.
   * @param r The retrieval model that determines how scores are calculated.
   * @return The document score.
   * @throws IOException Error accessing the Lucene index
   * @throws IllegalArgumentException If the retrieval model is not one of the
   *     models handled by this operator.
   */
  public double getScore (RetrievalModel r) throws IOException {
    if (r instanceof RetrievalModelUnrankedBoolean) {
      return this.getScoreUnrankedBoolean (r);
    } else if (r instanceof RetrievalModelRankedBoolean) {
      return this.getScoreRankedBoolean (r);
    } else if (r instanceof RetrievalModelBM25) {
      return this.getScoreBM25 (r);
    } else if (r instanceof RetrievalModelIndri) {
      return this.getScoreIndri (r);
    } else {
      throw new IllegalArgumentException
        (r.getClass().getName() + " doesn't support the SCORE operator.");
    }
  }

  /**
   * getScore for the BM25 retrieval model.
   *
   * <p>The score is the product of an RSJ-style idf weight (clamped so it is
   * never negative) and a length-normalized term-frequency weight.
   * @param r The retrieval model that determines how scores are calculated;
   *     cast to {@code RetrievalModelBM25} to read k_1 and b.
   * @return The document score, or 0.0 if there is no cached match.
   * @throws IOException Error accessing the Lucene index
   */
  public double getScoreBM25 (RetrievalModel r) throws IOException {
    if (! this.docIteratorHasMatchCache()) {
      return 0.0;
    }

    QryIop term = (QryIop) this.args.get(0);
    RetrievalModelBM25 model = (RetrievalModelBM25) r;

    String field = term.field;
    int docId = term.docIteratorGetMatch();
    int tf = term.docIteratorGetMatchPosting().tf;
    int df = term.getDf();
    double numDocs = (double) Idx.getNumDocs();
    float k1 = model.getK_1();
    float b = model.getB();
    long docLen = Idx.getFieldLength(field, docId);

    // idf component; Math.max keeps very common terms from scoring negatively.
    double rsjWeight = Math.max(0, Math.log( (numDocs - df + 0.5) / (df + 0.5) ));

    // Average length of this field over the documents that have the field.
    double avgDocLen = (double) Idx.getSumOfFieldLengths(field) / Idx.getDocCount(field);

    // tf component, normalized by document length relative to the average.
    double tfWeight = tf / (tf + k1 * ( 1 - b + ( b * docLen / avgDocLen)));

    return rsjWeight * tfWeight;
  }

  /**
   * getScore for the Indri retrieval model when the current document
   * matches the argument. Uses the term frequency of the matched posting.
   * @param r The retrieval model that determines how scores are calculated;
   *     cast to {@code RetrievalModelIndri} to read lambda and mu.
   * @return The smoothed term probability for the matched document, or 0.0
   *     if there is no cached match.
   * @throws IOException Error accessing the Lucene index
   */
  public double getScoreIndri (RetrievalModel r) throws IOException {
    if (! this.docIteratorHasMatchCache()) {
      return 0.0;
    }

    QryIop term = (QryIop) this.args.get(0);
    int docId = term.docIteratorGetMatch();
    int tf = term.docIteratorGetMatchPosting().tf;

    return indriProbability ((RetrievalModelIndri) r, term, tf, docId);
  }

  /**
   * Default score for the Indri retrieval model, used for a document that
   * does NOT match the argument. The formula is the same as
   * {@link #getScoreIndri(RetrievalModel)} with the term frequency fixed at 0.
   * @param r The retrieval model that determines how scores are calculated;
   *     cast to {@code RetrievalModelIndri} to read lambda and mu.
   * @param doc_id The internal id of the document to score.
   * @return The smoothed term probability for the given document.
   * @throws IOException Error accessing the Lucene index
   */
  public double getDefaultScoreIndri (RetrievalModel r, int doc_id) throws IOException {
    QryIop term = (QryIop) this.args.get(0);
    return indriProbability ((RetrievalModelIndri) r, term, 0, doc_id);
  }

  /**
   * Get a default score for a document that the argument did not match.
   * Only the Indri retrieval model is handled.
   * @param r The retrieval model that determines how scores are calculated.
   * @param doc_id The internal id of the document to score.
   * @return The default document score.
   * @throws IOException Error accessing the Lucene index
   * @throws IllegalArgumentException If the retrieval model is not Indri.
   */
  public double getDefaultScore (RetrievalModel r, int doc_id) throws IOException {
    if (r instanceof RetrievalModelIndri) {
      return this.getDefaultScoreIndri (r, doc_id);
    } else {
      throw new IllegalArgumentException
        (r.getClass().getName() + " doesn't support the SCORE operator.");
    }
  }

  /**
   * Shared Indri smoothing formula:
   * {@code (1 - lambda) * (tf + mu * pC) / (docLen + mu) + lambda * pC},
   * where {@code pC = ctf / (sum of field lengths)}.
   * @param model Supplies lambda and mu.
   * @param term The argument whose field and ctf are used.
   * @param tf Term frequency in the document (0 for a non-matching document).
   * @param docId The internal id of the document whose field length is used.
   * @return The smoothed term probability.
   * @throws IOException Error accessing the Lucene index
   */
  private static double indriProbability (RetrievalModelIndri model, QryIop term,
                                          int tf, int docId) throws IOException {
    String field = term.field;
    float lambda = model.getLambda();
    float mu = model.getMu();

    // Held as a double so the division below is floating-point.
    double ctf = term.getCtf();
    double collectionProb = ctf / Idx.getSumOfFieldLengths(field);

    return (1 - lambda) * ( (tf + mu * collectionProb) / ( Idx.getFieldLength(field, docId) + mu) )
        + lambda * collectionProb;
  }

  /**
   * getScore for the Unranked retrieval model.
   * @param r The retrieval model that determines how scores are calculated.
   * @return 1.0 if there is a cached match, otherwise 0.0.
   * @throws IOException Error accessing the Lucene index
   */
  public double getScoreUnrankedBoolean (RetrievalModel r) throws IOException {
    if (! this.docIteratorHasMatchCache()) {
      return 0.0;
    } else {
      return 1.0;
    }
  }

  /**
   * getScore for the Ranked retrieval model.
   * @param r The retrieval model that determines how scores are calculated.
   * @return The term frequency of the matched posting, or 0.0 if there is
   *     no cached match.
   * @throws IOException Error accessing the Lucene index
   */
  public double getScoreRankedBoolean (RetrievalModel r) throws IOException {
    if (! this.docIteratorHasMatchCache()) {
      return 0.0;
    }

    // The score is the argument's term frequency in the matched document.
    QryIop term = (QryIop) this.args.get(0);
    return term.docIteratorGetMatchPosting().tf;
  }

  /**
   * Initialize the query operator (and its arguments), including any
   * internal iterators. If the query operator is of type QryIop, it
   * is fully evaluated, and the results are stored in an internal
   * inverted list that may be accessed via the internal iterator.
   * @param r A retrieval model that guides initialization
   * @throws IOException Error accessing the Lucene index.
   */
  public void initialize (RetrievalModel r) throws IOException {
    Qry q = this.args.get (0);
    q.initialize (r);
  }
}