Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
91.91% |
159 / 173 |
|
78.95% |
15 / 19 |
CRAP | |
0.00% |
0 / 1 |
Explanation | |
91.91% |
159 / 173 |
|
78.95% |
15 / 19 |
60.84 | |
0.00% |
0 / 1 |
getMaxScore | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getTotalScore | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getBaseScore | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getBoost | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getCoord | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getExplanation | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getRest | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getExplanationForRest | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getMaxFields | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getMinPercentage | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getDecimalPlaces | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
performRequest | |
100.00% |
24 / 24 |
|
100.00% |
1 / 1 |
1 | |||
build | |
97.06% |
33 / 34 |
|
0.00% |
0 / 1 |
12 | |||
cleanLines | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
1 | |||
buildRecursive | |
97.92% |
47 / 48 |
|
0.00% |
0 / 1 |
22 | |||
getLevel | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getChildLines | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
4 | |||
parseLine | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
parseExplainElement | |
68.57% |
24 / 35 |
|
0.00% |
0 / 1 |
7.12 |
1 | <?php |
2 | |
3 | /** |
4 | * Solr Explanation |
5 | * |
6 | * PHP version 8 |
7 | * |
8 | * Copyright (C) Hebis Verbundzentrale 2023. |
9 | * |
10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2, |
12 | * as published by the Free Software Foundation. |
13 | * |
14 | * This program is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | * GNU General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU General Public License |
20 | * along with this program; if not, write to the Free Software |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 | * |
23 | * @category VuFind |
24 | * @package Search_Solr |
25 | * @author Dennis Schrittenlocher <Dennis.Schrittenlocher@outlook.de> |
26 | * @author Thomas Wagener <wagener@hebis.uni-frankfurt.de> |
27 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
28 | * @link https://vufind.org Main Page |
29 | */ |
30 | |
31 | namespace VuFind\Search\Solr; |
32 | |
33 | use VuFindSearch\Backend\Solr\Command\RawJsonSearchCommand; |
34 | use VuFindSearch\ParamBag; |
35 | |
36 | use function count; |
37 | use function floatval; |
38 | use function strlen; |
39 | |
40 | /** |
41 | * Solr Explanation |
42 | * |
43 | * @category VuFind |
44 | * @package Search_Solr |
45 | * @author Dennis Schrittenlocher <Dennis.Schrittenlocher@outlook.de> |
46 | * @author Thomas Wagener <wagener@hebis.uni-frankfurt.de> |
47 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
48 | * @link https://vufind.org Main Page |
49 | */ |
50 | class Explanation extends \VuFind\Search\Base\Explanation |
51 | { |
52 | /** |
53 | * Value of the relevance score of the best match. |
54 | * |
55 | * @var float |
56 | */ |
57 | protected $maxScore; |
58 | |
59 | /** |
60 | * Relevance score of the title with the recordId. |
61 | * |
62 | * @var float |
63 | */ |
64 | protected $totalScore; |
65 | |
66 | /** |
67 | * Relevance score of the title with the recordId without modifiers (boost / coord). |
68 | * |
69 | * @var float |
70 | */ |
71 | protected $baseScore; |
72 | |
73 | /** |
74 | * Parsed boost line of the explanation (array with 'value' and 'description'), or null if no boost was found. |
75 | * |
76 | * @var ?array |
77 | */ |
78 | protected $boost; |
79 | |
80 | /** |
81 | * Parsed coord line (array with 'value' and 'description'), or null if none was found. If only 2 out of 4 search query parts match, then coord would be 1/2. |
82 | * It adjusts the score so that the 2 other search query parts also influence the score. |
83 | * |
84 | * @var ?array |
85 | */ |
86 | protected $coord; |
87 | |
88 | /** |
89 | * The main result of the explain class, |
90 | * an array with every match and its values. |
91 | * |
92 | * @var array |
93 | */ |
94 | protected $explanation = []; |
95 | |
96 | /** |
97 | * Describes the rest. It has restValue and the percentage from total value. |
98 | * |
99 | * @var ?array |
100 | */ |
101 | protected $rest = null; |
102 | |
103 | /** |
104 | * Contains the fields that were removed from the main explanation. |
105 | * |
106 | * @var array |
107 | */ |
108 | protected $explanationForRest = []; |
109 | |
/**
 * Relevance score of the best match in the result set, as taken from the
 * Solr response while building the explanation.
 *
 * @return float
 */
public function getMaxScore()
{
    return $this->maxScore;
}
119 | |
/**
 * Relevance score of the explained record, i.e. the value of the first line
 * of the Solr explanation.
 *
 * @return float
 */
public function getTotalScore()
{
    return $this->totalScore;
}
129 | |
/**
 * Relevance score of the explained record with the boost and coord
 * modifiers divided out.
 *
 * @return float
 */
public function getBaseScore()
{
    return $this->baseScore;
}
139 | |
/**
 * Boost entry parsed from the Solr explanation.
 *
 * @return ?array Array with the keys 'value' and 'description', or null if no
 * boost line was parsed
 */
public function getBoost()
{
    return $this->boost;
}
149 | |
/**
 * Coord entry parsed from the Solr explanation.
 *
 * @return ?array Array with the keys 'value' and 'description', or null if no
 * coord line was parsed
 */
public function getCoord()
{
    return $this->coord;
}
159 | |
/**
 * List of explain elements (one per matched field) parsed from the Solr
 * response.
 *
 * @return array
 */
public function getExplanation()
{
    return $this->explanation;
}
169 | |
/**
 * Summary of the fields that were moved out of the main explanation.
 *
 * @return ?array Array with the keys 'value' (summed value of the rest) and
 * 'percent' (share of the base score), or null if there is no rest
 */
public function getRest()
{
    return $this->rest;
}
179 | |
/**
 * Explain elements that were removed from the main explanation and summed
 * up in the rest.
 *
 * @return array
 */
public function getExplanationForRest()
{
    return $this->explanationForRest;
}
189 | |
/**
 * Configured maximum number of fields kept in the main explanation
 * ([Explain] maxFields).
 *
 * @return int Configured limit, or -1 if the setting is absent
 */
public function getMaxFields()
{
    $maxFields = $this->config['Explain']['maxFields'] ?? -1;
    return $maxFields;
}
199 | |
/**
 * Configured minimum percentage a field needs to stay in the main
 * explanation ([Explain] minPercent).
 *
 * @return float Configured threshold, or 0 if the setting is absent
 */
public function getMinPercentage()
{
    $minPercent = $this->config['Explain']['minPercent'] ?? 0;
    return $minPercent;
}
209 | |
/**
 * Configured number of decimal places to be shown in the explanation
 * ([Explain] decimalPlaces).
 *
 * @return int Configured precision, or 2 if the setting is absent
 */
public function getDecimalPlaces()
{
    $decimalPlaces = $this->config['Explain']['decimalPlaces'] ?? 2;
    return $decimalPlaces;
}
219 | |
/**
 * Sends the current search to Solr once more with debugging enabled and
 * explainOther pointing at the given record, then builds the explanation
 * from the raw JSON response.
 *
 * @param string $recordId Record Id
 *
 * @throws \VuFindSearch\Backend\Exception\BackendException
 * @return void
 */
public function performRequest($recordId)
{
    // Query of the search that is being explained
    $searchQuery = $this->getParams()->getQuery();

    // Regular backend parameters, extended by the debugging parameters
    $backendParams = $this->getParams()->getBackendParameters();
    $backendParams->set('spellcheck', 'false');
    $backendParams->mergeWith(
        new ParamBag(
            [
                'fl' => 'id,score',
                'facet' => 'true',
                'debug' => 'true',
                'indent' => 'true',
                'param' => 'q',
                'echoParams' => 'all',
                'explainOther' => 'id:"' . addcslashes($recordId, '"') . '"',
            ]
        )
    );

    // Run the search (offset 0, limit 0) and fetch the decoded response
    $command = new RawJsonSearchCommand('Solr', $searchQuery, 0, 0, $backendParams, true);
    $solrResponse = $this->searchService->invoke($command)->getResult();

    // Turn the response into the explanation
    $this->build($solrResponse, $recordId);
}
262 | |
/**
 * Builds the explanation from a Solr response and passes the raw
 * explainOther text to the debug log.
 *
 * Sets lookfor, recordId, maxScore, totalScore, baseScore, boost and coord,
 * fills the explanation (sorted descending by value) and moves fields that
 * exceed the configured field limit to the rest.
 *
 * @param array  $response Solr response
 * @param string $recordId recordId of title for Solr explainOther
 *
 * @throws \VuFindSearch\Backend\Exception\BackendException If the response
 * contains no (usable) explainOther entry for the record
 * @return void
 */
protected function build($response, $recordId)
{
    // undo backslash-escaping of parentheses before using the id as array key
    $recordId = str_replace(['\(', '\)'], ['(', ')'], $recordId);

    // "??" keeps a missing entry from raising an "undefined array key"
    // warning; the absence is reported through the exception below instead.
    $rawExplain = $response['debug']['explainOther'][$recordId] ?? null;
    $lines = [];
    if (!empty($rawExplain)) {
        $this->debug($rawExplain);
        $lines = $this->cleanLines($rawExplain);
    }
    // also covers an explanation that consists of line breaks only
    if (empty($lines)) {
        throw new \VuFindSearch\Backend\Exception\BackendException(
            "No explainOther was returned for record {$recordId}"
        );
    }

    // get basic values
    $this->lookfor = strtolower($response['debug']['rawquerystring'] ?? '');
    $this->recordId = $recordId;
    $this->maxScore = $response['response']['maxScore'] ?? null;
    $this->totalScore = $this->parseLine($lines[0])['value'];
    $this->baseScore = $this->totalScore;

    // handle boost: the boost line is the last one and is removed from the lines
    if (($response['responseHeader']['params']['boost'] ?? false) && count($lines) > 1) {
        $this->boost = $this->parseLine(array_pop($lines));
        if ($this->boost['value'] > 0) {
            $this->baseScore = $this->baseScore / $this->boost['value'];
        }
    }

    // handle coord: the coord line stays in the lines, it is only read here
    if (!empty($lines)) {
        $lastLine = $this->parseLine(end($lines));
        if (str_contains($lastLine['description'] ?? '', 'coord')) {
            $this->coord = $lastLine;
            if ($this->coord['value'] > 0) {
                $this->baseScore = $this->baseScore / $this->coord['value'];
            }
        }
    }

    // build explanation (buildRecursive consumes the lines from the end)
    $this->buildRecursive(array_reverse($lines), 1);

    // sort explanation descending by value
    $byValueDescending = static fn ($a, $b) => $b['value'] <=> $a['value'];
    usort($this->explanation, $byValueDescending);

    // remove fields that exceed the fields limit and add them to rest
    $maxFields = $this->getMaxFields();
    if ($maxFields >= 0 && count($this->explanation) > $maxFields) {
        $surplus = array_splice($this->explanation, $maxFields);
        $this->explanationForRest = array_merge($this->explanationForRest, $surplus);
    }

    // handle rest
    if (count($this->explanationForRest) > 0) {
        usort($this->explanationForRest, $byValueDescending);

        $restValue = array_sum(array_column($this->explanationForRest, 'value'));
        $this->rest = [
            'value' => $restValue,
            'percent' => $this->baseScore > 0 ? 100 * $restValue / $this->baseScore : 0,
        ];
    }
}
338 | |
/**
 * Normalizes the raw explanation text so that the wording of different Solr
 * versions is mapped to one set of expressions, then splits it into lines
 * and drops the empty ones.
 *
 * @param string $lines raw lines
 *
 * @return array normed lines
 */
protected function cleanLines($lines)
{
    // Literal replacements, applied one after another in the listed order
    $search = [
        "\n), product",
        ' (MATCH)',
        ' max of',
        'ConstantScore',
        'No match',
    ];
    $replace = [
        '), product',
        '',
        'max plus 0 times others of',
        'const weight',
        'Failure to meet condition(s)',
    ];
    $normalized = str_replace($search, $replace, $lines);

    // Without a callback array_filter drops the empty entries; array_values
    // restores consecutive keys afterwards.
    return array_values(array_filter(explode("\n", $normalized)));
}
360 | |
/**
 * Inspects the last line of the given Solr explanation lines and, depending
 * on its type, either descends into its children or adds a matched field to
 * $explanation (or to $explanationForRest if it is below the minimal
 * percentage).
 *
 * @param array $lines    Solr lines; the line to inspect is the last element
 * @param float $modifier 1 (* tieValue)
 *
 * @throws \VuFindSearch\Backend\Exception\BackendException
 * @return array Solr lines without the inspected line and its processed children
 */
protected function buildRecursive($lines, $modifier)
{
    // Nothing to inspect; avoids feeding null into the string helpers below.
    if (empty($lines)) {
        return $lines;
    }

    $line = array_pop($lines);
    $curLevel = $this->getLevel($line);

    $info = $this->parseLine($line);
    $value = $info['value'];
    $description = (string)($info['description'] ?? '');

    if (str_contains($description, 'Failure to meet condition(s)')) {
        throw new \VuFindSearch\Backend\Exception\BackendException(
            "Record {$this->getRecordId()} fails to match arguments."
        );
    }

    $isMaxPlusOthers = preg_match(
        '/max plus (?<tieValue>[0-9.]*(E-\d+)?) times others of:/',
        $description,
        $matches
    ) === 1;
    // floatval() keeps an empty or malformed capture from being used as a
    // non-numeric string operand, which raises a TypeError in PHP 8.
    $tieValue = $isMaxPlusOthers ? floatval($matches['tieValue']) : 1.0;

    // get max child; it keeps the full modifier, its siblings get the tie value
    $maxChild = null;
    if ($isMaxPlusOthers) {
        $maxValue = 0;
        foreach ($this->getChildLines($lines, $curLevel) as $child) {
            $childValue = $this->parseLine($child)['value'];
            if ($childValue > $maxValue) {
                $maxValue = $childValue;
                $maxChild = $child;
            }
        }
    }

    $isSummary = (
        str_contains($description, 'product of:')
        || str_contains($description, 'sum of')
        || $isMaxPlusOthers
    ) && !str_contains($description, 'weight');

    if ($isSummary || str_contains($description, 'weight(FunctionScoreQuery')) {
        // summary of lower children: build every line below the current level
        while (!empty($lines) && $this->getLevel(end($lines)) > $curLevel) {
            $childModifier = (!$isMaxPlusOthers || end($lines) === $maxChild)
                ? $modifier
                : $modifier * $tieValue;
            $lines = $this->buildRecursive($lines, $childModifier);
        }
    } elseif (str_contains($description, 'weight') && !str_contains($description, 'FunctionScoreQuery')) {
        // match in field: parse explaining element
        $currentValue = $value * $modifier;
        $percentage = $this->baseScore > 0 ? 100 * $currentValue / $this->baseScore : 0;

        // get fieldModifier and remove unused higher level lines
        $fieldModifier = str_contains($description, 'const weight') ? 0 : null;
        while (!empty($lines) && $curLevel < $this->getLevel(end($lines))) {
            $childInfo = $this->parseLine(array_pop($lines));
            if ($childInfo['description'] === ' boost') {
                $fieldModifier = $childInfo['value'];
            }
        }

        // add to rest if lower than min percentage
        $explainElement = $this->parseExplainElement($currentValue, $description, $percentage, $fieldModifier);
        if ($percentage < $this->getMinPercentage()) {
            $this->explanationForRest[] = $explainElement;
        } else {
            $this->explanation[] = $explainElement;
        }
    }
    return $lines;
}
455 | |
/**
 * Determines the nesting level of a line: the number of leading whitespace
 * characters divided by two.
 *
 * @param string $line Line
 *
 * @return int
 */
protected function getLevel($line)
{
    $indentWidth = strlen($line) - strlen(ltrim($line));
    return $indentWidth / 2;
}
467 | |
/**
 * Collects the lines that are exactly one level below the given level.
 * The lines are scanned from the end of the list and the scan stops at the
 * first line that is not deeper than the given level; the result keeps that
 * scan order.
 *
 * @param array $lines Lines
 * @param int   $level Level
 *
 * @return array
 */
protected function getChildLines($lines, $level)
{
    $children = [];
    foreach (array_reverse($lines) as $candidate) {
        $candidateLevel = $this->getLevel($candidate);
        if (!($candidateLevel > $level)) {
            break;
        }
        if ($candidateLevel == $level + 1) {
            $children[] = $candidate;
        }
    }
    return $children;
}
487 | |
/**
 * Extracts value and description of a line. The line is split at the first
 * "="; the part before it is converted to a float, the part after it is the
 * description.
 *
 * @param string $line Line
 *
 * @return array Array with the keys 'value' (float) and 'description'
 * (string, empty if the line contains no "=")
 */
protected function parseLine($line)
{
    $info = explode('=', (string)$line, 2);
    return [
        'value' => floatval($info[0]),
        // A line without "=" has no second part; fall back to an empty string
        // instead of reading an undefined offset.
        'description' => $info[1] ?? '',
    ];
}
503 | |
/**
 * Unites all infos of a match to an explainElement. Field names and values
 * are extracted from the description, either from a Synonym(...) group
 * (possibly several field:value pairs) or from a single field:value pair;
 * if neither form is recognized they stay 'unknown'.
 *
 * @param float  $value         Value
 * @param string $description   Description
 * @param float  $percentage    Percentage
 * @param ?float $fieldModifier Field Modifier (only added to the result if not null)
 *
 * @return array
 */
protected function parseExplainElement($value, $description, $percentage, $fieldModifier)
{
    $res = [
        'value' => $value,
        'percent' => $percentage,
        'fieldName' => ['unknown'],
        'fieldValue' => ['unknown'],
        'exactMatch' => ['unknown'],
    ];

    $stripQuotes = static fn ($fieldValue) => str_replace('"', '', $fieldValue);
    // extra space to only exact match whole words
    $matchType = fn ($fieldValue) => str_contains($this->lookfor . ' ', $fieldValue . ' ')
        ? 'exact'
        : 'inexact';

    // A quoted value is written as "[^"]{2,}": it accepts the same strings as
    // the former nested-quantifier form but cannot backtrack exponentially
    // when the surrounding pattern fails to match.
    if (
        preg_match(
            '/weight\(Synonym\((?<synonyms>([^:]+:(\"[^\"]{2,}\"|\w+)\s?)+)\)(.+?(?= in))?/u',
            $description,
            $matches
        )
    ) {
        preg_match_all(
            '/(?<fieldName>[^:\s]+):(?<fieldValue>\"[^"]+\"|\w+)/u',
            $matches['synonyms'],
            $synonymMatches
        );
        $fieldValues = array_map($stripQuotes, $synonymMatches['fieldValue']);
        $res['fieldName'] = $synonymMatches['fieldName'];
        $res['fieldValue'] = $fieldValues;
        $res['exactMatch'] = array_map($matchType, $fieldValues);
    } elseif (
        preg_match(
            '/weight\((?<fieldName>[^:]+):(?<fieldValue>\"[^"]+\"|\w+)(.+?(?= in))?/u',
            $description,
            $matches
        )
    ) {
        $fieldValue = $stripQuotes($matches['fieldValue']);
        $res['fieldName'] = [$matches['fieldName']];
        $res['fieldValue'] = [$fieldValue];
        $res['exactMatch'] = [$matchType($fieldValue)];
    }

    if ($fieldModifier !== null) {
        $res['fieldModifier'] = $fieldModifier;
    }
    return $res;
}
562 | } |