Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
91.91% covered (success)
91.91%
159 / 173
78.95% covered (warning)
78.95%
15 / 19
CRAP
0.00% covered (danger)
0.00%
0 / 1
Explanation
91.91% covered (success)
91.91%
159 / 173
78.95% covered (warning)
78.95%
15 / 19
60.84
0.00% covered (danger)
0.00%
0 / 1
 getMaxScore
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getTotalScore
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getBaseScore
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getBoost
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getCoord
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getExplanation
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getRest
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 getExplanationForRest
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getMaxFields
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getMinPercentage
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getDecimalPlaces
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 performRequest
100.00% covered (success)
100.00%
24 / 24
100.00% covered (success)
100.00%
1 / 1
1
 build
97.06% covered (success)
97.06%
33 / 34
0.00% covered (danger)
0.00%
0 / 1
12
 cleanLines
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
1
 buildRecursive
97.92% covered (success)
97.92%
47 / 48
0.00% covered (danger)
0.00%
0 / 1
22
 getLevel
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getChildLines
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
4
 parseLine
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 parseExplainElement
68.57% covered (warning)
68.57%
24 / 35
0.00% covered (danger)
0.00%
0 / 1
7.12
1<?php
2
3/**
4 * Solr Explanation
5 *
6 * PHP version 8
7 *
8 * Copyright (C) Hebis Verbundzentrale 2023.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2,
12 * as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
22 *
23 * @category VuFind
24 * @package  Search_Solr
25 * @author   Dennis Schrittenlocher <Dennis.Schrittenlocher@outlook.de>
26 * @author   Thomas Wagener <wagener@hebis.uni-frankfurt.de>
27 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
28 * @link     https://vufind.org Main Page
29 */
30
31namespace VuFind\Search\Solr;
32
33use VuFindSearch\Backend\Solr\Command\RawJsonSearchCommand;
34use VuFindSearch\ParamBag;
35
36use function count;
37use function floatval;
38use function strlen;
39
40/**
41 * Solr Explanation
42 *
43 * @category VuFind
44 * @package  Search_Solr
45 * @author   Dennis Schrittenlocher <Dennis.Schrittenlocher@outlook.de>
46 * @author   Thomas Wagener <wagener@hebis.uni-frankfurt.de>
47 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
48 * @link     https://vufind.org Main Page
49 */
class Explanation extends \VuFind\Search\Base\Explanation
{
    /**
     * Value of the relevance score of the best match
     * (null until build() has run or if Solr did not report it).
     *
     * @var ?float
     */
    protected $maxScore;

    /**
     * Relevance score of the title with the recordId.
     *
     * @var float
     */
    protected $totalScore;

    /**
     * Relevance score of the title with the recordId without modifiers (boost / coord).
     *
     * @var float
     */
    protected $baseScore;

    /**
     * Parsed boost line as returned by parseLine() (keys 'value' and
     * 'description'); stays null when no boost line was found.
     *
     * @var ?array
     */
    protected $boost;

    /**
     * Parsed coord line as returned by parseLine() (keys 'value' and
     * 'description'); stays null when no coord line was found.
     * If only 2 out of 4 search query parts match, then the coord value would be 1/2.
     * It adjusts the score so that the 2 other search query parts also influence the score.
     *
     * @var ?array
     */
    protected $coord;

    /**
     * The main result of the explain class,
     * an array with every match and its values.
     *
     * @var array
     */
    protected $explanation = [];

    /**
     * Describes the rest. It has the keys 'value' (summed rest value) and
     * 'percent' (share of the base score); null when there is no rest.
     *
     * @var ?array
     */
    protected $rest = null;

    /**
     * Contains the fields that were removed from the main explanation.
     *
     * @var array
     */
    protected $explanationForRest = [];

    /**
     * Get relevance value of best scoring title.
     *
     * @return ?float
     */
    public function getMaxScore()
    {
        return $this->maxScore;
    }

    /**
     * Get relevance score of this title.
     *
     * @return float
     */
    public function getTotalScore()
    {
        return $this->totalScore;
    }

    /**
     * Get relevance score of this title without modifier (boost/coord).
     *
     * @return float
     */
    public function getBaseScore()
    {
        return $this->baseScore;
    }

    /**
     * Get the parsed boost line used in the Solr query
     * (array with 'value' and 'description'), or null if there was none.
     *
     * @return ?array
     */
    public function getBoost()
    {
        return $this->boost;
    }

    /**
     * Get the parsed coord line (array with 'value' and 'description'),
     * or null if there was none.
     *
     * @return ?array
     */
    public function getCoord()
    {
        return $this->coord;
    }

    /**
     * Get the explanation, parsed from Solr response.
     *
     * @return array
     */
    public function getExplanation()
    {
        return $this->explanation;
    }

    /**
     * Get rest. It has the rest value and the percentage from the base score.
     *
     * @return ?array Null when no field was moved to the rest.
     */
    public function getRest()
    {
        return $this->rest;
    }

    /**
     * Get the explanation for the rest.
     *
     * @return array
     */
    public function getExplanationForRest()
    {
        return $this->explanationForRest;
    }

    /**
     * Get the maximal number of fields to be included.
     *
     * @return int -1 when not configured; build() treats negative values as "no limit".
     */
    public function getMaxFields()
    {
        return $this->config['Explain']['maxFields'] ?? -1;
    }

    /**
     * Get the minimal percentage for fields to be included.
     *
     * @return float 0 when not configured.
     */
    public function getMinPercentage()
    {
        return $this->config['Explain']['minPercent'] ?? 0;
    }

    /**
     * Get number of decimal places to be shown in the explanation.
     *
     * @return int 2 when not configured.
     */
    public function getDecimalPlaces()
    {
        return $this->config['Explain']['decimalPlaces'] ?? 2;
    }

    /**
     * Performing request and creating explanation.
     *
     * @param string $recordId Record Id
     *
     * @throws \VuFindSearch\Backend\Exception\BackendException
     * @return void
     */
    public function performRequest($recordId)
    {
        // get search query
        $query  = $this->getParams()->getQuery();

        // prepare search params
        $params = $this->getParams()->getBackendParameters();
        $params->set('spellcheck', 'false');
        $explainParams = new ParamBag([
            'fl' => 'id,score',
            'facet' => 'true',
            'debug' => 'true',
            'indent' => 'true',
            'param' => 'q',
            'echoParams' => 'all',
            // double quotes inside the id are escaped so the phrase stays intact
            'explainOther' => 'id:"' . addcslashes($recordId, '"') . '"',
        ]);
        $params->mergeWith($explainParams);

        // perform request
        $explainCommand = new RawJsonSearchCommand(
            'Solr',
            $query,
            0,
            0,
            $params,
            true
        );
        $explainCommand = $this->searchService->invoke($explainCommand);
        $response = $explainCommand->getResult();

        // build explanation
        $this->build($response, $recordId);
    }

    /**
     * Builds explanation and sets up debug message to see raw Solr response.
     *
     * @param array  $response Solr response
     * @param string $recordId recordId of title for Solr explainOther
     *
     * @throws \VuFindSearch\Backend\Exception\BackendException
     * @return void
     */
    protected function build($response, $recordId)
    {
        // prepare parsing
        $recordId = str_replace(['\(', '\)'], ['(', ')'], $recordId);

        // Null coalescing avoids "undefined array key" warnings when Solr
        // returned no debug section or no entry for this record.
        $lines = $response['debug']['explainOther'][$recordId] ?? null;
        if (empty($lines)) {
            throw new \VuFindSearch\Backend\Exception\BackendException(
                "No explainOther was returned for record {$recordId}"
            );
        }

        $this->debug($lines);
        $lines = $this->cleanLines($lines);
        if (empty($lines)) {
            // only blank lines were returned; nothing can be parsed
            throw new \VuFindSearch\Backend\Exception\BackendException(
                "No explainOther was returned for record {$recordId}"
            );
        }

        // get basic values
        $this->lookfor = strtolower($response['debug']['rawquerystring'] ?? '');
        $this->recordId = $recordId;
        $this->maxScore = $response['response']['maxScore'] ?? null;
        $this->totalScore = $this->parseLine($lines[0])['value'];
        $this->baseScore = $this->totalScore;

        // handle boost: when a boost param was sent, the last line is taken as the boost
        if (($response['responseHeader']['params']['boost'] ?? false) && count($lines) > 1) {
            $this->boost = $this->parseLine(array_pop($lines));
            if ($this->boost['value'] > 0) {
                $this->baseScore = $this->baseScore / $this->boost['value'];
            }
        }

        // handle coord (the coord line is inspected but stays in $lines)
        if (!empty($lines)) {
            $lastLine = $this->parseLine(end($lines));
            if (str_contains($lastLine['description'], 'coord')) {
                $this->coord = $lastLine;
                if ($this->coord['value'] > 0) {
                    $this->baseScore = $this->baseScore / $this->coord['value'];
                }
            }
        }

        // build explanation; buildRecursive() pops from the end, hence the reversal
        $this->buildRecursive(array_reverse($lines), 1);

        // sort explanation descending by value
        usort($this->explanation, static fn ($a, $b) => $b['value'] <=> $a['value']);

        // remove fields that exceed the fields limit and add them to rest
        $maxFields = $this->getMaxFields();
        if ($maxFields >= 0 && count($this->explanation) > $maxFields) {
            $explanationForRest = array_splice($this->explanation, $maxFields, count($this->explanation) - $maxFields);
            $this->explanationForRest = array_merge($this->explanationForRest, $explanationForRest);
        }

        // handle rest
        if (count($this->explanationForRest) > 0) {
            usort($this->explanationForRest, static fn ($a, $b) => $b['value'] <=> $a['value']);

            $restValue = array_sum(array_column($this->explanationForRest, 'value'));
            if ($this->baseScore > 0) {
                $this->rest = ['value' => $restValue, 'percent' => 100 * $restValue / $this->baseScore];
            } else {
                // avoid a division by zero; the share cannot be computed
                $this->rest = ['value' => $restValue, 'percent' => 0];
            }
        }
    }

    /**
     * Norms the response by replacing expressions to support
     * all versions of Solr. Removes empty lines.
     *
     * @param string $lines raw lines
     *
     * @return array normed lines
     */
    protected function cleanLines($lines)
    {
        // join a "), product" continuation with the preceding line
        $lines = preg_replace('/\\n\), product/', '), product', $lines);
        $lines = preg_replace('/ \(MATCH\)/', '', $lines);
        // express "max of" in the "max plus <tie> times others of" form parsed by buildRecursive()
        $lines = preg_replace('/ max of/', 'max plus 0 times others of', $lines);
        $lines = preg_replace('/ConstantScore/', 'const weight', $lines);
        $lines = preg_replace('/No match/', 'Failure to meet condition(s)', $lines);
        $lines = explode("\n", $lines);
        // drop empty lines and reindex so that $lines[0] is the first remaining line
        $lines = array_values(array_filter($lines, function ($value) {
            return !empty($value);
        }));
        return $lines;
    }

    /**
     * Goes through each line of explainOther response
     * adding a matched field to $explanation.
     *
     * The line to inspect is taken from the END of $lines, so callers pass
     * the lines in reversed order.
     *
     * @param array $lines    Solr lines
     * @param float $modifier 1 (* tieValue)
     *
     * @throws \VuFindSearch\Backend\Exception\BackendException
     * @return array Solr lines without the inspected line and its consumed children
     */
    protected function buildRecursive($lines, $modifier)
    {
        if (empty($lines)) {
            // nothing left to inspect
            return $lines;
        }

        $line = array_pop($lines);
        $curLevel = $this->getLevel($line);

        $info = $this->parseLine($line);
        $value = $info['value'];
        $description = $info['description'];

        if (str_contains($description, 'Failure to meet condition(s)')) {
            throw new \VuFindSearch\Backend\Exception\BackendException(
                "Record {$this->getRecordId()} fails to match arguments."
            );
        }

        $isMaxPlusOthers = preg_match(
            '/max plus (?<tieValue>[0-9.]*(E-\d+)?) times others of:/',
            $description,
            $matches
        );
        // Convert explicitly: multiplying by a non-numeric capture (e.g. an
        // empty string) would throw a TypeError in PHP 8.
        $tieValue = $isMaxPlusOthers ? floatval($matches['tieValue']) : 0.0;

        // get max child (stays null when there is no child with a value above 0)
        $maxChild = null;
        if ($isMaxPlusOthers) {
            $maxValue = 0;
            foreach ($this->getChildLines($lines, $curLevel) as $child) {
                $candidateValue = $this->parseLine($child)['value'];
                if ($candidateValue > $maxValue) {
                    $maxValue = $candidateValue;
                    $maxChild = $child;
                }
            }
        }

        // summary of lower children
        if (
            (
                (str_contains($description, 'product of:') || str_contains($description, 'sum of') || $isMaxPlusOthers)
                && !str_contains($description, 'weight')
            )
            || str_contains($description, 'weight(FunctionScoreQuery')
        ) {
            // build children
            while (!empty($lines) && $this->getLevel(end($lines)) > $curLevel) {
                if (!$isMaxPlusOthers || end($lines) === $maxChild) {
                    $lines = $this->buildRecursive($lines, $modifier);
                } else {
                    // non-maximal children only count with the tie factor
                    $lines = $this->buildRecursive($lines, $modifier * $tieValue);
                }
            }
            // match in field
        } elseif (str_contains($description, 'weight') && !str_contains($description, 'FunctionScoreQuery')) {
            // parse explaining element
            $currentValue = $value * $modifier;
            if ($this->baseScore > 0) {
                $percentage = 100 * $currentValue / $this->baseScore;
            } else {
                $percentage = 0;
            }

            // get fieldModifier and remove unused higher level lines
            $fieldModifier = null;
            if (str_contains($description, 'const weight')) {
                $fieldModifier = 0;
            }
            while (!empty($lines) && $curLevel < $this->getLevel(end($lines))) {
                $childLine = array_pop($lines);
                $childInfo = $this->parseLine($childLine);
                $childValue = $childInfo['value'];
                $childDescription = $childInfo['description'];
                if ($childDescription === ' boost') {
                    $fieldModifier = $childValue;
                }
            }

            // add to rest if lower than min percentage
            $explainElement = $this->parseExplainElement($currentValue, $description, $percentage, $fieldModifier);
            if ($percentage < $this->getMinPercentage()) {
                $this->explanationForRest[] = $explainElement;
            } else {
                $this->explanation[] = $explainElement;
            }
        }
        return $lines;
    }

    /**
     * Returns indent of a line: the number of leading whitespace
     * characters divided by two.
     *
     * @param string $line Line
     *
     * @return int|float Integer for an even indent width, float otherwise
     */
    protected function getLevel($line)
    {
        return (strlen($line) - strlen(ltrim($line))) / 2;
    }

    /**
     * Gets all lines with one level higher than the parent line.
     *
     * Reads from the end of $lines and stops at the first line that is not
     * deeper than $level; deeper descendants are skipped.
     *
     * @param array $lines Lines
     * @param int   $level Level
     *
     * @return array
     */
    protected function getChildLines($lines, $level)
    {
        $res = [];
        while (!empty($lines) && $this->getLevel(end($lines)) > $level) {
            $line = array_pop($lines);
            if ($this->getLevel($line) == $level + 1) {
                $res[] = $line;
            }
        }
        return $res;
    }

    /**
     * Extracts value and description of a line.
     *
     * The line is split at the first '='; the left part is converted to a
     * float, the right part is kept verbatim (including its leading space).
     *
     * @param string $line Line
     *
     * @return array Array with the keys 'value' (float) and 'description' (string)
     */
    protected function parseLine($line)
    {
        $info = explode('=', $line, 2);
        return [
            'value' => floatval($info[0]),
            // a line without '=' has no description; use an empty string
            // instead of triggering an undefined-key warning
            'description' => $info[1] ?? '',
        ];
    }

    /**
     * Unites all infos of a match to an explainElement.
     *
     * @param float  $value         Value
     * @param string $description   Description
     * @param float  $percentage    Percentage
     * @param ?float $fieldModifier Field Modifier (null = not available; then the
     * 'fieldModifier' key is omitted from the result)
     *
     * @return array
     */
    protected function parseExplainElement($value, $description, $percentage, $fieldModifier)
    {
        // defaults used when the description matches none of the patterns below
        $res = [
            'value' => $value,
            'percent' => $percentage,
            'fieldName' => ['unknown'],
            'fieldValue' => ['unknown'],
            'exactMatch' => ['unknown'],
        ];
        if (
            preg_match(
                '/weight\(Synonym\((?<synonyms>([^:]+:(\"([^\"]+\s?)+[^\"]+\"|\w+)\s?)+)\)(.+?(?= in))?/u',
                $description,
                $matches
            )
        ) {
            // several field:value pairs inside one Synonym(...) expression
            preg_match_all(
                '/(?<fieldName>[^:\s]+):(?<fieldValue>\"[^"]+\"|\w+)/u',
                $matches['synonyms'],
                $synonymMatches
            );
            $fieldValues = array_map(function ($fieldValue) {
                return str_replace('"', '', $fieldValue);
            }, $synonymMatches['fieldValue']);
            $res['fieldName'] = $synonymMatches['fieldName'];
            $res['fieldValue'] = $fieldValues;
            // extra space to only exact match whole words
            $res['exactMatch'] = array_map(function ($fieldValue) {
                return str_contains($this->lookfor . ' ', $fieldValue . ' ') ? 'exact' : 'inexact';
            }, $fieldValues);
        } elseif (
            preg_match(
                '/weight\((?<fieldName>[^:]+):(?<fieldValue>\"[^"]+\"|\w+)(.+?(?= in))?/u',
                $description,
                $matches
            )
        ) {
            // single field:value pair
            $fieldValue = str_replace('"', '', $matches['fieldValue']);
            $res['fieldName'] = [$matches['fieldName']];
            $res['fieldValue'] = [$fieldValue];
            // extra space to only exact match whole words
            $res['exactMatch'] = [str_contains($this->lookfor . ' ', $fieldValue . ' ') ? 'exact' : 'inexact'];
        }
        if ($fieldModifier !== null) {
            $res['fieldModifier'] = $fieldModifier;
        }
        return $res;
    }
}