Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
96.20% |
177 / 184 |
|
72.22% |
13 / 18 |
CRAP | |
0.00% |
0 / 1 |
QueryBuilder | |
96.20% |
177 / 184 |
|
72.22% |
13 / 18 |
89 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
build | |
97.56% |
40 / 41 |
|
0.00% |
0 / 1 |
20 | |||
checkParamConditions | |
94.59% |
35 / 37 |
|
0.00% |
0 / 1 |
18.05 | |||
hasDismaxParamsField | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
5 | |||
getSearchTypes | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
getFieldsToHighlight | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
3.07 | |||
setFieldsToHighlight | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
setCreateSpellingQuery | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setSpecs | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
4 | |||
getLuceneHelper | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
setLuceneHelper | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getSearchHandler | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
8 | |||
reduceQueryGroup | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
reduceQueryGroupComponents | |
95.83% |
23 / 24 |
|
0.00% |
0 / 1 |
7 | |||
createSearchString | |
75.00% |
6 / 8 |
|
0.00% |
0 / 1 |
5.39 | |||
fixTrailingQuestionMarks | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
2 | |||
getNormalizedQueryString | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
2 | |||
createAdvancedInnerSearchString | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
6 |
1 | <?php |
2 | |
3 | /** |
4 | * SOLR QueryBuilder. |
5 | * |
6 | * PHP version 8 |
7 | * |
8 | * Copyright (C) Villanova University 2010. |
9 | * |
10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2, |
12 | * as published by the Free Software Foundation. |
13 | * |
14 | * This program is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | * GNU General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU General Public License |
20 | * along with this program; if not, write to the Free Software |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 | * |
23 | * @category VuFind |
24 | * @package Search |
25 | * @author Andrew S. Nagy <vufind-tech@lists.sourceforge.net> |
26 | * @author David Maus <maus@hab.de> |
27 | * @author Demian Katz <demian.katz@villanova.edu> |
28 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
29 | * @link https://vufind.org |
30 | */ |
31 | |
32 | namespace VuFindSearch\Backend\Solr; |
33 | |
34 | use VuFindSearch\ParamBag; |
35 | use VuFindSearch\Query\AbstractQuery; |
36 | use VuFindSearch\Query\Query; |
37 | use VuFindSearch\Query\QueryGroup; |
38 | |
39 | use function in_array; |
40 | use function is_array; |
41 | use function strlen; |
42 | |
/**
 * SOLR QueryBuilder.
 *
 * Converts a user query (a single Query or a nested QueryGroup) into a ParamBag
 * of Solr request parameters, using the search handler specifications supplied
 * through the constructor or setSpecs().
 *
 * @category VuFind
 * @package  Search
 * @author   Andrew S. Nagy <vufind-tech@lists.sourceforge.net>
 * @author   David Maus <maus@hab.de>
 * @author   Demian Katz <demian.katz@villanova.edu>
 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
 * @link     https://vufind.org
 */
class QueryBuilder implements QueryBuilderInterface
{
    /**
     * Default dismax handler (if no DismaxHandler set in specs).
     *
     * @var string
     */
    protected $defaultDismaxHandler;

    /**
     * Search specs, keyed by lowercased handler name.
     *
     * @var array
     */
    protected $specs = [];

    /**
     * Search specs for exact searches, keyed by lowercased handler name.
     *
     * @var array
     */
    protected $exactSpecs = [];

    /**
     * Global extra Solr query parameters. Each entry is an array with a 'param'
     * key, a 'value' key (scalar or array of values) and an optional
     * 'conditions' key listing conditions that must all match for the
     * parameter to be added.
     *
     * @var array
     */
    protected $globalExtraParams = [];

    /**
     * Solr fields to highlight. Also serves as a flag for whether to perform
     * highlight-specific behavior; if the field list is empty, highlighting is
     * skipped.
     *
     * @var string
     */
    protected $fieldsToHighlight = '';

    /**
     * Should we create the spellcheck.q parameter when appropriate?
     *
     * @var bool
     */
    protected $createSpellingQuery = false;

    /**
     * Lucene syntax helper (created lazily by getLuceneHelper() when not set).
     *
     * @var LuceneSyntaxHelper|null
     */
    protected $luceneHelper = null;

    /**
     * Constructor.
     *
     * @param array  $specs                Search handler specifications
     * @param string $defaultDismaxHandler Default dismax handler (if no
     * DismaxHandler set in specs).
     *
     * @return void
     */
    public function __construct(
        array $specs = [],
        $defaultDismaxHandler = 'dismax'
    ) {
        // The default handler must be stored first: setSpecs() passes it to
        // every SearchHandler it creates.
        $this->defaultDismaxHandler = $defaultDismaxHandler;
        $this->setSpecs($specs);
    }

    /// Public API

    /**
     * Return SOLR search parameters based on a user query and params.
     *
     * Depending on configuration and the matching search handler, the result
     * may contain 'spellcheck.q', 'hl.q', 'hl.fl', 'qf', 'qt' and 'fq' in
     * addition to the main 'q' parameter and any applicable global extra
     * parameters.
     *
     * @param AbstractQuery $query  User query
     * @param ?ParamBag     $params Search backend parameters
     *
     * @return ParamBag
     *
     * @throws \Exception If a global extra parameter uses an unknown condition
     */
    public function build(AbstractQuery $query, ?ParamBag $params = null)
    {
        $newParams = new ParamBag();

        // Add spelling query if applicable -- note that we must set this up before
        // we process the main query in order to avoid unwanted extra syntax:
        if ($this->createSpellingQuery) {
            $newParams->set(
                'spellcheck.q',
                $this->getLuceneHelper()->extractSearchTerms($query->getAllTerms())
            );
        }

        if ($query instanceof QueryGroup) {
            $finalQuery = $this->reduceQueryGroup($query);
        } else {
            // Clone the query to avoid modifying the original user-visible query
            $finalQuery = clone $query;
            $finalQuery->setString($this->getNormalizedQueryString($query));
        }
        // An empty query string means "match everything":
        $string = $finalQuery->getString() ?: '*:*';

        // Highlighting is enabled if we have a field list set.
        $highlight = !empty($this->fieldsToHighlight);

        $handler = $this->getSearchHandler($finalQuery->getHandler(), $string);
        if ($handler) {
            if (
                !$handler->hasExtendedDismax()
                && $this->getLuceneHelper()->containsAdvancedLuceneSyntax($string)
            ) {
                $string = $this->createAdvancedInnerSearchString($string, $handler);
                if ($handler->hasDismax()) {
                    $oldString = $string;
                    $string = $handler->createBoostQueryString($string);

                    // If a boost was added, we don't want to highlight based on
                    // the boost query, so we should use the non-boosted version.
                    // Strict comparison avoids PHP's numeric-string juggling.
                    if ($highlight && $oldString !== $string) {
                        $newParams->set('hl.q', $oldString);
                    }
                }
            } elseif ($handler->hasDismax()) {
                $newParams->set('qf', implode(' ', $handler->getDismaxFields()));
                $newParams->set('qt', $handler->getDismaxHandler());
                foreach ($handler->getDismaxParams() as $param) {
                    // First element is the parameter name, second is its value.
                    $newParams->add(reset($param), next($param));
                }
                if ($handler->hasFilterQuery()) {
                    $newParams->add('fq', $handler->getFilterQuery());
                }
            } else {
                $string = $handler->createSimpleQueryString($string);
            }
        }
        // Set an appropriate highlight field list when applicable:
        if ($highlight) {
            $filter = $handler ? $handler->getAllFields() : [];
            $newParams->add('hl.fl', $this->getFieldsToHighlight($filter));
        }
        $newParams->set('q', $string);

        // Handle any extra parameters:
        foreach ($this->globalExtraParams as $extraParam) {
            // Entries without a parameter name or value are ignored.
            if (empty($extraParam['param']) || empty($extraParam['value'])) {
                continue;
            }
            $conditions = $extraParam['conditions'] ?? [];
            if (!$this->checkParamConditions($query, $params, $conditions)) {
                continue;
            }
            foreach ((array)$extraParam['value'] as $value) {
                $newParams->add($extraParam['param'], $value);
            }
        }

        return $newParams;
    }

    /**
     * Check if the conditions match for an extra parameter
     *
     * All conditions must match; an empty condition list always matches.
     * Entries that are not arrays are skipped.
     *
     * @param AbstractQuery $query      Search query
     * @param ?ParamBag     $params     Search backend parameters
     * @param array         $conditions Required conditions
     *
     * @return bool
     *
     * @throws \Exception If a condition type is not recognized
     */
    protected function checkParamConditions(
        AbstractQuery $query,
        ?ParamBag $params,
        array $conditions
    ): bool {
        if (empty($conditions)) {
            return true;
        }
        $searchTypes = $this->getSearchTypes($query);
        foreach ($conditions as $condition) {
            if (!is_array($condition)) {
                continue;
            }
            // Only the first entry of a condition is used: its key names the
            // condition type and its value holds the value(s) to compare with.
            $values = reset($condition);
            $type = key($condition);
            switch ($type) {
                case 'SearchTypeIn':
                    if (empty(array_intersect((array)$values, $searchTypes))) {
                        return false;
                    }
                    break;
                case 'AllSearchTypesIn':
                    if (array_diff($searchTypes, (array)$values)) {
                        return false;
                    }
                    break;
                case 'SearchTypeNotIn':
                    if (!empty(array_intersect((array)$values, $searchTypes))) {
                        return false;
                    }
                    break;
                case 'NoDismaxParams':
                    foreach ((array)$values as $value) {
                        if ($this->hasDismaxParamsField($searchTypes, $value)) {
                            return false;
                        }
                    }
                    break;
                case 'SortIn':
                    // A missing ParamBag or sort yields an empty list here.
                    $sort = $params?->get('sort');
                    if (empty(array_intersect((array)$values, (array)$sort))) {
                        return false;
                    }
                    break;
                case 'SortNotIn':
                    $sort = $params?->get('sort');
                    if (!empty(array_intersect((array)$values, (array)$sort))) {
                        return false;
                    }
                    break;
                default:
                    throw new \Exception("Unknown parameter condition: $type");
            }
        }
        return true;
    }

    /**
     * Check if any of the given search types has the field in DismaxParams
     *
     * @param array  $searchTypes Search types to check
     * @param string $field       Field to check for
     *
     * @return bool
     */
    protected function hasDismaxParamsField(array $searchTypes, string $field): bool
    {
        foreach ($searchTypes as $searchType) {
            $handler = $this->getSearchHandler($searchType, '');
            if (!$handler) {
                continue;
            }
            foreach ($handler->getDismaxParams() as $param) {
                // The first element of each dismax param is its name.
                if (reset($param) === $field) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Get an array of search types used in the given search
     *
     * For a query group, the handlers of all nested queries are collected
     * recursively and de-duplicated.
     *
     * @param AbstractQuery $query Query
     *
     * @return array
     */
    protected function getSearchTypes(AbstractQuery $query): array
    {
        if ($query instanceof QueryGroup) {
            $callback = function ($carry, $item) {
                return array_merge($carry, $this->getSearchTypes($item));
            };
            return array_unique(array_reduce($query->getQueries(), $callback, []));
        }
        return [$query->getHandler()];
    }

    /**
     * Get list of fields to highlight, filtered by array.
     *
     * @param array $filter Field list to use as a filter.
     *
     * @return string
     */
    protected function getFieldsToHighlight(array $filter = [])
    {
        // No filter? Return unmodified default:
        if (empty($filter)) {
            return $this->fieldsToHighlight;
        }
        // Account for possibility of comma OR space delimiters:
        $fields = array_map('trim', preg_split('/[, ]/', $this->fieldsToHighlight));
        // Wildcard in field list? Return filter as-is; otherwise, use intersection.
        $list = in_array('*', $fields, true)
            ? $filter
            : array_intersect($fields, $filter);
        return implode(',', $list);
    }

    /**
     * Set list of fields to highlight, if any (or '*' for all). Set to an
     * empty string (the default) to completely disable highlighting-related
     * functionality.
     *
     * @param string $list Highlighting field list
     *
     * @return QueryBuilder
     */
    public function setFieldsToHighlight($list)
    {
        $this->fieldsToHighlight = $list;
        return $this;
    }

    /**
     * Control whether or not the QueryBuilder should create a spellcheck.q
     * parameter. (Turned off by default).
     *
     * @param bool $enable Should spelling query generation be enabled?
     *
     * @return void
     */
    public function setCreateSpellingQuery($enable)
    {
        $this->createSpellingQuery = $enable;
    }

    /**
     * Set query builder search specs.
     *
     * The special 'GlobalExtraParams' key is stored as the global extra
     * parameter list; every other key is treated as a search handler name. An
     * 'ExactSettings' section within a handler spec is split off into a
     * separate handler used for exact searches. Handlers are added to (or
     * overwrite entries in) the existing lists; nothing is cleared.
     *
     * @param array $specs Search specs
     *
     * @return void
     */
    public function setSpecs(array $specs)
    {
        foreach ($specs as $handler => $spec) {
            if ('GlobalExtraParams' === $handler) {
                $this->globalExtraParams = $spec;
                continue;
            }
            $key = strtolower($handler);
            if (isset($spec['ExactSettings'])) {
                $this->exactSpecs[$key] = new SearchHandler(
                    $spec['ExactSettings'],
                    $this->defaultDismaxHandler
                );
                // Keep the exact settings out of the regular handler spec.
                unset($spec['ExactSettings']);
            }
            $this->specs[$key]
                = new SearchHandler($spec, $this->defaultDismaxHandler);
        }
    }

    /**
     * Get Lucene syntax helper
     *
     * A default helper is created on first use if none has been set.
     *
     * @return LuceneSyntaxHelper
     */
    public function getLuceneHelper()
    {
        return $this->luceneHelper ??= new LuceneSyntaxHelper();
    }

    /**
     * Set Lucene syntax helper
     *
     * @param LuceneSyntaxHelper $helper Lucene syntax helper
     *
     * @return void
     */
    public function setLuceneHelper(LuceneSyntaxHelper $helper)
    {
        $this->luceneHelper = $helper;
    }

    /// Internal API

    /**
     * Return named search handler.
     *
     * Handler names are matched case-insensitively. When the handler has exact
     * settings and the trimmed search string is wrapped in double quotes, the
     * exact handler is returned instead of the regular one.
     *
     * @param string $handler      Search handler name
     * @param string $searchString Search query
     *
     * @return SearchHandler|null
     */
    protected function getSearchHandler($handler, string $searchString)
    {
        $handler = $handler ? strtolower($handler) : $handler;
        if ($handler) {
            // Since we will rarely have exactSpecs set, it is less expensive
            // to check for a handler first before doing multiple string
            // operations to determine eligibility for exact handling.
            if (isset($this->exactSpecs[$handler])) {
                $searchString = trim($searchString);
                if (
                    strlen($searchString) > 1
                    && str_starts_with($searchString, '"')
                    && str_ends_with($searchString, '"')
                ) {
                    return $this->exactSpecs[$handler];
                }
            }
            if (isset($this->specs[$handler])) {
                return $this->specs[$handler];
            }
        }
        return null;
    }

    /**
     * Reduce query group to a single query.
     *
     * @param QueryGroup $group Query group to reduce
     *
     * @return Query
     */
    protected function reduceQueryGroup(QueryGroup $group)
    {
        $searchString = $this->reduceQueryGroupComponents($group);
        $searchHandler = $group->getReducedHandler();
        return new Query($searchString, $searchHandler);
    }

    /**
     * Reduce components of query group to a search string of a simple query.
     *
     * This function implements the recursive reduction of a query group.
     *
     * @param AbstractQuery $component Component
     *
     * @return string
     *
     * @see \VuFindSearch\Backend\Solr\QueryBuilder::reduceQueryGroup()
     */
    protected function reduceQueryGroupComponents(AbstractQuery $component)
    {
        if ($component instanceof QueryGroup) {
            $reduced = array_map(
                [$this, 'reduceQueryGroupComponents'],
                $component->getQueries()
            );
            // Drop empty sub-queries so they do not produce dangling operators.
            $reduced = array_filter(
                $reduced,
                function ($s) {
                    return '' !== $s;
                }
            );
            $searchString = $reduced
                ? ('(' . implode(" {$component->getOperator()} ", $reduced) . ')')
                : '';
            if ($component->isNegated() && !empty($searchString)) {
                $searchString = '(*:* NOT ' . $searchString . ')';
            }
        } else {
            $searchString = $this->getNormalizedQueryString($component);
            $searchHandler = $this->getSearchHandler(
                $component->getHandler(),
                $searchString
            );
            if ($searchHandler && '' !== $searchString) {
                $searchString
                    = $this->createSearchString($searchString, $searchHandler);
            }
        }
        return $searchString;
    }

    /**
     * Return search string based on input and handler.
     *
     * @param string|null        $string  Input search string (null yields an
     * empty string)
     * @param SearchHandler|null $handler Search handler (if omitted, the input
     * string is returned unchanged)
     *
     * @return string
     */
    protected function createSearchString($string, ?SearchHandler $handler = null)
    {
        // Guard against null before handing the string to the Lucene helper.
        if (null === $string) {
            return '';
        }
        if (!$handler) {
            return $string;
        }
        return $this->getLuceneHelper()->containsAdvancedLuceneSyntax($string)
            ? $handler->createAdvancedQueryString($string)
            : $handler->createSimpleQueryString($string);
    }

    /**
     * If the query ends in a non-escaped question mark, the user may not really
     * intend to use the question mark as a wildcard -- let's account for that
     * possibility.
     *
     * @param string $string Search query to adjust
     *
     * @return string
     */
    protected function fixTrailingQuestionMarks($string)
    {
        // Treat colon and whitespace as word separators -- in either case, we
        // should add parentheses for accuracy.
        $multiword = preg_match('/[^\s][\s:]+[^\s]/', $string);
        $callback = function ($matches) use ($multiword) {
            // Make sure all question marks are properly escaped (first unescape
            // any that are already escaped to prevent double-escapes, then escape
            // all of them):
            $s = $matches[1];
            $escaped = str_replace('?', '\?', str_replace('\?', '?', $s));
            // Search for both the wildcard and the literal interpretation:
            $s = "($s) OR ($escaped)";
            if ($multiword) {
                $s = "($s) ";
            }
            return $s;
        };
        // Use a lookahead to skip matches found within quoted phrases.
        $lookahead = '(?=(?:[^\"]*+\"[^\"]*+\")*+[^\"]*+$)';
        $string = preg_replace_callback(
            '/([^\s:()]+\?)(\s|$)' . $lookahead . '/',
            $callback,
            $string
        );
        // The callback may leave a trailing space behind; remove it.
        return rtrim($string);
    }

    /**
     * Given a Query object, return a fully normalized version of the query string.
     *
     * The string is preprocessed by the matching search handler (if any),
     * normalized by the Lucene helper, and finally adjusted for trailing
     * question marks.
     *
     * @param Query $query Query object
     *
     * @return string
     */
    protected function getNormalizedQueryString($query)
    {
        $queryString = $query->getString();
        $handler = $this->getSearchHandler($query->getHandler(), $queryString);
        if ($handler) {
            $queryString = $handler->preprocessQueryString($queryString);
        }
        return $this->fixTrailingQuestionMarks(
            $this->getLuceneHelper()->normalizeSearchString($queryString)
        );
    }

    /**
     * Return advanced inner search string based on input and handler.
     *
     * @param string        $string  Input search string
     * @param SearchHandler $handler Search handler
     *
     * @return string
     */
    protected function createAdvancedInnerSearchString(
        $string,
        SearchHandler $handler
    ) {
        // Special case -- if the user wants all records but the current handler
        // has a filter query, apply the filter query:
        if (trim($string) === '*:*' && $handler->hasFilterQuery()) {
            return $handler->getFilterQuery();
        }

        // If the query already includes field specifications, we can't easily
        // apply it to other fields through our defined handlers, so we'll leave
        // it as-is:
        if (str_contains($string, ':')) {
            return $string;
        }

        return $handler->createAdvancedQueryString($string);
    }
}