Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
96.15% |
175 / 182 |
|
72.22% |
13 / 18 |
CRAP | |
0.00% |
0 / 1 |
QueryBuilder | |
96.15% |
175 / 182 |
|
72.22% |
13 / 18 |
88 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
build | |
97.62% |
41 / 42 |
|
0.00% |
0 / 1 |
20 | |||
checkParamConditions | |
94.59% |
35 / 37 |
|
0.00% |
0 / 1 |
18.05 | |||
hasDismaxParamsField | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
5 | |||
getSearchTypes | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
getFieldsToHighlight | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
3.07 | |||
setFieldsToHighlight | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
setCreateSpellingQuery | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setSpecs | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
4 | |||
getLuceneHelper | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
setLuceneHelper | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getSearchHandler | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
8 | |||
reduceQueryGroup | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
reduceQueryGroupComponents | |
95.83% |
23 / 24 |
|
0.00% |
0 / 1 |
7 | |||
createSearchString | |
75.00% |
6 / 8 |
|
0.00% |
0 / 1 |
5.39 | |||
fixTrailingQuestionMarks | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
2 | |||
getNormalizedQueryString | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
createAdvancedInnerSearchString | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
6 |
1 | <?php |
2 | |
3 | /** |
4 | * SOLR QueryBuilder. |
5 | * |
6 | * PHP version 8 |
7 | * |
8 | * Copyright (C) Villanova University 2010. |
9 | * |
10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2, |
12 | * as published by the Free Software Foundation. |
13 | * |
14 | * This program is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | * GNU General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU General Public License |
20 | * along with this program; if not, write to the Free Software |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 | * |
23 | * @category VuFind |
24 | * @package Search |
25 | * @author Andrew S. Nagy <vufind-tech@lists.sourceforge.net> |
26 | * @author David Maus <maus@hab.de> |
27 | * @author Demian Katz <demian.katz@villanova.edu> |
28 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
29 | * @link https://vufind.org |
30 | */ |
31 | |
32 | namespace VuFindSearch\Backend\Solr; |
33 | |
34 | use VuFindSearch\ParamBag; |
35 | use VuFindSearch\Query\AbstractQuery; |
36 | use VuFindSearch\Query\Query; |
37 | use VuFindSearch\Query\QueryGroup; |
38 | |
39 | use function in_array; |
40 | use function is_array; |
41 | use function strlen; |
42 | |
43 | /** |
44 | * SOLR QueryBuilder. |
45 | * |
46 | * @category VuFind |
47 | * @package Search |
48 | * @author Andrew S. Nagy <vufind-tech@lists.sourceforge.net> |
49 | * @author David Maus <maus@hab.de> |
50 | * @author Demian Katz <demian.katz@villanova.edu> |
51 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
52 | * @link https://vufind.org |
53 | */ |
54 | class QueryBuilder implements QueryBuilderInterface |
55 | { |
56 | /** |
57 | * Default dismax handler (if no DismaxHandler set in specs). |
58 | * |
59 | * @var string |
60 | */ |
61 | protected $defaultDismaxHandler; |
62 | |
63 | /** |
64 | * Search specs. |
65 | * |
66 | * @var array |
67 | */ |
68 | protected $specs = []; |
69 | |
70 | /** |
71 | * Search specs for exact searches. |
72 | * |
73 | * @var array |
74 | */ |
75 | protected $exactSpecs = []; |
76 | |
77 | /** |
78 | * Global extra Solr query parameters |
79 | * |
80 | * @var array |
81 | */ |
82 | protected $globalExtraParams = []; |
83 | |
84 | /** |
85 | * Solr fields to highlight. Also serves as a flag for whether to perform |
86 | * highlight-specific behavior; if the field list is empty, highlighting is |
87 | * skipped. |
88 | * |
89 | * @var string |
90 | */ |
91 | protected $fieldsToHighlight = ''; |
92 | |
93 | /** |
94 | * Should we create the spellcheck.q parameter when appropriate? |
95 | * |
96 | * @var bool |
97 | */ |
98 | protected $createSpellingQuery = false; |
99 | |
100 | /** |
101 | * Lucene syntax helper |
102 | * |
103 | * @var LuceneSyntaxHelper |
104 | */ |
105 | protected $luceneHelper = null; |
106 | |
/**
 * Constructor.
 *
 * @param array  $specs                Search handler specifications
 * @param string $defaultDismaxHandler Dismax handler name handed to every
 * SearchHandler created from the specs (used when a spec does not set its own
 * DismaxHandler).
 */
public function __construct(array $specs = [], $defaultDismaxHandler = 'dismax')
{
    // Order matters: setSpecs() passes the default dismax handler to each
    // SearchHandler it builds, so the property must be populated first.
    $this->defaultDismaxHandler = $defaultDismaxHandler;
    $this->setSpecs($specs);
}
123 | |
124 | /// Public API |
125 | |
/**
 * Return SOLR search parameters based on a user query and params.
 *
 * The returned bag always contains 'q'. Depending on configuration and the
 * selected search handler it may also contain 'spellcheck.q', 'hl.q',
 * 'hl.fl', 'qf', 'qt', 'fq', handler-specific dismax parameters and any
 * matching global extra parameters.
 *
 * @param AbstractQuery $query  User query
 * @param ?ParamBag     $params Search backend parameters (only consulted when
 * evaluating the conditions of global extra parameters)
 *
 * @return ParamBag
 */
public function build(AbstractQuery $query, ?ParamBag $params = null)
{
    $newParams = new ParamBag();

    // Add spelling query if applicable -- note that we must set this up before
    // we process the main query in order to avoid unwanted extra syntax:
    if ($this->createSpellingQuery) {
        $newParams->set(
            'spellcheck.q',
            $this->getLuceneHelper()->extractSearchTerms($query->getAllTerms())
        );
    }

    if ($query instanceof QueryGroup) {
        $finalQuery = $this->reduceQueryGroup($query);
    } else {
        // Clone the query to avoid modifying the original user-visible query
        $finalQuery = clone $query;
        $finalQuery->setString($this->getNormalizedQueryString($query));
    }

    // Fall back to a match-all query only when there is really no query text.
    // A plain truthiness test would also discard the legitimate search "0".
    $string = $finalQuery->getString();
    if (null === $string || '' === $string) {
        $string = '*:*';
    }

    // Highlighting is enabled if we have a field list set.
    $highlight = !empty($this->fieldsToHighlight);

    $handler = $this->getSearchHandler($finalQuery->getHandler(), $string);
    if ($handler) {
        $string = $handler->preprocessQueryString($string);
        if (
            !$handler->hasExtendedDismax()
            && $this->getLuceneHelper()->containsAdvancedLuceneSyntax($string)
        ) {
            $string = $this->createAdvancedInnerSearchString($string, $handler);
            if ($handler->hasDismax()) {
                $oldString = $string;
                $string = $handler->createBoostQueryString($string);

                // If a boost was added, we don't want to highlight based on
                // the boost query, so we should use the non-boosted version.
                // Strict comparison avoids numeric-string juggling.
                if ($highlight && $oldString !== $string) {
                    $newParams->set('hl.q', $oldString);
                }
            }
        } elseif ($handler->hasDismax()) {
            $newParams->set('qf', implode(' ', $handler->getDismaxFields()));
            $newParams->set('qt', $handler->getDismaxHandler());
            foreach ($handler->getDismaxParams() as $param) {
                // Each entry is read positionally: first element is the
                // parameter name, second element is its value.
                $newParams->add(reset($param), next($param));
            }
            if ($handler->hasFilterQuery()) {
                $newParams->add('fq', $handler->getFilterQuery());
            }
        } else {
            $string = $handler->createSimpleQueryString($string);
        }
    }

    // Set an appropriate highlight field list when applicable:
    if ($highlight) {
        $filter = $handler ? $handler->getAllFields() : [];
        $newParams->add('hl.fl', $this->getFieldsToHighlight($filter));
    }
    $newParams->set('q', $string);

    // Handle any extra parameters:
    foreach ($this->globalExtraParams as $extraParam) {
        // Entries without a parameter name or value are ignored.
        if (empty($extraParam['param']) || empty($extraParam['value'])) {
            continue;
        }
        $conditions = $extraParam['conditions'] ?? [];
        if (!$this->checkParamConditions($query, $params, $conditions)) {
            continue;
        }
        foreach ((array)$extraParam['value'] as $value) {
            $newParams->add($extraParam['param'], $value);
        }
    }

    return $newParams;
}
213 | |
/**
 * Decide whether an extra parameter applies to the current search.
 *
 * Every entry of $conditions is expected to be a single-element array whose
 * key names the condition type and whose value holds the value(s) to test;
 * non-array entries are ignored. All conditions must hold for the method to
 * return true, and an empty condition list always passes.
 *
 * @param AbstractQuery $query      Search query
 * @param ?ParamBag     $params     Search backend parameters (source of 'sort')
 * @param array         $conditions Required conditions
 *
 * @return bool
 *
 * @throws \Exception If a condition type is not recognized
 */
protected function checkParamConditions(
    AbstractQuery $query,
    ?ParamBag $params,
    array $conditions
): bool {
    if (empty($conditions)) {
        return true;
    }

    $searchTypes = $this->getSearchTypes($query);

    // True as soon as one of the given fields appears in the DismaxParams of
    // any search type used by the query.
    $usesDismaxParam = function (array $fields) use ($searchTypes): bool {
        foreach ($fields as $field) {
            if ($this->hasDismaxParamsField($searchTypes, $field)) {
                return true;
            }
        }
        return false;
    };

    foreach ($conditions as $entry) {
        if (!is_array($entry)) {
            continue;
        }
        // First value and first key of the entry, in that order.
        $expected = reset($entry);
        $condition = key($entry);

        $satisfied = match ($condition) {
            'SearchTypeIn'
                => !empty(array_intersect((array)$expected, $searchTypes)),
            'AllSearchTypesIn'
                => !array_diff($searchTypes, (array)$expected),
            'SearchTypeNotIn'
                => empty(array_intersect((array)$expected, $searchTypes)),
            'NoDismaxParams'
                => !$usesDismaxParam((array)$expected),
            'SortIn'
                => !empty(
                    array_intersect((array)$expected, (array)$params?->get('sort'))
                ),
            'SortNotIn'
                => empty(
                    array_intersect((array)$expected, (array)$params?->get('sort'))
                ),
            default
                => throw new \Exception("Unknown parameter condition: $condition"),
        };

        if (!$satisfied) {
            return false;
        }
    }

    return true;
}
279 | |
/**
 * Tell whether the DismaxParams of at least one of the given search types
 * contain an entry whose first element is the given field.
 *
 * Search types without a configured handler are skipped.
 *
 * @param array  $searchTypes Search types to check
 * @param string $field       Field to check for
 *
 * @return bool
 */
protected function hasDismaxParamsField(array $searchTypes, string $field): bool
{
    foreach ($searchTypes as $type) {
        $typeHandler = $this->getSearchHandler($type, '');
        if (!$typeHandler) {
            continue;
        }
        foreach ($typeHandler->getDismaxParams() as $dismaxParam) {
            if ($field === reset($dismaxParam)) {
                return true;
            }
        }
    }

    return false;
}
301 | |
/**
 * Collect the search types (handler names) used by a query.
 *
 * A plain query yields a one-element list containing its handler; a query
 * group is walked recursively and duplicates are removed from the result.
 *
 * @param AbstractQuery $query Query
 *
 * @return array
 */
protected function getSearchTypes(AbstractQuery $query): array
{
    if (!($query instanceof QueryGroup)) {
        return [$query->getHandler()];
    }

    $collected = [];
    foreach ($query->getQueries() as $subQuery) {
        $collected = array_merge($collected, $this->getSearchTypes($subQuery));
    }

    return array_unique($collected);
}
319 | |
/**
 * Get the list of fields to highlight, restricted by an optional filter.
 *
 * With an empty filter the configured field list is returned untouched.
 * Otherwise the result is a comma-separated list: the whole filter when the
 * configured list contains the '*' wildcard, or the configured fields that
 * also appear in the filter.
 *
 * @param array $filter Field list to use as a filter.
 *
 * @return string
 */
protected function getFieldsToHighlight(array $filter = [])
{
    if (empty($filter)) {
        return $this->fieldsToHighlight;
    }

    // The configured list may be delimited by commas, spaces or both.
    $configured = array_map(
        'trim',
        preg_split('/[, ]/', $this->fieldsToHighlight)
    );

    if (in_array('*', $configured)) {
        // Wildcard: every field allowed by the filter is highlighted.
        return implode(',', $filter);
    }

    return implode(',', array_intersect($configured, $filter));
}
339 | |
/**
 * Store the list of fields to highlight.
 *
 * Pass '*' to highlight all fields; an empty string (the default) turns off
 * all highlighting-related behavior of the builder.
 *
 * @param string $list Highlighting field list
 *
 * @return QueryBuilder This instance, to allow call chaining
 */
public function setFieldsToHighlight($list)
{
    $this->fieldsToHighlight = $list;

    return $this;
}
354 | |
/**
 * Enable or disable generation of the spellcheck.q parameter by build().
 * Generation is off unless this is called with a truthy value.
 *
 * @param bool $enable Should spelling query generation be enabled?
 *
 * @return void
 */
public function setCreateSpellingQuery($enable)
{
    $this->createSpellingQuery = $enable;
}
367 | |
/**
 * Set query builder search specs.
 *
 * The special 'GlobalExtraParams' key is stored as the list of global extra
 * parameters. Every other key is treated as a search handler name: it is
 * registered under its lower-cased name, and an 'ExactSettings' sub-section
 * (if present) is split off into a separate handler for exact searches.
 * Handlers registered by earlier calls are kept unless overwritten.
 *
 * @param array $specs Search specs
 *
 * @return void
 */
public function setSpecs(array $specs)
{
    foreach ($specs as $name => $settings) {
        if ('GlobalExtraParams' === $name) {
            $this->globalExtraParams = $settings;
            continue;
        }

        $key = strtolower($name);

        if (isset($settings['ExactSettings'])) {
            $this->exactSpecs[$key] = new SearchHandler(
                $settings['ExactSettings'],
                $this->defaultDismaxHandler
            );
            // The regular handler must not see the exact-search section.
            unset($settings['ExactSettings']);
        }

        $this->specs[$key] = new SearchHandler(
            $settings,
            $this->defaultDismaxHandler
        );
    }
}
393 | |
/**
 * Get the Lucene syntax helper, creating a default instance on first use
 * when none has been injected.
 *
 * @return LuceneSyntaxHelper
 */
public function getLuceneHelper()
{
    return $this->luceneHelper ??= new LuceneSyntaxHelper();
}
406 | |
/**
 * Inject the Lucene syntax helper to be returned by getLuceneHelper().
 *
 * @param LuceneSyntaxHelper $helper Lucene syntax helper
 *
 * @return void
 */
public function setLuceneHelper(LuceneSyntaxHelper $helper)
{
    $this->luceneHelper = $helper;
}
418 | |
419 | /// Internal API |
420 | |
/**
 * Return named search handler.
 *
 * Handler names are matched case-insensitively. When an exact-search handler
 * is registered under the name and the (trimmed) search string is wrapped in
 * double quotes, the exact handler wins; otherwise the regular handler is
 * returned, or null when the name is empty or unknown.
 *
 * @param string $handler      Search handler name
 * @param string $searchString Search query
 *
 * @return SearchHandler|null
 */
protected function getSearchHandler($handler, string $searchString)
{
    if (!$handler) {
        return null;
    }
    $key = strtolower($handler);

    // Exact specs are rarely configured, so look for one first and only then
    // pay for the string operations that detect a quoted phrase.
    if (isset($this->exactSpecs[$key])) {
        $trimmed = trim($searchString);
        $isQuotedPhrase = strlen($trimmed) > 1
            && str_starts_with($trimmed, '"')
            && str_ends_with($trimmed, '"');
        if ($isQuotedPhrase) {
            return $this->exactSpecs[$key];
        }
    }

    return $this->specs[$key] ?? null;
}
452 | |
/**
 * Reduce a query group to a single query.
 *
 * The new query combines the recursively reduced search string of the group
 * with the group's reduced handler.
 *
 * @param QueryGroup $group Query group to reduce
 *
 * @return Query
 */
protected function reduceQueryGroup(QueryGroup $group)
{
    $reducedString = $this->reduceQueryGroupComponents($group);

    return new Query($reducedString, $group->getReducedHandler());
}
466 | |
/**
 * Reduce components of query group to a search string of a simple query.
 *
 * This function implements the recursive reduction of a query group: a
 * single query becomes its normalized (and, if a handler is known,
 * handler-expanded) search string; a group becomes the parenthesized list of
 * its non-empty reduced members joined by the group operator, wrapped in
 * "(*:* NOT ...)" when the group is negated.
 *
 * @param AbstractQuery $component Component
 *
 * @return string
 *
 * @see \VuFindSearch\Backend\Solr\QueryBuilder::reduceQueryGroup()
 */
protected function reduceQueryGroupComponents(AbstractQuery $component)
{
    if (!($component instanceof QueryGroup)) {
        $searchString = $this->getNormalizedQueryString($component);
        $searchHandler = $this->getSearchHandler(
            $component->getHandler(),
            $searchString
        );
        if ($searchHandler && '' !== $searchString) {
            return $this->createSearchString($searchString, $searchHandler);
        }
        return $searchString;
    }

    // Reduce each member and drop the ones that end up empty.
    $parts = [];
    foreach ($component->getQueries() as $member) {
        $part = $this->reduceQueryGroupComponents($member);
        if ('' !== $part) {
            $parts[] = $part;
        }
    }
    if (!$parts) {
        return '';
    }

    $joined = '(' . implode(" {$component->getOperator()} ", $parts) . ')';

    return $component->isNegated()
        ? '(*:* NOT ' . $joined . ')'
        : $joined;
}
510 | |
/**
 * Return search string based on input and handler.
 *
 * A null input yields an empty string. Without a handler the input is
 * returned unchanged; with a handler, the input is expanded through the
 * handler's advanced or simple query rules depending on whether it contains
 * advanced Lucene syntax.
 *
 * @param ?string        $string  Input search string
 * @param ?SearchHandler $handler Search handler
 *
 * @return string
 */
protected function createSearchString($string, ?SearchHandler $handler = null)
{
    // Guard first so that a null string never reaches the syntax helper.
    if (null === $string) {
        return '';
    }
    if (!$handler) {
        return $string;
    }

    return $this->getLuceneHelper()->containsAdvancedLuceneSyntax($string)
        ? $handler->createAdvancedQueryString($string)
        : $handler->createSimpleQueryString($string);
}
534 | |
/**
 * If the query ends in a non-escaped question mark, the user may not really
 * intend to use the question mark as a wildcard -- account for that by
 * searching for both the wildcard and the literal interpretation.
 *
 * Each term ending in '?' (outside of quoted phrases) is rewritten to
 * "(term) OR (escaped term)"; for multi-word queries the alternative is
 * additionally wrapped in parentheses.
 *
 * @param string $string Search query to adjust
 *
 * @return string
 */
protected function fixTrailingQuestionMarks($string)
{
    // Treat colon and whitespace as word separators -- in either case, the
    // replacement needs its own parentheses to stay unambiguous.
    $isMultiword = preg_match('/[^\s][\s:]+[^\s]/', $string);

    $expandTerm = static function (array $match) use ($isMultiword): string {
        $term = $match[1];
        // Normalize escaping: unescape any already-escaped question marks
        // first so that escaping all of them never double-escapes.
        $literal = str_replace('?', '\?', str_replace('\?', '?', $term));
        $alternatives = "($term) OR ($literal)";

        return $isMultiword ? "($alternatives) " : $alternatives;
    };

    // Use a lookahead to skip matches found within quoted phrases.
    $lookahead = '(?=(?:[^\"]*+\"[^\"]*+\")*+[^\"]*+$)';
    $pattern = '/([^\s:()]+\?)(\s|$)' . $lookahead . '/';

    return rtrim(preg_replace_callback($pattern, $expandTerm, $string));
}
570 | |
/**
 * Given a Query object, return a fully normalized version of the query
 * string: first normalized by the Lucene syntax helper, then adjusted for
 * trailing question marks.
 *
 * @param Query $query Query object
 *
 * @return string
 */
protected function getNormalizedQueryString($query)
{
    $normalized = $this->getLuceneHelper()
        ->normalizeSearchString($query->getString());

    return $this->fixTrailingQuestionMarks($normalized);
}
586 | |
/**
 * Return advanced inner search string based on input and handler.
 *
 * @param string        $string  Input search string
 * @param SearchHandler $handler Search handler
 *
 * @return string
 */
protected function createAdvancedInnerSearchString(
    $string,
    SearchHandler $handler
) {
    // Special case -- the user wants all records but the current handler
    // has a filter query: the filter query becomes the search.
    $wantsAllRecords = '*:*' === trim($string);
    if ($wantsAllRecords && $handler->hasFilterQuery()) {
        return $handler->getFilterQuery();
    }

    // A query that already carries field specifications cannot easily be
    // mapped onto other fields through the defined handlers; leave it as-is.
    if (str_contains($string, ':')) {
        return $string;
    }

    return $handler->createAdvancedQueryString($string);
}
615 | } |