Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
99.54% |
216 / 217 |
|
93.33% |
14 / 15 |
CRAP | |
0.00% |
0 / 1 |
Backend | |
99.54% |
216 / 217 |
|
93.33% |
14 / 15 |
60 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
3 | |||
search | |
100.00% |
52 / 52 |
|
100.00% |
1 / 1 |
8 | |||
processBackendFailures | |
100.00% |
15 / 15 |
|
100.00% |
1 / 1 |
4 | |||
createRecordCollection | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
fillMergedCollection | |
100.00% |
41 / 41 |
|
100.00% |
1 / 1 |
12 | |||
retrieve | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getRecordCollectionFactory | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getActiveBackends | |
97.06% |
33 / 34 |
|
0.00% |
0 / 1 |
13 | |||
getRecord | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
3 | |||
getBlockSize | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
6 | |||
onSearchPre | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
onSearchPost | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
triggerSearchEvent | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
4 | |||
collectEventResults | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
convertSearchEvent | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | /** |
4 | * Blender backend. |
5 | * |
6 | * PHP version 8 |
7 | * |
8 | * Copyright (C) The National Library of Finland 2019-2022. |
9 | * |
10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2, |
12 | * as published by the Free Software Foundation. |
13 | * |
14 | * This program is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | * GNU General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU General Public License |
20 | * along with this program; if not, write to the Free Software |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 | * |
23 | * @category VuFind |
24 | * @package Search |
25 | * @author Ere Maijala <ere.maijala@helsinki.fi> |
26 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
27 | * @link https://vufind.org |
28 | */ |
29 | |
30 | namespace VuFindSearch\Backend\Blender; |
31 | |
32 | use Laminas\EventManager\EventInterface; |
33 | use Laminas\EventManager\EventManager; |
34 | use VuFindSearch\Backend\AbstractBackend; |
35 | use VuFindSearch\Backend\BackendInterface; |
36 | use VuFindSearch\Backend\Blender\Response\Json\RecordCollection; |
37 | use VuFindSearch\Command\SearchCommand; |
38 | use VuFindSearch\ParamBag; |
39 | use VuFindSearch\Query\AbstractQuery; |
40 | use VuFindSearch\Response\RecordCollectionInterface; |
41 | use VuFindSearch\Response\RecordInterface; |
42 | |
43 | use function count; |
44 | use function intval; |
45 | |
/**
 * Blender backend.
 *
 * Runs a search against several actual backends and merges ("blends") the
 * results into a single record collection. Records that are not part of the
 * initially blended set are interleaved in blocks, cycling through the
 * available backends.
 *
 * @category VuFind
 * @package  Search
 * @author   Ere Maijala <ere.maijala@helsinki.fi>
 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
 * @link     https://vufind.org
 */
class Backend extends AbstractBackend
{
    use \VuFindSearch\Feature\SearchBackendEventManagerTrait;

    /**
     * Actual backends (keyed by backend identifier)
     *
     * @var array
     */
    protected $backends;

    /**
     * Limit for number of records to blend
     *
     * @var int
     */
    protected $blendLimit;

    /**
     * Default block size for interleaved records
     *
     * @var int
     */
    protected $blockSize;

    /**
     * Adaptive block sizes for interleaved records.
     *
     * Each entry is a string of the form "from-to:blockSize" (see getBlockSize()).
     *
     * @var array
     */
    protected $adaptiveBlockSizes;

    /**
     * Blender configuration
     *
     * @var \Laminas\Config\Config
     */
    protected $config;

    /**
     * Mappings configuration
     *
     * @var array
     */
    protected $mappings;

    /**
     * Event manager.
     *
     * @var EventManager
     */
    protected $events;

    /**
     * Constructor.
     *
     * @param array                  $backends Actual backends
     * @param \Laminas\Config\Config $config   Blender configuration
     * @param array                  $mappings Mappings configuration
     * @param EventManager           $events   Event manager
     */
    public function __construct(
        array $backends,
        \Laminas\Config\Config $config,
        $mappings,
        EventManager $events
    ) {
        $this->backends = $backends;
        $this->config = $config;
        $this->mappings = $mappings;
        $this->setEventManager($events);

        // The blend limit is at least 20, or larger if more initial results have
        // been configured:
        $boostMax = isset($this->config->Blending->initialResults)
            ? count($this->config->Blending->initialResults->toArray())
            : 0;
        $this->blendLimit = max(20, $boostMax);
        $this->blockSize = intval($this->config->Blending->blockSize ?? 10);
        $this->adaptiveBlockSizes
            = isset($this->config->Blending->adaptiveBlockSizes)
            ? $this->config->Blending->adaptiveBlockSizes->toArray()
            : [];
    }

    /**
     * Perform a search and return record collection.
     *
     * The backend-specific queries and parameters are read from $params using
     * the keys "query_<backendId>" and "params_<backendId>". If $params is null
     * or no backend is active, an empty merged collection is returned.
     *
     * @param AbstractQuery $query  Search query
     * @param int           $offset Search offset
     * @param int           $limit  Search limit
     * @param ?ParamBag     $params Search backend parameters
     *
     * @return RecordCollectionInterface
     * @throws \Exception If every backend search failed
     */
    public function search(
        AbstractQuery $query,
        $offset,
        $limit,
        ?ParamBag $params = null
    ) {
        $mergedCollection = $this->createRecordCollection();

        $backendDetails = [];
        $activeBackends = $this->getActiveBackends(
            $params,
            $mergedCollection->getFacetDelimiter('blender_backend')
        );
        // Note: $activeBackends is empty when $params is null, so $params is
        // never dereferenced in that case.
        foreach ($activeBackends as $backendId => $backend) {
            $backendDetails[$backendId] = [
                'backend' => $backend,
                'query' => $params->get("query_$backendId")[0],
                'params' => $params->get("params_$backendId")[0],
            ];
        }
        if (!$backendDetails) {
            return $mergedCollection;
        }

        // With a zero limit no records are requested from the backends:
        $blendLimit = $limit === 0 ? 0 : $this->blendLimit;
        // Fetch records from backends up to the number of initially boosted records:
        $collections = [];
        $exceptions = [];
        foreach ($backendDetails as $backendId => $details) {
            try {
                $collections[$backendId] = $details['backend']->search(
                    $details['query'],
                    0,
                    $blendLimit,
                    $details['params']
                );
            } catch (\Exception $e) {
                // Defer handling so that the remaining backends still get queried:
                $exceptions[$backendId] = $e;
            }
        }

        $this->processBackendFailures(
            $mergedCollection,
            $exceptions,
            !empty($collections)
        );

        $totalCount = 0;
        foreach ($collections as $collection) {
            $totalCount += $collection->getTotal();
        }
        $blockSize = $this->getBlockSize($totalCount);

        // $backendRecords is used below as the per-backend record buffers when
        // filling the rest of the merged collection:
        $backendRecords = $mergedCollection->initBlended(
            $collections,
            $blendLimit,
            $blockSize,
            $totalCount
        );

        if ($limit) {
            $this->fillMergedCollection(
                $mergedCollection,
                $collections,
                $backendDetails,
                $backendRecords,
                $offset + $limit,
                $blockSize
            );
        }

        $mergedCollection->slice($offset, $limit);

        return $mergedCollection;
    }

    /**
     * Process any backend exceptions and throw an exception if all failed or add an
     * error message if some of them failed.
     *
     * @param RecordCollection $mergedCollection Result collection
     * @param array            $exceptions       Exceptions keyed by backend id
     * @param bool             $haveResults      Whether any results are available
     *
     * @return void
     * @throws \Exception The first collected exception if no results are available
     */
    protected function processBackendFailures(
        RecordCollection $mergedCollection,
        array $exceptions,
        bool $haveResults
    ): void {
        if (!$exceptions) {
            return;
        }
        if (!$haveResults) {
            // No backend returned a collection, so there is nothing to display;
            // raise the first exception encountered:
            throw reset($exceptions);
        }

        // Log the errors and collect a list to display to the user:
        $failedBackends = [];
        foreach ($exceptions as $backendId => $exception) {
            $this->logError("Search in $backendId failed: " . (string)$exception);
            $failedBackends[] = $this->config->Backends[$backendId];
        }
        $mergedCollection->addError(
            [
                'msg' => 'search_backend_partial_failure',
                'tokens' => [
                    '%%sources%%' => implode(', ', $failedBackends),
                ],
            ]
        );
    }

    /**
     * Create record collection.
     *
     * @return Response\Json\RecordCollection
     */
    protected function createRecordCollection(): Response\Json\RecordCollection
    {
        $collection = new Response\Json\RecordCollection(
            $this->config,
            $this->mappings
        );
        $collection->setSourceIdentifier($this->identifier);
        return $collection;
    }

    /**
     * Add records to the merged collection in a round-robin fashion up to the
     * specified limit
     *
     * The backend to use for a position is chosen by block: positions
     * 0..blockSize-1 belong to the first available backend, the next blockSize
     * positions to the second one and so on, wrapping around. If the chosen
     * backend has no more records, the other backends are tried in order.
     *
     * @param RecordCollectionInterface $mergedCollection Merged collection
     * @param array                     $collections      Source collections
     * @param array                     $backendDetails   Active backend details
     * @param array                     $backendRecords   Backend record buffers
     * @param int                       $limit            Record limit
     * @param int                       $blockSize        Block size
     *
     * @return void
     */
    protected function fillMergedCollection(
        RecordCollectionInterface $mergedCollection,
        array $collections,
        array $backendDetails,
        array $backendRecords,
        int $limit,
        int $blockSize
    ): void {
        // Nothing to do if the collection already holds enough records:
        if ($limit <= $mergedCollection->count()) {
            return;
        }

        $backendOffsets = [];
        $backendTotals = [];
        $availableBackendIds = array_keys($collections);
        foreach ($availableBackendIds as $backendId) {
            $backendOffsets[$backendId] = 0;
            $backendTotals[$backendId] = $collections[$backendId]->getTotal();
        }
        // First iterate through the records already in the merged collection to
        // calculate proper backend offsets for further records:
        $pos = 0;
        foreach ($mergedCollection->getRecords() as $record) {
            ++$pos;
            ++$backendOffsets[$record->getSearchBackendIdentifier()];
        }

        // Fetch records
        $backendCount = count($availableBackendIds);
        for (; $pos < $limit; $pos++) {
            // Integer division keeps the block index (and thus the array index
            // below) an int:
            $currentBlock = intdiv($pos, $blockSize);
            $backendAtPos = $availableBackendIds[$currentBlock % $backendCount];

            // Try the backend that owns this block first, then the other
            // backends in their original order:
            $candidateIds = [$backendAtPos];
            foreach ($availableBackendIds as $backendId) {
                if ($backendId !== $backendAtPos) {
                    $candidateIds[] = $backendId;
                }
            }

            $record = null;
            foreach ($candidateIds as $backendId) {
                if ($backendOffsets[$backendId] >= $backendTotals[$backendId]) {
                    // This backend has been exhausted.
                    continue;
                }
                // The offset is consumed even if no record is returned:
                $record = $this->getRecord(
                    $backendDetails[$backendId],
                    $backendRecords[$backendId],
                    $backendOffsets[$backendId]++,
                    $blockSize
                );
                if (null !== $record) {
                    break;
                }
            }

            if (null === $record) {
                // No backend could provide any more records.
                break;
            }
            $mergedCollection->add($record, false);
        }
    }

    /**
     * Retrieve a single document.
     *
     * Not supported by Blender; always throws.
     *
     * @param string    $id     Document identifier
     * @param ?ParamBag $params Search backend parameters
     *
     * @return \VuFindSearch\Response\RecordCollectionInterface
     * @throws \Exception Always
     */
    public function retrieve($id, ?ParamBag $params = null)
    {
        throw new \Exception('Blender does not support retrieve');
    }

    /**
     * Return the record collection factory.
     *
     * Not supported by Blender; always throws.
     *
     * @return \VuFindSearch\Response\RecordCollectionFactoryInterface
     * @throws \Exception Always
     */
    public function getRecordCollectionFactory()
    {
        throw new \Exception('getRecordCollectionFactory not supported in Blender');
    }

    /**
     * Get active backends for a search
     *
     * Interprets the blender_backend pseudo-filter in the "fq" parameter:
     * positive filters (plain or the tagged OR form) restrict the result to the
     * listed backends, and negative filters ("-blender_backend:...") remove
     * backends from the result. Without any valid positive filter all
     * configured backends are active.
     *
     * @param ?ParamBag $params    Search backend parameters
     * @param string    $delimiter Delimiter for the blender_backend facet
     *
     * @return array Active backends keyed by backend identifier
     */
    protected function getActiveBackends(?ParamBag $params, string $delimiter): array
    {
        if (null === $params) {
            // Can't do anything without backend params..
            return [];
        }

        $includePrefix = 'blender_backend:';
        $excludePrefix = '-blender_backend:';

        $activeBackends = $this->backends;

        // Handle the blender_backend pseudo-filter
        $fq = $params->get('fq') ?? [];
        $filteredBackends = [];
        // Handle AND and OR filters first:
        foreach ($fq as $filter) {
            $advancedOr = preg_match(
                '/\{!tag=blender_backend_filter}blender_backend:\((.+)\)/',
                $filter,
                $matches
            );
            if ($advancedOr) {
                // Split the OR group into its individual filters:
                $filter = explode(' OR ', $matches[1]);
            }
            foreach ((array)$filter as $current) {
                if (!str_starts_with($current, $includePrefix)) {
                    continue;
                }
                $active = trim(substr($current, strlen($includePrefix)), '"');
                if ($delimiter) {
                    // Only the part before the delimiter identifies the backend:
                    [$active] = explode($delimiter, $active, 2);
                }
                if (!isset($activeBackends[$active])) {
                    $this->logWarning(
                        "Invalid blender_backend filter: Backend $active not"
                        . ' enabled'
                    );
                } else {
                    $filteredBackends[$active] = $activeBackends[$active];
                }
            }
        }
        if ($filteredBackends) {
            $activeBackends = $filteredBackends;
        }
        // Handle NOT filters last:
        foreach ($fq as $current) {
            if (str_starts_with($current, $excludePrefix)) {
                $disabled = trim(substr($current, strlen($excludePrefix)), '"');
                if ($delimiter) {
                    [$disabled] = explode($delimiter, $disabled, 2);
                }
                // unset() is a no-op for a key that does not exist:
                unset($activeBackends[$disabled]);
            }
        }

        return $activeBackends;
    }

    /**
     * Get next record from the given backend.
     *
     * Gets next records from the previously retrieved array of records or retrieves
     * a new batch of records from the backend.
     *
     * @param array $backendDetails Details for the backend ('backend', 'query' and
     * 'params' elements)
     * @param array $backendRecords Record buffer (modified in place)
     * @param int   $offset         Record offset
     * @param int   $blockSize      Blending block size
     *
     * @return RecordInterface|null Next record, or null if none is available
     */
    protected function getRecord(
        array $backendDetails,
        array &$backendRecords,
        int $offset,
        int $blockSize
    ): ?RecordInterface {
        if (!$backendRecords) {
            // Buffer is empty; fetch a new batch of at least 20 records starting
            // from the given offset:
            $collection = $backendDetails['backend']->search(
                $backendDetails['query'],
                $offset,
                max($blockSize, 20),
                $backendDetails['params']
            );
            $backendRecords = $collection->getRecords();
        }
        return $backendRecords ? array_shift($backendRecords) : null;
    }

    /**
     * Get the block size for the given result count
     *
     * Checks the adaptive block sizes (entries of the form "from-to:blockSize")
     * in order and returns the block size of the first range that contains the
     * result count. Falls back to the default block size.
     *
     * @param int $resultCount Result count
     *
     * @return int
     * @throws \Exception If an adaptive block size entry is invalid
     */
    protected function getBlockSize(int $resultCount): int
    {
        foreach ($this->adaptiveBlockSizes as $size) {
            $parts = explode(':', $size, 2);
            $blockSize = intval($parts[1] ?? 0);
            if ($blockSize === 0) {
                throw new \Exception("Invalid adaptive block size: $size");
            }
            $rangeParts = explode('-', $parts[0]);
            $from = intval($rangeParts[0]);
            $to = intval($rangeParts[1] ?? 0);
            if ($from > $to) {
                throw new \Exception("Invalid adaptive block size: $size");
            }
            if ($from <= $resultCount && $resultCount <= $to) {
                return $blockSize;
            }
        }
        return $this->blockSize;
    }

    /**
     * Trigger pre-search events for all backends.
     *
     * @param EventInterface $event Event
     *
     * @return EventInterface
     */
    public function onSearchPre(EventInterface $event): EventInterface
    {
        return $this->triggerSearchEvent($event);
    }

    /**
     * Trigger post-search events for all backends.
     *
     * @param EventInterface $event Event
     *
     * @return EventInterface
     */
    public function onSearchPost(EventInterface $event): EventInterface
    {
        return $this->triggerSearchEvent($event);
    }

    /**
     * Trigger the given search event (pre or post) for all backends.
     *
     * The event is only processed when its command is a SearchCommand targeted
     * at this backend; otherwise it is returned untouched. After processing,
     * the event's command, backend and target are restored to refer to Blender.
     *
     * @param EventInterface $event Event
     *
     * @return EventInterface
     */
    protected function triggerSearchEvent(EventInterface $event)
    {
        $command = $event->getParam('command');
        // Check the type first so that a missing or foreign command is ignored
        // instead of causing an error on the method call:
        if (
            !($command instanceof SearchCommand)
            || $command->getTargetIdentifier() !== $this->getIdentifier()
        ) {
            return $event;
        }

        // Trigger the event for all backends:
        foreach ($this->backends as $id => $backend) {
            $this->convertSearchEvent($event, $command, $backend);
            $this->events->triggerEvent($event);
            // Listeners may have replaced the command, so read it back from the
            // event:
            $this->collectEventResults($command, $event->getParam('command'), $id);
        }

        // Restore the event and return it:
        $event->setParam('command', $command);
        $event->setParam('backend', $this->getIdentifier());
        $event->setTarget($this);
        return $event;
    }

    /**
     * Collect results back into the Command after an event has been processed
     *
     * Stores the backend-specific query and search parameters in the main
     * command's search parameters as "query_<backendId>" and
     * "params_<backendId>".
     *
     * @param SearchCommand $command        Search command
     * @param SearchCommand $backendCommand Backend-specific command
     * @param string        $backendId      Backend identifier
     *
     * @return void
     */
    protected function collectEventResults(
        SearchCommand $command,
        SearchCommand $backendCommand,
        string $backendId
    ): void {
        $command->getSearchParameters()->set(
            "query_$backendId",
            $backendCommand->getQuery()
        );
        $command->getSearchParameters()->set(
            "params_$backendId",
            $backendCommand->getSearchParameters()
        );
    }

    /**
     * Convert a search event to another backend
     *
     * Replaces the event's command with a clone that targets the given backend
     * and carries that backend's query and search parameters, and points the
     * event's backend parameter and target to the backend.
     *
     * @param EventInterface   $event   Event
     * @param SearchCommand    $command Search command
     * @param BackendInterface $backend Target backend
     *
     * @return EventInterface
     */
    protected function convertSearchEvent(
        EventInterface $event,
        SearchCommand $command,
        BackendInterface $backend
    ): EventInterface {
        $backendId = $backend->getIdentifier();

        $newCommand = clone $command;
        $newCommand->setTargetIdentifier($backendId);
        $params = $command->getSearchParameters();
        $newCommand->setQuery($params->get("query_$backendId")[0]);
        $newCommand->setSearchParameters($params->get("params_$backendId")[0]);

        $event->setParam('command', $newCommand);
        $event->setParam('backend', $backendId);
        $event->setTarget($backend);
        return $event;
    }
}