Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
96.13% |
149 / 155 |
|
80.00% |
12 / 15 |
CRAP | |
0.00% |
0 / 1 |
RecordCollection | |
96.13% |
149 / 155 |
|
80.00% |
12 / 15 |
60 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 | |||
initBlended | |
100.00% |
22 / 22 |
|
100.00% |
1 / 1 |
4 | |||
addError | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
getErrors | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setSourceIdentifier | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getFacetDelimiter | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
collectBackendRecords | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
3 | |||
add | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
2.03 | |||
addErrorsFromBackends | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
5 | |||
getBackendAtPosition | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
getMergedFacets | |
100.00% |
18 / 18 |
|
100.00% |
1 / 1 |
4 | |||
mapFacetValues | |
87.88% |
29 / 33 |
|
0.00% |
0 / 1 |
13.30 | |||
getHierarchyParentKeys | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 | |||
getBlenderFacetStats | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
7 | |||
convertFacetValue | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
8.09 |
1 | <?php |
2 | |
3 | /** |
4 | * JSON-based record collection for records from multiple sources. |
5 | * |
6 | * PHP version 8 |
7 | * |
8 | * Copyright (C) The National Library of Finland 2022. |
9 | * |
10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2, |
12 | * as published by the Free Software Foundation. |
13 | * |
14 | * This program is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | * GNU General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU General Public License |
20 | * along with this program; if not, write to the Free Software |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 | * |
23 | * @category VuFind |
24 | * @package Search |
25 | * @author Ere Maijala <ere.maijala@helsinki.fi> |
26 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
27 | * @link http://vufind.org |
28 | */ |
29 | |
30 | namespace VuFindSearch\Backend\Blender\Response\Json; |
31 | |
32 | use VuFindSearch\Response\RecordInterface; |
33 | |
34 | use function array_slice; |
35 | use function count; |
36 | use function in_array; |
37 | use function intval; |
38 | use function is_array; |
39 | use function is_string; |
40 | |
41 | /** |
42 | * JSON-based record collection for records from multiple sources. |
43 | * |
44 | * @category VuFind |
45 | * @package Search |
46 | * @author Ere Maijala <ere.maijala@helsinki.fi> |
47 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
48 | * @link http://vufind.org |
49 | */ |
50 | class RecordCollection extends \VuFindSearch\Backend\Solr\Response\Json\RecordCollection |
51 | { |
52 | /** |
53 | * Blender configuration |
54 | * |
55 | * @var \Laminas\Config\Config |
56 | */ |
57 | protected $config; |
58 | |
59 | /** |
60 | * Mappings configuration |
61 | * |
62 | * @var array |
63 | */ |
64 | protected $mappings; |
65 | |
66 | /** |
67 | * Backends to be used for initial results |
68 | * |
69 | * @var array |
70 | */ |
71 | protected $initialResultsBackends; |
72 | |
73 | /** |
74 | * Any errors encountered |
75 | * |
76 | * @var array |
77 | */ |
78 | protected $errors = []; |
79 | |
/**
 * Constructor
 *
 * Stores the configuration and mappings, resets the response to the class
 * template and reads the list of initial results backends from the
 * Blending/initialResults setting (an empty list when it is not set).
 *
 * @param \Laminas\Config\Config $config   Configuration
 * @param array                  $mappings Mappings configuration
 */
public function __construct($config = null, $mappings = [])
{
    $this->response = static::$template;
    $this->mappings = $mappings;
    $this->config = $config;

    // isset() also covers the case where no configuration was given at all:
    $boostedBackends = [];
    if (isset($this->config->Blending->initialResults)) {
        $boostedBackends = $this->config->Blending->initialResults->toArray();
    }
    $this->initialResultsBackends = $boostedBackends;
}
96 | |
/**
 * Initialize blended results
 *
 * Resets the response, stores the total count and creates a record list for
 * positions 0 to $limit - 1 by picking records from the backend assigned to
 * each position. Merged facets are stored in the response as well.
 *
 * @param array $collections Array of record collections
 * @param int   $limit       Result limit
 * @param int   $blockSize   Blending block size
 * @param int   $totalCount  Total result count
 *
 * @return array Remaining records keyed by backend identifier
 */
public function initBlended(
    array $collections,
    int $limit,
    int $blockSize,
    int $totalCount
): array {
    // Start from a clean response that only knows the total count:
    $this->response = static::$template;
    $this->response['response']['numFound'] = $totalCount;
    $this->rewind();

    if (empty($collections)) {
        return [];
    }

    $recordsByBackend = $this->collectBackendRecords($collections);
    $this->addErrorsFromBackends($collections);

    $availableBackends = array_keys($recordsByBackend);
    // Only backends that actually returned a collection can provide boosted
    // initial results:
    $boostedBackends = array_intersect(
        $this->initialResultsBackends,
        $availableBackends
    );

    // Fill positions up to the limit from the backend assigned to each
    // position. A position whose backend has run out of records is skipped
    // here (the list will be filled later on in Backend).
    $position = 0;
    while ($position < $limit) {
        $source = $this->getBackendAtPosition(
            $position,
            $blockSize,
            $availableBackends,
            $boostedBackends
        );
        if (!empty($recordsByBackend[$source])) {
            $nextRecord = array_shift($recordsByBackend[$source]);
            $this->add($nextRecord, false);
        }
        $position++;
    }

    $this->response['facet_counts']['facet_fields']
        = $this->getMergedFacets($collections);

    return $recordsByBackend;
}
150 | |
/**
 * Add an error message
 *
 * The error is stored only if an identical one has not been added already.
 * The comparison is strict so that loosely equal values (e.g. a boolean true
 * and any non-empty string, or the numeric strings '10' and '1e1') are not
 * mistaken for duplicates.
 *
 * @param mixed $error Error
 *
 * @return void
 */
public function addError($error): void
{
    if (!in_array($error, $this->errors, true)) {
        $this->errors[] = $error;
    }
}
164 | |
/**
 * Return the errors collected so far.
 *
 * Each entry is either a translatable string or an array in the format that
 * the Flashmessages view helper understands.
 *
 * @return array
 */
public function getErrors()
{
    return $this->errors;
}
177 | |
/**
 * Set the source backend identifier.
 *
 * Only the collection's own source is updated; the records are deliberately
 * left untouched so that their original source identifiers stay intact.
 * Their search backend identifiers are assigned in collectBackendRecords.
 *
 * @param string $identifier Backend identifier
 *
 * @return void
 */
public function setSourceIdentifier($identifier)
{
    $this->source = $identifier;
}
192 | |
/**
 * Get delimiter for the given facet field
 *
 * Each delimited_facets entry is either "field" or "field|delimiter". When
 * the matching entry has no delimiter of its own, the general delimiter
 * setting is used. An empty string is returned for fields that are not
 * listed or when no delimiter is available.
 *
 * @param string $field Facet field
 *
 * @return string
 */
public function getFacetDelimiter(string $field): string
{
    $entries = $this->config->Advanced_Settings->delimited_facets ?? [];
    foreach ($entries as $entry) {
        $segments = explode('|', $entry);
        if ($field !== $segments[0]) {
            continue;
        }
        // First matching entry wins:
        return $segments[1]
            ?? $this->config->Advanced_Settings->delimiter
            ?? '';
    }
    return '';
}
212 | |
/**
 * Collect records from all backends to an associative array
 *
 * Every record keeps its current source identifier and gets the key of its
 * collection as the search backend identifier.
 *
 * @param array $collections Array of record collections keyed by backend
 * identifier
 *
 * @return array Lists of records keyed by backend identifier
 */
protected function collectBackendRecords(array $collections): array
{
    $recordsByBackend = [];
    foreach ($collections as $backendId => $collection) {
        $list = [];
        foreach ($collection->getRecords() as $record) {
            $originalSource = $record->getSourceIdentifier();
            $record->setSourceIdentifiers($originalSource, $backendId);
            $list[] = $record;
        }
        // A backend without records still gets an (empty) entry:
        $recordsByBackend[$backendId] = $list;
    }
    return $recordsByBackend;
}
236 | |
/**
 * Add a record to the collection.
 *
 * If the Backends configuration contains a non-empty label for the record's
 * search backend, it is added to the record as a 'source' label before the
 * record is passed on to the parent implementation.
 *
 * @param RecordInterface $record        Record to add
 * @param bool            $checkExisting Whether to check for existing record in
 * the collection (slower, but makes sure there are no duplicates)
 *
 * @return void
 */
public function add(RecordInterface $record, $checkExisting = true)
{
    $backendId = $record->getSearchBackendIdentifier();
    $sourceLabel = $this->config->Backends[$backendId] ?? '';
    if (!empty($sourceLabel)) {
        $record->addLabel($sourceLabel, 'source');
    }
    parent::add($record, $checkExisting);
}
255 | |
/**
 * Store errors from all backends
 *
 * String errors from a backend that has a label in the Backends
 * configuration are wrapped into a translatable message array that also
 * carries the label. Other errors are stored as they are.
 *
 * @param array $collections Array of record collections keyed by backend
 * identifier
 *
 * @return void
 */
protected function addErrorsFromBackends(array $collections): void
{
    foreach ($collections as $backendId => $collection) {
        // The label only depends on the backend, so look it up once. The
        // null-safe lookup matches add() and copes with a missing
        // configuration or an unlisted backend.
        $label = $this->config->Backends[$backendId] ?? '';
        foreach ($collection->getErrors() as $error) {
            if ($label && is_string($error)) {
                $error = [
                    'msg' => '%%error%% -- %%label%%',
                    'tokens' => [
                        '%%error%%' => $error,
                        '%%label%%' => $label,
                    ],
                    'translate' => true,
                    'translateTokens' => true,
                ];
            }
            $this->addError($error);
        }
    }
}
283 | |
/**
 * Calculate the backend to be used for a record at the given position
 *
 * A backend listed for the position in $initialResultsBackends takes
 * precedence. Otherwise the backends take turns in blocks of $blockSize
 * positions.
 *
 * Note: This does not take into account whether there are enough records in the
 * source.
 *
 * @param int   $position               Position
 * @param int   $blockSize              Record block size (values below 1 are
 * treated as 1)
 * @param array $backendIds             Available backends
 * @param array $initialResultsBackends List of backends for initial result
 * boosts
 *
 * @return string Backend identifier or an empty string if there are no
 * backends
 */
protected function getBackendAtPosition(
    int $position,
    int $blockSize,
    array $backendIds,
    array $initialResultsBackends
): string {
    $boostBackend = $initialResultsBackends[$position] ?? false;
    if ($boostBackend) {
        return $boostBackend;
    }

    $backendCount = count($backendIds);
    if (0 === $backendCount) {
        return '';
    }

    // We're outside the positions affected by boosting, calculate by block.
    // Clamp the block size so that a zero or negative value cannot cause a
    // division by zero.
    $currentBlock = intdiv($position, max(1, $blockSize));
    return $backendIds[$currentBlock % $backendCount];
}
313 | |
/**
 * Merge facets
 *
 * Builds the merged value list for every facet field defined in the
 * Facets/Fields mappings, adds the blender_backend facet and converts
 * everything to lists of [value, count] pairs.
 *
 * @param array $collections Result collections
 *
 * @return array Lists of [value, count] pairs keyed by facet field
 */
protected function getMergedFacets(array $collections): array
{
    $mergedFacets = [];

    // Iterate through mappings and merge values. It is important to do it this
    // way since multiple facets may map to a single one.
    $facetFieldData = $this->mappings['Facets']['Fields'] ?? [];
    foreach ($facetFieldData as $facetField => $settings) {
        // Get merged list of facet values:
        $list = $this->mapFacetValues($collections, $settings);
        // Re-sort the list by descending count. The spaceship operator gives
        // a proper -1/0/1 result for any comparable values.
        // TODO: Could we support alphabetical order?
        uasort($list, static fn ($a, $b) => $b <=> $a);
        $mergedFacets[$facetField] = $list;
    }

    $mergedFacets['blender_backend'] = $this->getBlenderFacetStats($collections);

    // Convert the array back to Solr-style array with two elements
    $facetFields = [];
    foreach ($mergedFacets as $facet => $values) {
        $pairs = [];
        foreach ($values as $key => $value) {
            $pairs[] = [$key, $value];
        }
        $facetFields[$facet] = $pairs;
    }

    return $facetFields;
}
356 | |
/**
 * Map facet values from the backends into a merged list
 *
 * For every backend that has a field mapping, each facet value is converted
 * with convertFacetValue and its count is added to the merged result. With
 * hierarchical mappings the count is added to all parent levels as well.
 * Finally, for every backend with an Ignore setting and a result collection,
 * the backend's total count is added either to the values listed in the
 * setting or, if the setting is not a list, to all values collected so far.
 *
 * @param array $collections Result collections
 * @param array $settings    Settings for a single facet field
 *
 * @return array Counts keyed by facet value
 */
protected function mapFacetValues(array $collections, array $settings): array
{
    // These depend only on the field settings, so resolve them once. A
    // missing Mappings section is treated as "no mappings".
    $facetType = $settings['Type'] ?? 'normal';
    $allMappings = $settings['Mappings'] ?? [];

    $result = [];
    foreach ($collections as $backendId => $collection) {
        $mappings = $allMappings[$backendId] ?? [];
        $backendFacetField = $mappings['Field'] ?? '';
        if (!$mappings || !$backendFacetField) {
            continue;
        }
        $valueMap = $mappings['Values'] ?? [];
        $unmappedRule = $mappings['Unmapped'] ?? 'keep';
        $hierarchical = $mappings['Hierarchical'] ?? false;
        $facets = $collection->getFacets();
        foreach ($facets[$backendFacetField] ?? [] as $value => $count) {
            // PHP turns numeric string array keys into integers, so make
            // sure a string is passed on:
            $value = $this->convertFacetValue(
                (string)$value,
                $facetType,
                $unmappedRule,
                $valueMap,
                $hierarchical
            );
            if ('' === $value) {
                continue;
            }

            $count = intval($count);
            $result[$value] = ($result[$value] ?? 0) + $count;
            if ($hierarchical) {
                foreach ($this->getHierarchyParentKeys($value) as $key) {
                    $result[$key] = ($result[$key] ?? 0) + $count;
                }
            }
        }
    }

    foreach ($allMappings as $backendId => $mappings) {
        $ignore = $mappings['Ignore'] ?? false;
        if (!$ignore || !($collections[$backendId] ?? false)) {
            continue;
        }
        $total = $collections[$backendId]->getTotal();
        $ignoredKeys = is_array($ignore) ? $ignore : array_keys($result);
        foreach ($ignoredKeys as $ignoredValue) {
            $result[$ignoredValue] = ($result[$ignoredValue] ?? 0) + $total;
        }
    }

    return $result;
}
413 | |
/**
 * Get parent hierarchy keys for a facet value
 *
 * The leading number of the value is its level. One key is returned for each
 * level below it, from the nearest parent down to level 0.
 *
 * For example with '2/Main/Sub/Shelf/' the result is:
 * [
 *   '1/Main/Sub/',
 *   '0/Main/'
 * ]
 *
 * @param string $value Hierarchical facet value
 *
 * @return array
 */
protected function getHierarchyParentKeys(string $value): array
{
    $segments = explode('/', $value);
    $parentLevel = intval(array_shift($segments)) - 1;

    $parentKeys = [];
    while ($parentLevel >= 0) {
        // A key at level N consists of the first N + 1 path segments:
        $path = implode('/', array_slice($segments, 0, $parentLevel + 1));
        $parentKeys[] = "$parentLevel/$path/";
        $parentLevel--;
    }
    return $parentKeys;
}
438 | |
/**
 * Get facet counts for Blender backend facet
 *
 * Every configured backend with a collection and a non-zero total gets its
 * total as the count. If blender_backend is an OR facet (orFacets is '*' or
 * lists it), backends without a collection are included with a null count.
 * When a delimiter is configured for the facet, the key is the backend
 * identifier followed by the delimiter and the backend name.
 *
 * @param array $collections Collections
 *
 * @return array Counts keyed by facet value
 */
protected function getBlenderFacetStats(array $collections): array
{
    $delimiter = $this->getFacetDelimiter('blender_backend');
    $orFacets = $this->config->Results_Settings->orFacets ?? '';
    $orFacetList = array_map('trim', explode(',', $orFacets));
    $isOrFacet = in_array('blender_backend', $orFacetList) || '*' === $orFacets;

    $stats = [];
    foreach ($this->config->Backends as $backendId => $name) {
        $facetKey = $backendId;
        if ($delimiter) {
            $facetKey = $backendId . $delimiter . $name;
        }

        if (!isset($collections[$backendId])) {
            if ($isOrFacet) {
                $stats[$facetKey] = null;
            }
            continue;
        }

        $total = $collections[$backendId]->getTotal();
        if ($total) {
            $stats[$facetKey] = $total;
        }
    }
    return $stats;
}
465 | |
/**
 * Convert a facet value from a backend
 *
 * A value found in the value map is replaced with its mapped value (with
 * boolean facets the mapped value is turned into 'true' or 'false'). A value
 * without a mapping is dropped for boolean facets and for the "drop" rule,
 * and kept as is otherwise. With hierarchical facets a value that does not
 * look like a hierarchical one ("level/path/") is made a top-level value.
 *
 * @param string $value        Facet value
 * @param string $type         Facet type
 * @param string $unmapped     Unmapped facet handling rule
 * @param array  $valueMap     Value map for the field
 * @param bool   $hierarchical Whether the facet is hierarchical
 *
 * @return string Converted value or an empty string if the value is to be
 * ignored
 */
protected function convertFacetValue(
    string $value,
    string $type,
    string $unmapped,
    array $valueMap,
    bool $hierarchical
): string {
    $isBoolean = 'boolean' === $type;
    $mapped = $valueMap[$value] ?? null;

    if (null === $mapped) {
        // No mapping defined for boolean facet or "drop" as the Unmapped rule;
        // ignore the value:
        if ($isBoolean || 'drop' === $unmapped) {
            return '';
        }
    } elseif ($isBoolean) {
        $value = $mapped ? 'true' : 'false';
    } else {
        $value = $mapped;
    }

    if (!$hierarchical) {
        return $value;
    }
    return preg_match('/^\d+\/.+\/$/', $value) ? $value : "0/$value/";
}
500 | } |