Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 139 |
|
0.00% |
0 / 10 |
CRAP | |
0.00% |
0 / 1 |
DeduplicationListener | |
0.00% |
0 / 139 |
|
0.00% |
0 / 10 |
2450 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
attach | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
onSearchPre | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
90 | |||
hasChildFilter | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
onSearchPost | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 | |||
fetchLocalRecords | |
0.00% |
0 / 79 |
|
0.00% |
0 / 1 |
506 | |||
getActiveRecordSources | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
appendDedupRecordFields | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
determineSourcePriority | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
determineBuildingPriority | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
30 |
1 | <?php |
2 | |
3 | /** |
4 | * Solr deduplication (merged records) listener. |
5 | * |
6 | * See https://vufind.org/wiki/indexing:deduplication for details on how this is |
7 | * used. |
8 | * |
9 | * PHP version 8 |
10 | * |
11 | * Copyright (C) Villanova University 2013. |
12 | * Copyright (C) The National Library of Finland 2013-2020. |
13 | * |
14 | * This program is free software; you can redistribute it and/or modify |
15 | * it under the terms of the GNU General Public License version 2, |
16 | * as published by the Free Software Foundation. |
17 | * |
18 | * This program is distributed in the hope that it will be useful, |
19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
21 | * GNU General Public License for more details. |
22 | * |
23 | * You should have received a copy of the GNU General Public License |
24 | * along with this program; if not, write to the Free Software |
25 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
26 | * |
27 | * @category VuFind |
28 | * @package Search |
29 | * @author David Maus <maus@hab.de> |
30 | * @author Ere Maijala <ere.maijala@helsinki.fi> |
31 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
32 | * @link https://vufind.org Main Site |
33 | */ |
34 | |
35 | namespace VuFind\Search\Solr; |
36 | |
37 | use Laminas\EventManager\EventInterface; |
38 | use Laminas\EventManager\SharedEventManagerInterface; |
39 | use Psr\Container\ContainerInterface; |
40 | use VuFindSearch\Backend\Solr\Backend; |
41 | use VuFindSearch\Service; |
42 | |
43 | use function in_array; |
44 | |
/**
 * Solr merged record handling listener.
 *
 * Listens to the search service's pre and post events. Before a search it adds
 * a filter query that hides either merged child records or dedup records; after
 * a search it replaces every dedup record in the result with the best matching
 * local record.
 *
 * @category VuFind
 * @package  Search
 * @author   David Maus <maus@hab.de>
 * @author   Ere Maijala <ere.maijala@helsinki.fi>
 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
 * @link     https://vufind.org Main Site
 */
class DeduplicationListener
{
    /**
     * Backend.
     *
     * @var Backend
     */
    protected $backend;

    /**
     * Service container.
     *
     * @var ContainerInterface
     */
    protected $serviceLocator;

    /**
     * Search configuration file identifier.
     *
     * @var string
     */
    protected $searchConfig;

    /**
     * Data source configuration file identifier.
     *
     * @var string
     */
    protected $dataSourceConfig;

    /**
     * Whether deduplication is enabled.
     *
     * @var bool
     */
    protected $enabled;

    /**
     * Constructor.
     *
     * @param Backend            $backend          Search backend
     * @param ContainerInterface $serviceLocator   Service locator
     * @param string             $searchConfig     Search config file id
     * @param string             $dataSourceConfig Data source file id
     * @param bool               $enabled          Whether deduplication is
     * enabled
     *
     * @return void
     */
    public function __construct(
        Backend $backend,
        ContainerInterface $serviceLocator,
        $searchConfig,
        $dataSourceConfig = 'datasources',
        $enabled = true
    ) {
        $this->backend = $backend;
        $this->serviceLocator = $serviceLocator;
        $this->searchConfig = $searchConfig;
        $this->dataSourceConfig = $dataSourceConfig;
        $this->enabled = $enabled;
    }

    /**
     * Attach listener to shared event manager.
     *
     * Registers onSearchPre() for the search service's pre event and
     * onSearchPost() for its post event.
     *
     * @param SharedEventManagerInterface $manager Shared event manager
     *
     * @return void
     */
    public function attach(
        SharedEventManagerInterface $manager
    ) {
        $manager->attach(
            Service::class,
            Service::EVENT_PRE,
            [$this, 'onSearchPre']
        );
        $manager->attach(
            Service::class,
            Service::EVENT_POST,
            [$this, 'onSearchPost']
        );
    }

    /**
     * Set up filter for excluding merge children.
     *
     * Only acts on commands that target this listener's backend, carry search
     * parameters and run in one of the handled contexts.
     *
     * @param EventInterface $event Event
     *
     * @return EventInterface
     */
    public function onSearchPre(EventInterface $event)
    {
        $command = $event->getParam('command');
        if ($command->getTargetIdentifier() !== $this->backend->getIdentifier()) {
            return $event;
        }

        $params = $command->getSearchParameters();
        $context = $command->getContext();
        $contexts = ['search', 'similar', 'getids', 'workExpressions'];
        if (!$params || !in_array($context, $contexts, true)) {
            return $event;
        }

        // If deduplication is enabled, filter out merged child records,
        // otherwise filter out dedup records. The 'getids' context and searches
        // that explicitly ask for child records also exclude dedup records.
        $excludeChildren = $this->enabled
            && 'getids' !== $context
            && !$this->hasChildFilter($params);
        if ($excludeChildren) {
            $fq = '-merged_child_boolean:true';
            if ('similar' === $context) {
                $id = $event->getParam('id');
                if ($id) {
                    // Also exclude the dedup record that contains the record
                    // for which similar records are being requested:
                    $fq .= ' AND -local_ids_str_mv:"'
                        . $this->escapeSolrPhrase($id) . '"';
                }
            }
        } else {
            $fq = '-merged_boolean:true';
        }
        $params->add('fq', $fq);

        return $event;
    }

    /**
     * Check search parameters for child records filter
     *
     * @param \VuFindSearch\ParamBag $params Search parameters
     *
     * @return bool True if the filter queries contain the exact filter
     * 'merged_child_boolean:true'
     */
    public function hasChildFilter($params)
    {
        $filters = $params->get('fq');
        return is_array($filters)
            && in_array('merged_child_boolean:true', $filters, true);
    }

    /**
     * Fetch appropriate dedup child
     *
     * Does nothing unless deduplication is enabled, the command targets this
     * listener's backend and the context is one of the handled ones.
     *
     * @param EventInterface $event Event
     *
     * @return EventInterface
     */
    public function onSearchPost(EventInterface $event)
    {
        // Inject deduplication details into record objects:
        $command = $event->getParam('command');

        if ($command->getTargetIdentifier() !== $this->backend->getIdentifier()) {
            return $event;
        }
        $context = $command->getContext();
        $contexts = ['search', 'similar', 'workExpressions'];
        if ($this->enabled && in_array($context, $contexts, true)) {
            $this->fetchLocalRecords($event);
        }
        return $event;
    }

    /**
     * Fetch local records for all the found dedup records
     *
     * First pass: for every dedup record (one with 'merged_boolean' set) pick the
     * local ID with the best priority and store it, together with the ranked list
     * of candidates, in the record's raw data. Second pass: retrieve the picked
     * local records in one batch and replace each dedup record in the result
     * with its local record.
     *
     * @param EventInterface $event Event
     *
     * @return void
     */
    protected function fetchLocalRecords($event)
    {
        $config = $this->serviceLocator->get(\VuFind\Config\PluginManager::class);
        $dataSourceConfig = $config->get($this->dataSourceConfig);
        $recordSources = $this->getActiveRecordSources($event);
        $sourcePriority = $this->determineSourcePriority($recordSources);
        $command = $event->getParam('command');
        $params = $command->getSearchParameters();
        $buildingPriority = $this->determineBuildingPriority($params);

        $idList = [];
        // Find out the best records and list their IDs:
        $result = $command->getResult();
        foreach ($result->getRecords() as $record) {
            $fields = $record->getRawData();

            if (!isset($fields['merged_boolean'])) {
                continue;
            }
            // A dedup record without any local IDs cannot be resolved to a
            // local record, so it is left untouched:
            $localIds = array_values((array)($fields['local_ids_str_mv'] ?? []));
            if (empty($localIds)) {
                continue;
            }

            [$dedupId, $dedupData] = $this->rankLocalIds(
                $localIds,
                $recordSources,
                $sourcePriority,
                $buildingPriority,
                $dataSourceConfig
            );
            $fields['dedup_id'] = $dedupId;
            $fields['dedup_data'] = $dedupData;
            $idList[] = $dedupId;
            $record->setRawData($fields);
        }
        if (empty($idList)) {
            return;
        }

        // Fetch records and index them by ID so that each dedup record can be
        // matched with a single lookup. The first record wins for a repeated ID.
        $localRecordsById = [];
        foreach ($this->backend->retrieveBatch($idList)->getRecords() as $local) {
            $localRecordsById[(string)$local->getUniqueID()] ??= $local;
        }

        // Assign the fetched records to the result:
        foreach ($result->getRecords() as $record) {
            $dedupRecordData = $record->getRawData();
            if (!isset($dedupRecordData['dedup_id'])) {
                continue;
            }
            $dedupId = (string)$dedupRecordData['dedup_id'];
            $foundLocalRecord = $localRecordsById[$dedupId] ?? null;
            if (null === $foundLocalRecord) {
                continue;
            }

            $localRecordData = $foundLocalRecord->getRawData();

            // Copy dedup_data for the active data sources:
            foreach ($dedupRecordData['dedup_data'] as $dedupDataKey => $dedupData) {
                if (!$recordSources || isset($sourcePriority[$dedupDataKey])) {
                    $localRecordData['dedup_data'][$dedupDataKey] = $dedupData;
                }
            }

            // Copy fields from dedup record to local record
            $localRecordData = $this->appendDedupRecordFields(
                $localRecordData,
                $dedupRecordData,
                $recordSources,
                $sourcePriority
            );
            $foundLocalRecord->setRawData($localRecordData);
            $foundLocalRecord->setHighlightDetails($record->getHighlightDetails());
            $foundLocalRecord->setLabels($record->getLabels());
            $result->replace($record, $foundLocalRecord);
        }
    }

    /**
     * Get currently active record sources.
     *
     * Reads the comma-separated 'sources' setting from the 'Records' section of
     * the search configuration.
     *
     * @param EventInterface $event Event
     *
     * @return array List of source identifiers, empty if the setting is empty
     *
     * @SuppressWarnings(PHPMD.UnusedFormalParameter)
     */
    protected function getActiveRecordSources($event): array
    {
        $config = $this->serviceLocator->get(\VuFind\Config\PluginManager::class);
        $searchConfig = $config->get($this->searchConfig);
        if (empty($searchConfig->Records->sources)) {
            return [];
        }
        return explode(',', $searchConfig->Records->sources);
    }

    /**
     * Append fields from dedup record to the selected local record. Note: the last
     * two parameters are unused in this default method, but they may be useful for
     * custom behavior in subclasses.
     *
     * @param array $localRecordData Local record data
     * @param array $dedupRecordData Dedup record data
     * @param array $recordSources   List of active record sources, empty if all
     * @param array $sourcePriority  Array of source priorities keyed by source id
     *
     * @return array Local record data
     *
     * @SuppressWarnings(PHPMD.UnusedFormalParameter)
     */
    protected function appendDedupRecordFields(
        $localRecordData,
        $dedupRecordData,
        $recordSources,
        $sourcePriority
    ) {
        $localRecordData['local_ids_str_mv'] = $dedupRecordData['local_ids_str_mv'];
        return $localRecordData;
    }

    /**
     * Function that determines the priority for sources
     *
     * The position of a source in the list becomes its priority, so the first
     * listed source gets the lowest number.
     *
     * @param array $recordSources Record sources defined in searches.ini
     *
     * @return array Array keyed by source with priority as the value
     */
    protected function determineSourcePriority($recordSources)
    {
        return empty($recordSources) ? [] : array_flip($recordSources);
    }

    /**
     * Function that determines the priority for buildings
     *
     * Collects every quoted value of a 'building:"..."' clause found in the
     * filter queries. The returned array always contains an empty-string key
     * with priority 0, so real buildings start from priority 1.
     *
     * @param \VuFindSearch\ParamBag $params Query parameters
     *
     * @return array Array keyed by building with priority as the value
     */
    protected function determineBuildingPriority($params)
    {
        $result = [];
        // There may be no parameters or no filter queries at all:
        foreach ($params?->get('fq') ?? [] as $fq) {
            if (!preg_match_all('/\bbuilding:"([^"]+)"/', $fq, $matches)) {
                continue;
            }
            foreach ($matches[1] as $value) {
                if (preg_match('/^\d+\/([^\/]+?)\//', $value, $levelMatch)) {
                    // Hierarchical facets; take only first level:
                    $result[] = $levelMatch[1];
                } else {
                    $result[] = $value;
                }
            }
        }

        array_unshift($result, '');
        return array_flip($result);
    }

    /**
     * Escape a value for use inside a double-quoted Solr phrase.
     *
     * Both the double quote and the backslash are escaped; escaping only the
     * quote would let a value ending in a backslash terminate the phrase early.
     *
     * @param string $value Raw value
     *
     * @return string Escaped value (without the surrounding quotes)
     */
    private function escapeSolrPhrase($value)
    {
        return addcslashes((string)$value, '"\\');
    }

    /**
     * Rank the local IDs of a single dedup record.
     *
     * @param array $localIds         Non-empty list of local record IDs
     * @param array $recordSources    List of active record sources, empty if all
     * @param array $sourcePriority   Source priorities keyed by source id
     * @param array $buildingPriority Building priorities keyed by building
     * @param mixed $dataSourceConfig Data source configuration keyed by source id
     *
     * @return array Two elements: the ID of the best local record, and an array
     * keyed by source id with 'id' and 'priority' for each accepted local ID,
     * sorted by ascending priority
     */
    private function rankLocalIds(
        array $localIds,
        $recordSources,
        $sourcePriority,
        $buildingPriority,
        $dataSourceConfig
    ) {
        // Fall back to the first ID if nothing has a configured priority:
        $dedupId = $localIds[0];
        $priority = 99999;
        // Sources without a configured priority get increasing values above the
        // threshold, which keeps their original order but never makes them best.
        $undefPriority = 99999;
        $dedupData = [];
        foreach ($localIds as $localId) {
            // The source id is the part of the record ID before the first dot:
            [$source] = explode('.', $localId, 2);
            // Ignore ID if source is not in the list of allowed record sources:
            if ($recordSources && !in_array($source, $recordSources, true)) {
                continue;
            }
            $localPriority = $this->getConfiguredPriority(
                $source,
                $sourcePriority,
                $buildingPriority,
                $dataSourceConfig
            ) ?? ++$undefPriority;
            if ($localPriority < $priority) {
                $dedupId = $localId;
                $priority = $localPriority;
            }
            $dedupData[$source] = [
                'id' => $localId,
                'priority' => $localPriority,
            ];
        }

        // Sort dedupData by priority:
        uasort(
            $dedupData,
            static fn ($a, $b) => $a['priority'] <=> $b['priority']
        );

        return [$dedupId, $dedupData];
    }

    /**
     * Look up the configured priority of a source.
     *
     * A building filter that matches the source id, or the institution
     * configured for the source, takes precedence and yields a negated value so
     * that it sorts before any source priority. Otherwise the source priority is
     * used.
     *
     * @param string $source           Source id
     * @param array  $sourcePriority   Source priorities keyed by source id
     * @param array  $buildingPriority Building priorities keyed by building
     * @param mixed  $dataSourceConfig Data source configuration keyed by source id
     *
     * @return int|null Priority, or null if none is configured for the source
     */
    private function getConfiguredPriority(
        $source,
        $sourcePriority,
        $buildingPriority,
        $dataSourceConfig
    ) {
        if (!empty($buildingPriority)) {
            if (isset($buildingPriority[$source])) {
                return -$buildingPriority[$source];
            }
            if (isset($dataSourceConfig[$source]['institution'])) {
                $institution = $dataSourceConfig[$source]['institution'];
                if (isset($buildingPriority[$institution])) {
                    return -$buildingPriority[$institution];
                }
            }
        }
        return $sourcePriority[$source] ?? null;
    }
}