Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 139
0.00% covered (danger)
0.00%
0 / 10
CRAP
0.00% covered (danger)
0.00%
0 / 1
DeduplicationListener
0.00% covered (danger)
0.00%
0 / 139
0.00% covered (danger)
0.00%
0 / 10
2450
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 5
0.00% covered (danger)
0.00%
0 / 1
2
 attach
0.00% covered (danger)
0.00%
0 / 10
0.00% covered (danger)
0.00%
0 / 1
2
 onSearchPre
0.00% covered (danger)
0.00%
0 / 15
0.00% covered (danger)
0.00%
0 / 1
90
 hasChildFilter
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
6
 onSearchPost
0.00% covered (danger)
0.00%
0 / 8
0.00% covered (danger)
0.00%
0 / 1
20
 fetchLocalRecords
0.00% covered (danger)
0.00%
0 / 79
0.00% covered (danger)
0.00%
0 / 1
506
 getActiveRecordSources
0.00% covered (danger)
0.00%
0 / 5
0.00% covered (danger)
0.00%
0 / 1
6
 appendDedupRecordFields
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 determineSourcePriority
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
6
 determineBuildingPriority
0.00% covered (danger)
0.00%
0 / 10
0.00% covered (danger)
0.00%
0 / 1
30
1<?php
2
3/**
4 * Solr deduplication (merged records) listener.
5 *
6 * See https://vufind.org/wiki/indexing:deduplication for details on how this is
7 * used.
8 *
9 * PHP version 8
10 *
11 * Copyright (C) Villanova University 2013.
12 * Copyright (C) The National Library of Finland 2013-2020.
13 *
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License version 2,
16 * as published by the Free Software Foundation.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
26 *
27 * @category VuFind
28 * @package  Search
29 * @author   David Maus <maus@hab.de>
30 * @author   Ere Maijala <ere.maijala@helsinki.fi>
31 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
32 * @link     https://vufind.org Main Site
33 */
34
35namespace VuFind\Search\Solr;
36
37use Laminas\EventManager\EventInterface;
38use Laminas\EventManager\SharedEventManagerInterface;
39use Psr\Container\ContainerInterface;
40use VuFindSearch\Backend\Solr\Backend;
41use VuFindSearch\Service;
42
43use function in_array;
44
45/**
46 * Solr merged record handling listener.
47 *
48 * @category VuFind
49 * @package  Search
50 * @author   David Maus <maus@hab.de>
51 * @author   Ere Maijala <ere.maijala@helsinki.fi>
52 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
53 * @link     https://vufind.org Main Site
54 */
class DeduplicationListener
{
    /**
     * Search backend whose commands this listener reacts to. Events targeting
     * any other backend identifier are ignored.
     *
     * @var Backend
     */
    protected $backend;

    /**
     * Service container, used to obtain the configuration plugin manager.
     *
     * @var ContainerInterface
     */
    protected $serviceLocator;

    /**
     * Search configuration file identifier (read for the Records/sources
     * setting).
     *
     * @var string
     */
    protected $searchConfig;

    /**
     * Data source configuration file identifier (read for the per-source
     * "institution" setting).
     *
     * @var string
     */
    protected $dataSourceConfig;

    /**
     * Whether deduplication is enabled.
     *
     * @var bool
     */
    protected $enabled;

    /**
     * Constructor.
     *
     * @param Backend            $backend          Search backend
     * @param ContainerInterface $serviceLocator   Service locator
     * @param string             $searchConfig     Search config file id
     * @param string             $dataSourceConfig Data source file id
     * @param bool               $enabled          Whether deduplication is
     * enabled
     *
     * @return void
     */
    public function __construct(
        Backend $backend,
        ContainerInterface $serviceLocator,
        $searchConfig,
        $dataSourceConfig = 'datasources',
        $enabled = true
    ) {
        $this->backend = $backend;
        $this->serviceLocator = $serviceLocator;
        $this->searchConfig = $searchConfig;
        $this->dataSourceConfig = $dataSourceConfig;
        $this->enabled = $enabled;
    }

    /**
     * Attach listener to shared event manager.
     *
     * Registers onSearchPre() for the search service's pre event and
     * onSearchPost() for its post event.
     *
     * @param SharedEventManagerInterface $manager Shared event manager
     *
     * @return void
     */
    public function attach(
        SharedEventManagerInterface $manager
    ) {
        $manager->attach(
            Service::class,
            Service::EVENT_PRE,
            [$this, 'onSearchPre']
        );
        $manager->attach(
            Service::class,
            Service::EVENT_POST,
            [$this, 'onSearchPost']
        );
    }

    /**
     * Set up filter for excluding merge children.
     *
     * Adds exactly one "fq" filter to the search parameters when the command
     * targets this listener's backend and runs in one of the handled contexts:
     * - deduplication enabled, context other than "getids" and no explicit
     *   child filter present: exclude merged child records (and, for "similar"
     *   searches, also the dedup record containing the source record ID);
     * - otherwise: exclude the dedup (merged) records themselves.
     *
     * @param EventInterface $event Event
     *
     * @return EventInterface
     */
    public function onSearchPre(EventInterface $event)
    {
        $command = $event->getParam('command');
        if ($command->getTargetIdentifier() !== $this->backend->getIdentifier()) {
            return $event;
        }

        $params = $command->getSearchParameters();
        $context = $command->getContext();
        $contexts = ['search', 'similar', 'getids', 'workExpressions'];
        if (!$params || !in_array($context, $contexts, true)) {
            return $event;
        }

        // If deduplication is enabled, filter out merged child records,
        // otherwise filter out dedup records.
        if (
            $this->enabled && 'getids' !== $context
            && !$this->hasChildFilter($params)
        ) {
            $fq = '-merged_child_boolean:true';
            if ($context === 'similar' && $id = $event->getParam('id')) {
                // The ID is embedded in a quoted phrase, so both the quote and
                // the escape character itself must be escaped; otherwise an ID
                // containing a backslash would alter or break the phrase.
                $fq .= ' AND -local_ids_str_mv:"'
                    . addcslashes($id, '"\\') . '"';
            }
        } else {
            $fq = '-merged_boolean:true';
        }
        $params->add('fq', $fq);

        return $event;
    }

    /**
     * Check search parameters for child records filter
     *
     * Only an exact "merged_child_boolean:true" filter value counts; the filter
     * appearing as part of a longer filter query is not detected.
     *
     * @param \VuFindSearch\ParamBag $params Search parameters
     *
     * @return bool
     */
    public function hasChildFilter($params)
    {
        $filters = $params->get('fq');
        return !empty($filters)
            && in_array('merged_child_boolean:true', $filters, true);
    }

    /**
     * Fetch appropriate dedup child
     *
     * When deduplication is enabled and the command targets this listener's
     * backend in a handled context, replaces dedup records in the result with
     * the preferred local records.
     *
     * @param EventInterface $event Event
     *
     * @return EventInterface
     */
    public function onSearchPost(EventInterface $event)
    {
        // Inject deduplication details into record objects:
        $command = $event->getParam('command');

        if ($command->getTargetIdentifier() !== $this->backend->getIdentifier()) {
            return $event;
        }
        $context = $command->getContext();
        $contexts = ['search', 'similar', 'workExpressions'];
        if ($this->enabled && in_array($context, $contexts, true)) {
            $this->fetchLocalRecords($event);
        }
        return $event;
    }

    /**
     * Fetch local records for all the found dedup records
     *
     * Works in two passes over the command's result:
     * 1. For every record carrying a "merged_boolean" field, choose the local
     *    record ID with the best (numerically lowest) priority and store it,
     *    together with the per-source candidates, in the record's raw data as
     *    "dedup_id" and "dedup_data".
     * 2. Retrieve all chosen local records in a single batch and replace each
     *    dedup record in the result with its local record, carrying over
     *    dedup data, highlighting details and labels.
     *
     * @param EventInterface $event Event
     *
     * @return void
     */
    protected function fetchLocalRecords($event)
    {
        $config = $this->serviceLocator->get(\VuFind\Config\PluginManager::class);
        $dataSourceConfig = $config->get($this->dataSourceConfig);
        $recordSources = $this->getActiveRecordSources($event);
        $sourcePriority = $this->determineSourcePriority($recordSources);
        $command = $event->getParam('command');
        $params = $command->getSearchParameters();
        $buildingPriority = $this->determineBuildingPriority($params);

        $idList = [];
        // Find out the best records and list their IDs:
        $result = $command->getResult();
        foreach ($result->getRecords() as $record) {
            $fields = $record->getRawData();

            if (!isset($fields['merged_boolean'])) {
                continue;
            }
            $localIds = (array)($fields['local_ids_str_mv'] ?? []);
            if (empty($localIds)) {
                // A merged record without any local IDs cannot be resolved to
                // a local record; leave it untouched instead of requesting a
                // null ID from the backend.
                continue;
            }
            // Fall back to the first local ID if no candidate gets a priority
            // better than the initial threshold below:
            $dedupId = $localIds[0];
            $priority = 99999;
            // Sources without a configured priority are ranked after the
            // threshold, in the order they are encountered:
            $undefPriority = 99999;
            // Find the document that matches the source priority best:
            $dedupData = [];
            foreach ($localIds as $localId) {
                $localPriority = null;
                // The source ID is the part of the local ID before the first
                // period:
                [$source] = explode('.', $localId, 2);
                // Ignore ID if source is not in the list of allowed record sources:
                if ($recordSources && !in_array($source, $recordSources, true)) {
                    continue;
                }
                // Building filters take precedence over configured source
                // order; they are negated so that they always sort before the
                // (non-negative) source priorities:
                if (!empty($buildingPriority)) {
                    if (isset($buildingPriority[$source])) {
                        $localPriority = -$buildingPriority[$source];
                    } elseif (isset($dataSourceConfig[$source]['institution'])) {
                        $institution = $dataSourceConfig[$source]['institution'];
                        if (isset($buildingPriority[$institution])) {
                            $localPriority = -$buildingPriority[$institution];
                        }
                    }
                }
                if (!isset($localPriority)) {
                    if (isset($sourcePriority[$source])) {
                        $localPriority = $sourcePriority[$source];
                    } else {
                        $localPriority = ++$undefPriority;
                    }
                }
                if ($localPriority < $priority) {
                    $dedupId = $localId;
                    $priority = $localPriority;
                }
                // Keyed by source, so a later ID from the same source replaces
                // an earlier one:
                $dedupData[$source] = [
                    'id' => $localId,
                    'priority' => $localPriority,
                ];
            }
            $fields['dedup_id'] = $dedupId;
            $idList[] = $dedupId;

            // Sort dedupData by priority:
            uasort(
                $dedupData,
                static fn ($a, $b) => $a['priority'] <=> $b['priority']
            );
            $fields['dedup_data'] = $dedupData;
            $record->setRawData($fields);
        }
        if (empty($idList)) {
            return;
        }

        // Fetch records and assign them to the result:
        $localRecords = $this->backend->retrieveBatch($idList)->getRecords();
        foreach ($result->getRecords() as $record) {
            $dedupRecordData = $record->getRawData();
            if (!isset($dedupRecordData['dedup_id'])) {
                continue;
            }
            // Find the corresponding local record in the results:
            $foundLocalRecord = null;
            foreach ($localRecords as $localRecord) {
                if ($localRecord->getUniqueID() == $dedupRecordData['dedup_id']) {
                    $foundLocalRecord = $localRecord;
                    break;
                }
            }
            if (!$foundLocalRecord) {
                // The chosen local record was not returned by the backend;
                // keep the dedup record in the result as is.
                continue;
            }

            $localRecordData = $foundLocalRecord->getRawData();

            // Copy dedup_data for the active data sources:
            foreach ($dedupRecordData['dedup_data'] as $dedupDataKey => $dedupData) {
                if (!$recordSources || isset($sourcePriority[$dedupDataKey])) {
                    $localRecordData['dedup_data'][$dedupDataKey] = $dedupData;
                }
            }

            // Copy fields from dedup record to local record
            $localRecordData = $this->appendDedupRecordFields(
                $localRecordData,
                $dedupRecordData,
                $recordSources,
                $sourcePriority
            );
            $foundLocalRecord->setRawData($localRecordData);
            $foundLocalRecord->setHighlightDetails($record->getHighlightDetails());
            $foundLocalRecord->setLabels($record->getLabels());
            $result->replace($record, $foundLocalRecord);
        }
    }

    /**
     * Get currently active record sources.
     *
     * Reads the comma-separated Records/sources setting from the search
     * configuration. Values are used exactly as written (no whitespace
     * trimming).
     *
     * @param EventInterface $event Event
     *
     * @return array List of source IDs, empty if the setting is missing or
     * empty
     *
     * @SuppressWarnings(PHPMD.UnusedFormalParameter)
     */
    protected function getActiveRecordSources($event): array
    {
        $config = $this->serviceLocator->get(\VuFind\Config\PluginManager::class);
        $searchConfig = $config->get($this->searchConfig);
        return !empty($searchConfig->Records->sources)
            ? explode(',', $searchConfig->Records->sources)
            : [];
    }

    /**
     * Append fields from dedup record to the selected local record. Note: the last
     * two parameters are unused in this default method, but they may be useful for
     * custom behavior in subclasses.
     *
     * The default implementation copies only the "local_ids_str_mv" field.
     *
     * @param array $localRecordData Local record data
     * @param array $dedupRecordData Dedup record data
     * @param array $recordSources   List of active record sources, empty if all
     * @param array $sourcePriority  Array of source priorities keyed by source id
     *
     * @return array Local record data
     *
     * @SuppressWarnings(PHPMD.UnusedFormalParameter)
     */
    protected function appendDedupRecordFields(
        $localRecordData,
        $dedupRecordData,
        $recordSources,
        $sourcePriority
    ) {
        $localRecordData['local_ids_str_mv'] = $dedupRecordData['local_ids_str_mv'];
        return $localRecordData;
    }

    /**
     * Function that determines the priority for sources
     *
     * The priority of a source is its zero-based position in the given list,
     * so earlier sources are preferred.
     *
     * @param array $recordSources Record sources defined in searches.ini
     *
     * @return array Array keyed by source with priority as the value
     */
    protected function determineSourcePriority($recordSources)
    {
        if (empty($recordSources)) {
            return [];
        }
        return array_flip($recordSources);
    }

    /**
     * Function that determines the priority for buildings
     *
     * Collects the values of all building:"..." clauses found in the "fq"
     * filters, in order of appearance. For hierarchical facet values of the
     * form "<level>/<first>/..." only the first level is used.
     *
     * An empty-string entry is always placed first so that real buildings get
     * priorities starting from 1; the result is therefore never empty. Missing
     * parameters or a missing "fq" value are treated as "no filters".
     *
     * @param \VuFindSearch\ParamBag $params Query parameters
     *
     * @return array Array keyed by building with priority as the value
     */
    protected function determineBuildingPriority($params)
    {
        $result = [];
        // The parameter bag may be absent and "fq" may be unset:
        $filters = (array)($params?->get('fq') ?? []);
        foreach ($filters as $fq) {
            if (!preg_match_all('/\bbuilding:"([^"]+)"/', $fq, $matches)) {
                continue;
            }
            foreach ($matches[1] as $value) {
                if (preg_match('/^\d+\/([^\/]+?)\//', $value, $levelMatch)) {
                    // Hierarchical facets; take only first level:
                    $result[] = $levelMatch[1];
                } else {
                    $result[] = $value;
                }
            }
        }

        array_unshift($result, '');
        return array_flip($result);
    }
}