Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
87.43% covered (warning)
87.43%
160 / 183
92.00% covered (success)
92.00%
23 / 25
CRAP
0.00% covered (danger)
0.00%
0 / 1
Backend
87.43% covered (warning)
87.43%
160 / 183
92.00% covered (success)
92.00%
23 / 25
73.39
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 setPageSize
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 search
83.33% covered (warning)
83.33%
5 / 6
0.00% covered (danger)
0.00%
0 / 1
2.02
 rawJsonSearch
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
2
 getExtraRequestDetails
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 resetExtraRequestDetails
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getIds
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
3
 random
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
2
 retrieve
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
2
 retrieveBatch
100.00% covered (success)
100.00%
20 / 20
100.00% covered (success)
100.00%
1 / 1
5
 similar
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
2
 terms
100.00% covered (success)
100.00%
19 / 19
100.00% covered (success)
100.00%
1 / 1
9
 alphabeticBrowse
100.00% covered (success)
100.00%
11 / 11
100.00% covered (success)
100.00%
1 / 1
3
 writeDocument
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
2
 setQueryBuilder
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getQueryBuilder
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 setSimilarBuilder
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getSimilarBuilder
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 getRecordCollectionFactory
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 getConnector
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 createRecordCollection
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 deserialize
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
2
 refineBrowseException
100.00% covered (success)
100.00%
12 / 12
100.00% covered (success)
100.00%
1 / 1
4
 injectResponseWriter
100.00% covered (success)
100.00%
16 / 16
100.00% covered (success)
100.00%
1 / 1
5
 workKeysSearch
0.00% covered (danger)
0.00%
0 / 22
0.00% covered (danger)
0.00%
0 / 1
72
1<?php
2
3/**
4 * SOLR backend.
5 *
6 * PHP version 8
7 *
8 * Copyright (C) Villanova University 2010.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2,
12 * as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
22 *
23 * @category VuFind
24 * @package  Search
25 * @author   David Maus <maus@hab.de>
26 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
27 * @link     https://vufind.org
28 */
29
30namespace VuFindSearch\Backend\Solr;
31
32use VuFindSearch\Backend\AbstractBackend;
33use VuFindSearch\Backend\Exception\BackendException;
34use VuFindSearch\Backend\Exception\RemoteErrorException;
35use VuFindSearch\Backend\Solr\Document\DocumentInterface;
36use VuFindSearch\Backend\Solr\Response\Json\Terms;
37use VuFindSearch\Exception\InvalidArgumentException;
38use VuFindSearch\Feature\ExtraRequestDetailsInterface;
39use VuFindSearch\Feature\GetIdsInterface;
40use VuFindSearch\Feature\RandomInterface;
41use VuFindSearch\Feature\RetrieveBatchInterface;
42use VuFindSearch\Feature\SimilarInterface;
43use VuFindSearch\ParamBag;
44use VuFindSearch\Query\AbstractQuery;
45use VuFindSearch\Query\WorkKeysQuery;
46use VuFindSearch\Response\RecordCollectionFactoryInterface;
47use VuFindSearch\Response\RecordCollectionInterface;
48
49use function count;
50use function is_int;
51
/**
 * SOLR backend.
 *
 * Builds Solr request parameters for the supported search operations, sends
 * them through the configured Connector and converts the JSON responses into
 * record collections (or plain arrays / Terms objects where noted).
 *
 * @category VuFind
 * @package  Search
 * @author   David Maus <maus@hab.de>
 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
 * @link     https://vufind.org
 */
class Backend extends AbstractBackend implements
    SimilarInterface,
    RetrieveBatchInterface,
    RandomInterface,
    ExtraRequestDetailsInterface,
    GetIdsInterface
{
    /**
     * Limit for records per query in a batch retrieval.
     *
     * @var int
     */
    protected $pageSize = 100;

    /**
     * Connector.
     *
     * @var Connector
     */
    protected $connector;

    /**
     * Query builder (null until set or lazy-loaded by getQueryBuilder()).
     *
     * @var QueryBuilder|null
     */
    protected $queryBuilder = null;

    /**
     * Similar records query builder (null until set or lazy-loaded by
     * getSimilarBuilder()).
     *
     * @var SimilarBuilder|null
     */
    protected $similarBuilder = null;

    /**
     * Constructor.
     *
     * @param Connector $connector SOLR connector
     *
     * @return void
     */
    public function __construct(Connector $connector)
    {
        $this->connector    = $connector;
        $this->identifier   = null;
    }

    /**
     * Set the limit for batch queries
     *
     * @param int $pageSize Records per Query
     *
     * @return void
     */
    public function setPageSize($pageSize)
    {
        $this->pageSize = $pageSize;
    }

    /**
     * Perform a search and return record collection.
     *
     * Work keys queries are delegated to workKeysSearch(); every other query is
     * run through rawJsonSearch() and converted into a record collection.
     *
     * @param AbstractQuery $query  Search query
     * @param int           $offset Search offset
     * @param int           $limit  Search limit
     * @param ?ParamBag     $params Search backend parameters
     *
     * @return RecordCollectionInterface
     */
    public function search(
        AbstractQuery $query,
        $offset,
        $limit,
        ?ParamBag $params = null
    ) {
        if ($query instanceof WorkKeysQuery) {
            return $this->workKeysSearch($query, $offset, $limit, $params);
        }
        $json = $this->rawJsonSearch($query, $offset, $limit, $params);
        $collection = $this->createRecordCollection($json);
        $this->injectSourceIdentifier($collection);

        return $collection;
    }

    /**
     * Perform a search and return a raw response.
     *
     * Note that a provided ParamBag is modified in place (response writer,
     * rows, start and the parameters produced by the query builder).
     *
     * @param AbstractQuery $query  Search query
     * @param int           $offset Search offset
     * @param int           $limit  Search limit
     * @param ?ParamBag     $params Search backend parameters
     *
     * @return string
     */
    public function rawJsonSearch(
        AbstractQuery $query,
        $offset,
        $limit,
        ?ParamBag $params = null
    ) {
        $params ??= new ParamBag();
        $this->injectResponseWriter($params);

        $params->set('rows', $limit);
        $params->set('start', $offset);
        $params->mergeWith($this->getQueryBuilder()->build($query, $params));
        return $this->connector->search($params);
    }

    /**
     * Returns some extra details about the search.
     *
     * @return array Array with key 'solrRequestUrl' holding the connector's
     * last URL
     */
    public function getExtraRequestDetails()
    {
        return [
            'solrRequestUrl' => $this->connector->getLastUrl(),
        ];
    }

    /**
     * Clears all accumulated extra request details
     *
     * @return void
     */
    public function resetExtraRequestDetails()
    {
        $this->connector->resetLastUrl();
    }

    /**
     * Perform a search and return record collection of only record identifiers.
     *
     * The connector's unique key is always requested; any field list ('fl')
     * already present in $params is merged in after it.
     *
     * @param AbstractQuery $query  Search query
     * @param int           $offset Search offset
     * @param int           $limit  Search limit
     * @param ?ParamBag     $params Search backend parameters
     *
     * @return RecordCollectionInterface
     */
    public function getIds(
        AbstractQuery $query,
        $offset,
        $limit,
        ?ParamBag $params = null
    ) {
        $params ??= new ParamBag();
        $this->injectResponseWriter($params);

        $params->set('rows', $limit);
        $params->set('start', $offset);
        $flParts = [$this->getConnector()->getUniqueKey()];
        if ($fl = $params->get('fl')) {
            // Merge multiple values if necessary, then split on delimiter:
            $flParts = array_unique(
                array_merge($flParts, explode(',', implode(',', $fl)))
            );
        }
        $params->set('fl', implode(',', $flParts));
        // NOTE(review): rawJsonSearch() passes $params to build() while this
        // call does not -- confirm that the difference is intentional.
        $params->mergeWith($this->getQueryBuilder()->build($query));
        $response   = $this->connector->search($params);
        $collection = $this->createRecordCollection($response);
        $this->injectSourceIdentifier($collection);

        return $collection;
    }

    /**
     * Get Random records
     *
     * Sorts by a "<number>_random" field using a fresh random number for each
     * call and then delegates to search() with offset 0.
     *
     * @param AbstractQuery $query  Search query
     * @param int           $limit  Search limit
     * @param ?ParamBag     $params Search backend parameters
     *
     * @return RecordCollectionInterface
     */
    public function random(
        AbstractQuery $query,
        $limit,
        ?ParamBag $params = null
    ) {
        $params ??= new ParamBag();
        $this->injectResponseWriter($params);

        $random = rand(0, 1000000);
        $sort = "{$random}_random asc";
        $params->set('sort', $sort);

        return $this->search($query, 0, $limit, $params);
    }

    /**
     * Retrieve a single document.
     *
     * @param string    $id     Document identifier
     * @param ?ParamBag $params Search backend parameters
     *
     * @return RecordCollectionInterface
     */
    public function retrieve($id, ?ParamBag $params = null)
    {
        $params ??= new ParamBag();
        $this->injectResponseWriter($params);

        $response   = $this->connector->retrieve($id, $params);
        $collection = $this->createRecordCollection($response);
        $this->injectSourceIdentifier($collection);
        return $collection;
    }

    /**
     * Retrieve a batch of documents.
     *
     * IDs are fetched in chunks of at most $this->pageSize per request and the
     * records of all chunks are combined into the first chunk's collection.
     * An empty ID list yields an empty record collection. A provided ParamBag
     * is modified in place ('q', 'start' and 'rows' are overwritten).
     *
     * @param array     $ids    Array of document identifiers
     * @param ?ParamBag $params Search backend parameters
     *
     * @return RecordCollectionInterface
     */
    public function retrieveBatch($ids, ?ParamBag $params = null)
    {
        $params ??= new ParamBag();

        // Callback function for formatting IDs as quoted phrases. The backslash
        // is escaped along with the quote because it is the escape character
        // inside a quoted phrase; leaving it alone would corrupt the query for
        // IDs containing (or ending with) a backslash.
        $formatIds = static function ($i) {
            return '"' . addcslashes($i, '"\\') . '"';
        };

        // A page size below 1 would make array_splice() remove nothing, so the
        // loop below would never terminate; always fetch at least one ID.
        $pageSize = max(1, (int)$this->pageSize);

        // Retrieve records a page at a time:
        $results = null;
        while (count($ids) > 0) {
            $currentPage = array_splice($ids, 0, $pageSize);
            $currentPage = array_map($formatIds, $currentPage);
            $params->set('q', 'id:(' . implode(' OR ', $currentPage) . ')');
            $params->set('start', 0);
            $params->set('rows', $pageSize);
            $this->injectResponseWriter($params);
            $next = $this->createRecordCollection(
                $this->connector->search($params)
            );
            if (null === $results) {
                $results = $next;
            } else {
                foreach ($next->getRecords() as $record) {
                    $results->add($record);
                }
            }
        }

        // Nothing was requested: return an empty collection instead of passing
        // a non-collection value on to injectSourceIdentifier().
        $results ??= $this->createRecordCollection('{}');
        $this->injectSourceIdentifier($results);
        return $results;
    }

    /**
     * Return similar records.
     *
     * @param string    $id     Id of record to compare with
     * @param ?ParamBag $params Search backend parameters
     *
     * @return RecordCollectionInterface
     */
    public function similar($id, ?ParamBag $params = null)
    {
        $params ??= new ParamBag();
        $this->injectResponseWriter($params);

        $params->mergeWith($this->getSimilarBuilder()->build($id));
        $response   = $this->connector->similar($id, $params);
        $collection = $this->createRecordCollection($response);
        $this->injectSourceIdentifier($collection);
        return $collection;
    }

    /**
     * Return terms from SOLR index.
     *
     * May also be called with a ParamBag as the only argument; in that case
     * the bag is used as $params and no field is set from the arguments.
     *
     * @param string|ParamBag|null $field  Index field (or ParamBag, see above)
     * @param string               $start  Starting term (blank for beginning of
     * list)
     * @param int                  $limit  Maximum number of terms
     * @param ?ParamBag            $params Additional parameters
     *
     * @return Terms
     */
    public function terms(
        $field = null,
        $start = null,
        $limit = null,
        ?ParamBag $params = null
    ) {
        // Support alternate syntax with ParamBag as first parameter:
        if ($field instanceof ParamBag && $params === null) {
            $params = $field;
            $field = null;
        }

        // Create empty ParamBag if none provided:
        $params ??= new ParamBag();
        $this->injectResponseWriter($params);

        // Always enable terms:
        $params->set('terms', 'true');

        // Use parameters if provided:
        if (null !== $field) {
            $params->set('terms.fl', $field);
        }
        if (null !== $start) {
            $params->set('terms.lower', $start);
        }
        if (null !== $limit) {
            $params->set('terms.limit', $limit);
        }

        // Set defaults unless overridden:
        if (!$params->hasParam('terms.lower.incl')) {
            $params->set('terms.lower.incl', 'false');
        }
        if (!$params->hasParam('terms.sort')) {
            $params->set('terms.sort', 'index');
        }

        $response = $this->connector->terms($params);
        return new Terms($this->deserialize($response));
    }

    /**
     * Obtain information from an alphabetic browse index.
     *
     * @param string    $source      Name of index to search
     * @param string    $from        Starting point for browse results
     * @param int       $page        Result page to return (starts at 0)
     * @param int       $limit       Number of results to return on each page
     * @param ?ParamBag $params      Additional parameters
     * @param int       $offsetDelta Delta to use when calculating page
     * offset (useful for showing a few results above the highlighted row)
     *
     * @return array
     *
     * @throws RemoteErrorException Request failed (message refined when the
     * browse index appears to be missing)
     */
    public function alphabeticBrowse(
        $source,
        $from,
        $page,
        $limit = 20,
        $params = null,
        $offsetDelta = 0
    ) {
        $params = $params ?: new ParamBag();
        $this->injectResponseWriter($params);

        $params->set('from', $from);
        $params->set('offset', ($page * $limit) + $offsetDelta);
        $params->set('rows', $limit);
        $params->set('source', $source);

        $response = null;
        try {
            $response = $this->connector->query('browse', $params);
        } catch (RemoteErrorException $e) {
            // Rethrows, possibly with a more helpful message:
            $this->refineBrowseException($e);
        }
        return $this->deserialize($response);
    }

    /**
     * Write a document to Solr. Return an array of details about the updated index.
     *
     * @param DocumentInterface $doc     Document to write
     * @param ?int              $timeout Timeout value (null for default)
     * @param string            $handler Handler to use
     * @param ?ParamBag         $params  Search backend parameters
     *
     * @return array Array with key 'core' holding the connector's core name
     */
    public function writeDocument(
        DocumentInterface $doc,
        ?int $timeout = null,
        string $handler = 'update',
        ?ParamBag $params = null
    ) {
        $connector = $this->getConnector();

        // Only pass a timeout option when one was explicitly requested:
        $httpOptions = null !== $timeout ? ['timeout' => $timeout] : [];

        // Write!
        $connector->callWithHttpOptions(
            $httpOptions,
            'write',
            $doc,
            $handler,
            $params
        );

        // Save the core name in the results in case the caller needs it.
        return ['core' => $connector->getCore()];
    }

    /**
     * Set the query builder.
     *
     * @param QueryBuilder $queryBuilder Query builder
     *
     * @return void
     */
    public function setQueryBuilder(QueryBuilder $queryBuilder)
    {
        $this->queryBuilder = $queryBuilder;
    }

    /**
     * Return query builder.
     *
     * Lazy loads an empty default QueryBuilder if none was set.
     *
     * @return QueryBuilder
     */
    public function getQueryBuilder()
    {
        return $this->queryBuilder ??= new QueryBuilder();
    }

    /**
     * Set the similar records query builder.
     *
     * @param SimilarBuilder $similarBuilder Similar builder
     *
     * @return void
     */
    public function setSimilarBuilder(SimilarBuilder $similarBuilder)
    {
        $this->similarBuilder = $similarBuilder;
    }

    /**
     * Return similar records query builder.
     *
     * Lazy loads an empty default SimilarBuilder if none was set.
     *
     * @return SimilarBuilder
     */
    public function getSimilarBuilder()
    {
        return $this->similarBuilder ??= new SimilarBuilder();
    }

    /**
     * Return the record collection factory.
     *
     * Lazy loads a generic collection factory.
     *
     * @return RecordCollectionFactoryInterface
     */
    public function getRecordCollectionFactory()
    {
        if (!$this->collectionFactory) {
            $this->collectionFactory = new Response\Json\RecordCollectionFactory();
        }
        return $this->collectionFactory;
    }

    /**
     * Return the SOLR connector.
     *
     * @return Connector
     */
    public function getConnector()
    {
        return $this->connector;
    }

    /// Internal API

    /**
     * Create record collection.
     *
     * @param string $json Serialized JSON response
     *
     * @return RecordCollectionInterface
     *
     * @throws BackendException Deserialization error
     */
    protected function createRecordCollection($json)
    {
        return $this->getRecordCollectionFactory()
            ->factory($this->deserialize($json));
    }

    /**
     * Deserialize JSON response.
     *
     * Also logs the query time reported in the response header ('n/a' when the
     * response does not contain one) at debug level.
     *
     * @param string $json Serialized JSON response
     *
     * @return array
     *
     * @throws BackendException Deserialization error
     */
    protected function deserialize($json)
    {
        $response = json_decode($json, true);
        $error    = json_last_error();
        if ($error !== \JSON_ERROR_NONE) {
            throw new BackendException(
                sprintf('JSON decoding error: %s -- %s', $error, $json)
            );
        }
        $qtime = $response['responseHeader']['QTime'] ?? 'n/a';
        $this->log('debug', 'Deserialized SOLR response', ['qtime' => $qtime]);
        return $response;
    }

    /**
     * Improve the exception message for alphaBrowse errors when appropriate.
     *
     * This method always throws: either a new exception with a hint about the
     * missing browse index, or the original exception unchanged.
     *
     * @param RemoteErrorException $e Exception to clean up
     *
     * @return never
     * @throws RemoteErrorException
     */
    protected function refineBrowseException(RemoteErrorException $e)
    {
        $error = $e->getMessage() . $e->getResponse();
        $indexMissing = str_contains($error, 'does not exist')
            || str_contains($error, 'no such table')
            || str_contains($error, 'couldn\'t find a browse index');
        if ($indexMissing) {
            throw new RemoteErrorException(
                'Alphabetic Browse index missing.  See ' .
                'https://vufind.org/wiki/indexing:alphabetical_heading_browse for ' .
                'details on generating the index.',
                $e->getCode(),
                $e->getResponse(),
                $e->getPrevious()
            );
        }
        throw $e;
    }

    /**
     * Inject response writer and named list implementation into parameters.
     *
     * Forces wt=json and json.nl=arrarr, rejecting any other value that is
     * already present in the parameters.
     *
     * @param ParamBag $params Parameters
     *
     * @return void
     *
     * @throws InvalidArgumentException Response writer and named list
     * implementation already set to an incompatible type.
     */
    protected function injectResponseWriter(ParamBag $params)
    {
        if (array_diff($params->get('wt') ?: [], ['json'])) {
            throw new InvalidArgumentException(
                sprintf(
                    'Invalid response writer type: %s',
                    implode(', ', $params->get('wt'))
                )
            );
        }
        if (array_diff($params->get('json.nl') ?: [], ['arrarr'])) {
            throw new InvalidArgumentException(
                sprintf(
                    'Invalid named list implementation type: %s',
                    implode(', ', $params->get('json.nl'))
                )
            );
        }
        $params->set('wt', ['json']);
        $params->set('json.nl', ['arrarr']);
    }

    /**
     * Return work expressions.
     *
     * Uses the work keys from the query when it provides any; otherwise the
     * record with the query's ID is retrieved and asked for its work keys
     * (via tryMethod('getWorkKeys')). If no work keys can be determined, an
     * empty record collection is returned. The provided parameters are cloned,
     * so the caller's ParamBag is not modified.
     *
     * @param WorkKeysQuery $query         Search query
     * @param int           $offset        Search offset
     * @param int           $limit         Search limit
     * @param ?ParamBag     $defaultParams Search backend parameters
     *
     * @return RecordCollectionInterface
     *
     * @throws BackendException Record ID is an empty string
     */
    protected function workKeysSearch(
        WorkKeysQuery $query,
        int $offset,
        int $limit,
        ?ParamBag $defaultParams = null
    ): RecordCollectionInterface {
        $id = $query->getId();
        if ('' === $id) {
            throw new BackendException('Record ID empty in work keys query');
        }
        if (!($workKeys = $query->getWorkKeys())) {
            // Request JSON explicitly, as retrieve() does, since the response
            // is JSON-decoded below:
            $recordParams = new ParamBag();
            $this->injectResponseWriter($recordParams);
            $recordResponse = $this->connector->retrieve($id, $recordParams);
            $recordCollection = $this->createRecordCollection($recordResponse);
            $record = $recordCollection->first();
            if (!$record || !($workKeys = $record->tryMethod('getWorkKeys'))) {
                // Tag the empty result with the source identifier like every
                // other collection returned by this backend:
                $emptyCollection = $this->createRecordCollection('{}');
                $this->injectSourceIdentifier($emptyCollection);
                return $emptyCollection;
            }
        }

        $params = $defaultParams ? clone $defaultParams : new ParamBag();
        $this->injectResponseWriter($params);
        // The unit separator (U+001F) is used as the terms delimiter:
        $params->set(
            'q',
            "{!terms f=work_keys_str_mv separator=\"\u{001f}\"}"
            . implode("\u{001f}", $workKeys)
        );
        if (!$query->getIncludeSelf()) {
            // Escape both the quote and the backslash (the escape character
            // inside a quoted phrase) so that any ID yields a valid filter:
            $params->add('fq', sprintf('-id:"%s"', addcslashes($id, '"\\')));
        }
        $params->set('rows', $limit);
        $params->set('start', $offset);
        if (!$params->hasParam('sort')) {
            $params->add('sort', 'publishDateSort desc, title_sort asc');
        }
        $response = $this->connector->search($params);
        $collection = $this->createRecordCollection($response);
        $this->injectSourceIdentifier($collection);
        return $collection;
    }
}