Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
40 / 40 |
|
100.00% |
3 / 3 |
CRAP | |
100.00% |
1 / 1 |
CursorMarkIdFetcher | |
100.00% |
40 / 40 |
|
100.00% |
3 / 3 |
6 | |
100.00% |
1 / 1 |
getInitialOffset | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setupBackend | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
1 | |||
getIdsFromBackend | |
100.00% |
30 / 30 |
|
100.00% |
1 / 1 |
4 |
1 | <?php |
2 | |
3 | /** |
4 | * Plugin to get IDs for a sitemap from a backend using cursor marks (if supported). |
5 | * |
6 | * PHP version 8 |
7 | * |
8 | * Copyright (C) Villanova University 2021, 2022. |
9 | * |
10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2, |
12 | * as published by the Free Software Foundation. |
13 | * |
14 | * This program is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | * GNU General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU General Public License |
20 | * along with this program; if not, write to the Free Software |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 | * |
23 | * @category VuFind |
24 | * @package Search |
25 | * @author Demian Katz <demian.katz@villanova.edu> |
26 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
27 | * @link https://vufind.org |
28 | */ |
29 | |
30 | namespace VuFind\Sitemap\Plugin\Index; |
31 | |
32 | use VuFindSearch\Backend\Solr\Response\Json\RecordCollectionFactory; |
33 | use VuFindSearch\Command\GetIdsCommand; |
34 | use VuFindSearch\ParamBag; |
35 | use VuFindSearch\Query\Query; |
36 | |
37 | /** |
38 | * Plugin to get IDs for a sitemap from a backend using cursor marks (if supported). |
39 | * |
40 | * @category VuFind |
41 | * @package Search |
42 | * @author Demian Katz <demian.katz@villanova.edu> |
43 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
44 | * @link https://vufind.org |
45 | */ |
class CursorMarkIdFetcher extends AbstractIdFetcher
{
    /**
     * Cursor mark that was passed to the most recent getIdsFromBackend() call;
     * used to detect that the result set has been exhausted.
     *
     * @var string
     */
    protected $prevCursorMark = '';

    /**
     * Baseline Solr parameters merged into every request.
     *
     * @var array
     */
    protected $defaultParams = [
        'q' => '*:*',
        // cursorMark paging requires a start offset of 0
        'start' => 0,
        'wt' => 'json',
        // timeAllowed cannot be combined with cursorMark, so neutralize any
        // default value that may have been configured
        'timeAllowed' => -1,
    ];

    /**
     * Return the offset value that starts a fresh pass through the index.
     *
     * @return string
     */
    public function getInitialOffset(): string
    {
        return '*';
    }

    /**
     * Prepare the given backend for ID retrieval.
     *
     * @param string $backend Search backend ID
     *
     * @return void
     */
    public function setupBackend(string $backend): void
    {
        // Only identifiers are needed, so install a minimal record factory to
        // keep record construction as cheap as possible.
        $collectionFactory = new RecordCollectionFactory(
            fn ($data) => new \VuFindSearch\Response\SimpleRecord($data)
        );
        $setFactoryCommand
            = new \VuFindSearch\Command\SetRecordCollectionFactoryCommand(
                $backend,
                $collectionFactory
            );
        $this->searchService->invoke($setFactoryCommand);

        // Forget the last cursor mark so that this object can be reused for
        // another backend.
        $this->prevCursorMark = '';
    }

    /**
     * Retrieve a batch of IDs. The returned array always contains the key
     * "ids" (the latest set of retrieved IDs). When a request was actually sent
     * to the backend, it also contains "lastmods" (the last_indexed value of
     * each retrieved record, in the same order as "ids") and "nextOffset" (an
     * offset which can be passed to the next call to this function to retrieve
     * the next page). When all IDs have been retrieved, the nextOffset value
     * MUST NOT be included in the return array; this is signalled by returning
     * only an empty "ids" list once the incoming cursor mark repeats.
     *
     * @param string $backend      Search backend ID
     * @param string $cursorMark   String representing progress through set
     * @param int    $countPerPage Page size
     * @param array  $filters      Filters to apply to the search
     *
     * @return array
     */
    public function getIdsFromBackend(
        string $backend,
        string $cursorMark,
        int $countPerPage,
        array $filters
    ): array {
        // Receiving the same cursor mark twice in a row means there is nothing
        // left to fetch.
        if ($this->prevCursorMark === $cursorMark) {
            return ['ids' => []];
        }
        $this->prevCursorMark = $cursorMark;

        // Ask the backend which field holds the unique record identifier.
        $key = $this->searchService
            ->invoke(new \VuFindSearch\Command\GetUniqueKeyCommand($backend, []))
            ->getResult();

        // Defaults take precedence over the per-request values below (array
        // union keeps the left-hand entry when keys collide).
        $requestParams = $this->defaultParams + [
            'rows' => $countPerPage,
            'sort' => $key . ' asc',
            'cursorMark' => $cursorMark,
            'fl' => 'last_indexed',
        ];
        $params = new ParamBag($requestParams);

        // Each filter becomes its own fq parameter.
        foreach ($filters as $filterQuery) {
            $params->add('fq', $filterQuery);
        }

        $idsCommand = new GetIdsCommand(
            $backend,
            new Query('*:*'),
            0,
            $countPerPage,
            $params
        );
        $results = $this->searchService->invoke($idsCommand)->getResult();

        // Collect identifiers and last-indexed values as parallel lists.
        $ids = [];
        $lastmods = [];
        foreach ($results->getRecords() as $record) {
            $ids[] = $record->get($key);
            $lastmods[] = $record->get('last_indexed');
        }

        return [
            'ids' => $ids,
            'nextOffset' => $results->getCursorMark(),
            'lastmods' => $lastmods,
        ];
    }
}