Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
88.79% |
95 / 107 |
|
72.22% |
13 / 18 |
CRAP | |
0.00% |
0 / 1 |
Connector | |
88.79% |
95 / 107 |
|
72.22% |
13 / 18 |
42.26 | |
0.00% |
0 / 1 |
__construct | |
71.43% |
5 / 7 |
|
0.00% |
0 / 1 |
2.09 | |||
getUrl | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getMap | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getUniqueKey | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getLastUrl | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
resetLastUrl | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
retrieve | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
similar | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
search | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
terms | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
write | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
3.03 | |||
query | |
72.73% |
8 / 11 |
|
0.00% |
0 / 1 |
2.08 | |||
callWithHttpOptions | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
3 | |||
isRethrowableSolrException | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
3 | |||
forceToBackendException | |
50.00% |
3 / 6 |
|
0.00% |
0 / 1 |
6.00 | |||
trySolrUrls | |
100.00% |
20 / 20 |
|
100.00% |
1 / 1 |
10 | |||
getCore | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
send | |
100.00% |
18 / 18 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | |
3 | /** |
4 | * SOLR connector. |
5 | * |
6 | * PHP version 8 |
7 | * |
8 | * Copyright (C) Villanova University 2010. |
9 | * |
10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2, |
12 | * as published by the Free Software Foundation. |
13 | * |
14 | * This program is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | * GNU General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU General Public License |
20 | * along with this program; if not, write to the Free Software |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 | * |
23 | * @category VuFind |
24 | * @package Search |
25 | * @author Andrew S. Nagy <vufind-tech@lists.sourceforge.net> |
26 | * @author David Maus <maus@hab.de> |
27 | * @author Demian Katz <demian.katz@villanova.edu> |
28 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
29 | * @link https://vufind.org |
30 | */ |
31 | |
32 | namespace VuFindSearch\Backend\Solr; |
33 | |
34 | use Laminas\Http\Client\Adapter\Exception\TimeoutException; |
35 | use Laminas\Http\Client as HttpClient; |
36 | use Laminas\Http\Request; |
37 | use Laminas\Uri\Http; |
38 | use VuFindSearch\Backend\Exception\BackendException; |
39 | use VuFindSearch\Backend\Exception\HttpErrorException; |
40 | use VuFindSearch\Backend\Exception\RemoteErrorException; |
41 | use VuFindSearch\Backend\Exception\RequestErrorException; |
42 | use VuFindSearch\Backend\Solr\Document\DocumentInterface; |
43 | use VuFindSearch\Exception\InvalidArgumentException; |
44 | use VuFindSearch\ParamBag; |
45 | |
46 | use function call_user_func_array; |
47 | use function count; |
48 | use function is_callable; |
49 | use function strlen; |
50 | |
51 | /** |
52 | * SOLR connector. |
53 | * |
54 | * @category VuFind |
55 | * @package Search |
56 | * @author Andrew S. Nagy <vufind-tech@lists.sourceforge.net> |
57 | * @author David Maus <maus@hab.de> |
58 | * @author Demian Katz <demian.katz@villanova.edu> |
59 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
60 | * @link https://vufind.org |
61 | */ |
class Connector implements \Laminas\Log\LoggerAwareInterface
{
    use \VuFind\Log\LoggerAwareTrait;
    use \VuFindSearch\Backend\Feature\ConnectorCacheTrait;

    /**
     * Maximum length of a GET parameter string.
     *
     * query() switches from GET to POST when the encoded parameter string
     * is longer than this value.
     *
     * @see \VuFindSearch\Backend\Solr\Connector::query()
     *
     * @var int
     */
    public const MAX_GET_URL_LENGTH = 2048;

    /**
     * HTTP client factory; invoked with the full request URL and expected to
     * return a configured HTTP client.
     *
     * @var callable
     */
    protected $clientFactory;

    /**
     * URL or an array of alternative URLs of the SOLR core.
     *
     * @var string|array
     */
    protected $url;

    /**
     * Handler map.
     *
     * @var HandlerMap
     */
    protected $map;

    /**
     * Solr field used to store unique identifier
     *
     * @var string
     */
    protected $uniqueKey;

    /**
     * Url of the last request
     *
     * @var ?Http
     */
    protected $lastUrl = null;

    /**
     * Constructor
     *
     * @param string|array        $url       SOLR core URL or an array of alternative
     * URLs
     * @param HandlerMap          $map       Handler map
     * @param callable|HttpClient $cf        HTTP client factory or a client to clone
     * @param string              $uniqueKey Solr field used to store unique
     * identifier
     */
    public function __construct(
        $url,
        HandlerMap $map,
        $cf,
        $uniqueKey = 'id'
    ) {
        $this->url = $url;
        $this->map = $map;
        $this->uniqueKey = $uniqueKey;
        if ($cf instanceof HttpClient) {
            // A concrete client was provided: hand out a fresh clone for every
            // request so that per-request settings never leak between requests.
            $this->clientFactory = function () use ($cf) {
                return clone $cf;
            };
        } else {
            $this->clientFactory = $cf;
        }
    }

    /// Public API

    /**
     * Get the Solr URL(s) exactly as passed to the constructor.
     *
     * @return string|array Single URL or array of alternative URLs
     */
    public function getUrl()
    {
        return $this->url;
    }

    /**
     * Return handler map.
     *
     * @return HandlerMap
     */
    public function getMap()
    {
        return $this->map;
    }

    /**
     * Get unique key.
     *
     * @return string
     */
    public function getUniqueKey()
    {
        return $this->uniqueKey;
    }

    /**
     * Get the last request url.
     *
     * @return ?Http
     */
    public function getLastUrl()
    {
        return $this->lastUrl;
    }

    /**
     * Clears the last url
     *
     * @return void
     */
    public function resetLastUrl()
    {
        $this->lastUrl = null;
    }

    /**
     * Return document specified by id.
     *
     * Sets the 'q' parameter to a quoted match on the unique key field and
     * sends it through the handler registered for this method.
     *
     * @param string    $id     The document to retrieve from Solr
     * @param ?ParamBag $params Parameters (a new bag is created when omitted)
     *
     * @return string Response body
     */
    public function retrieve($id, ?ParamBag $params = null)
    {
        $params ??= new ParamBag();
        // Escape backslashes as well as double quotes: inside a quoted phrase
        // an unescaped backslash would itself act as an escape character and
        // could swallow the next character or the closing quote.
        $params->set(
            'q',
            sprintf('%s:"%s"', $this->uniqueKey, addcslashes($id, '"\\'))
        );

        $handler = $this->map->getHandler(__FUNCTION__);
        $this->map->prepare(__FUNCTION__, $params);

        return $this->query($handler, $params, true);
    }

    /**
     * Return records similar to a given record specified by id.
     *
     * Uses MoreLikeThis Request Component or MoreLikeThis Handler
     *
     * @param string   $id     ID of given record (not currently used, but
     * retained for backward compatibility / extensibility).
     * @param ParamBag $params Parameters
     *
     * @return string Response body
     *
     * @SuppressWarnings(PHPMD.UnusedFormalParameter)
     */
    public function similar($id, ParamBag $params)
    {
        $handler = $this->map->getHandler(__FUNCTION__);
        $this->map->prepare(__FUNCTION__, $params);
        return $this->query($handler, $params, true);
    }

    /**
     * Execute a search.
     *
     * @param ParamBag $params Parameters
     *
     * @return string Response body
     */
    public function search(ParamBag $params)
    {
        $handler = $this->map->getHandler(__FUNCTION__);
        $this->map->prepare(__FUNCTION__, $params);
        return $this->query($handler, $params, true);
    }

    /**
     * Extract terms from a SOLR index.
     *
     * @param ParamBag $params Parameters
     *
     * @return string Response body
     */
    public function terms(ParamBag $params)
    {
        $handler = $this->map->getHandler(__FUNCTION__);
        $this->map->prepare(__FUNCTION__, $params);

        return $this->query($handler, $params, true);
    }

    /**
     * Write to the SOLR index.
     *
     * The document is always sent via POST; any parameters are appended to the
     * handler URL as a query string. Write requests are never cached.
     *
     * @param DocumentInterface $document Document to write
     * @param string            $handler  Update handler
     * @param ?ParamBag         $params   Update handler parameters
     *
     * @return string Response body
     */
    public function write(
        DocumentInterface $document,
        $handler = 'update',
        ?ParamBag $params = null
    ) {
        $params ??= new ParamBag();
        $urlSuffix = "/{$handler}";
        if (count($params) > 0) {
            $urlSuffix .= '?' . implode('&', $params->request());
        }
        $callback = function ($client) use ($document) {
            $client->setEncType($document->getContentType());
            $body = $document->getContent();
            $client->setRawBody($body);
            // Content-Length is a byte count, so strlen() (not mb_strlen()) is
            // the right measure here.
            $client->getRequest()->getHeaders()
                ->addHeaderLine('Content-Length', strlen($body));
        };
        return $this->trySolrUrls('POST', $urlSuffix, $callback);
    }

    /// Internal API

    /**
     * Send query to SOLR and return response body.
     *
     * Uses GET with the parameters in the URL unless the encoded parameter
     * string exceeds MAX_GET_URL_LENGTH, in which case the parameters are sent
     * as a URL-encoded POST body instead.
     *
     * @param string   $handler   SOLR request handler to use
     * @param ParamBag $params    Request parameters
     * @param bool     $cacheable Whether the query is cacheable
     *
     * @return string Response body
     */
    public function query($handler, ParamBag $params, bool $cacheable = false)
    {
        $urlSuffix = '/' . $handler;
        $paramString = implode('&', $params->request());
        if (strlen($paramString) > self::MAX_GET_URL_LENGTH) {
            $method = Request::METHOD_POST;
            $callback = function ($client) use ($paramString) {
                $client->setRawBody($paramString);
                $client->setEncType(HttpClient::ENC_URLENCODED);
                $client->setHeaders(['Content-Length' => strlen($paramString)]);
            };
        } else {
            $method = Request::METHOD_GET;
            $urlSuffix .= '?' . $paramString;
            $callback = null;
        }

        $this->debug(sprintf('Query %s', $paramString));
        return $this->trySolrUrls($method, $urlSuffix, $callback, $cacheable);
    }

    /**
     * Call a method with provided options for the HTTP client
     *
     * While the call runs, the client factory is replaced by a wrapper that
     * applies $options to every client it creates; the original factory is
     * restored afterwards even if the called method throws.
     *
     * @param array  $options HTTP client options
     * @param string $method  Name of a public method of this object to call
     * @param mixed  ...$args Method parameters
     *
     * @return mixed Whatever the called method returns
     *
     * @throws InvalidArgumentException If the method is not public
     */
    public function callWithHttpOptions(
        array $options,
        string $method,
        ...$args
    ) {
        $reflectionMethod = new \ReflectionMethod($this, $method);
        if (!$reflectionMethod->isPublic()) {
            throw new InvalidArgumentException("Method '$method' is not public");
        }
        if (empty($options)) {
            // Nothing to override, so call straight through.
            return call_user_func_array([$this, $method], $args);
        }
        $originalFactory = $this->clientFactory;
        try {
            $this->clientFactory = function (string $url) use (
                $originalFactory,
                $options
            ) {
                $client = $originalFactory($url);
                $client->setOptions($options);
                return $client;
            };
            return call_user_func_array([$this, $method], $args);
        } finally {
            $this->clientFactory = $originalFactory;
        }
    }

    /**
     * Check if an exception from a Solr request should be thrown rather than retried
     *
     * Timeouts and request errors other than HTTP 404 are thrown immediately;
     * everything else lets trySolrUrls() move on to the next URL.
     *
     * @param \Exception $ex Exception
     *
     * @return bool
     */
    protected function isRethrowableSolrException($ex)
    {
        if ($ex instanceof TimeoutException) {
            return true;
        }
        // Solr can return 404 when the instance hasn't completed startup, so
        // allow that to be retried:
        return $ex instanceof RequestErrorException
            && $ex->getResponse()->getStatusCode() !== 404;
    }

    /**
     * If an unexpected exception type was received, wrap it in a generic
     * BackendException to standardize upstream handling.
     *
     * @param \Exception $ex Exception
     *
     * @return \Exception The original exception if it is already one of the
     * specific backend exceptions, otherwise a BackendException wrapping it
     */
    protected function forceToBackendException($ex)
    {
        // Don't wrap specific backend exceptions....
        if (
            $ex instanceof RemoteErrorException
            || $ex instanceof RequestErrorException
            || $ex instanceof HttpErrorException
        ) {
            return $ex;
        }
        // Exception codes are not guaranteed to be integers (PDOException, for
        // example, uses strings), but the Exception constructor requires an int;
        // cast so that wrapping can never fail and mask the original problem.
        return new BackendException(
            'Problem connecting to Solr.',
            (int)$ex->getCode(),
            $ex
        );
    }

    /**
     * Try all Solr URLs until we find one that works (or throw an exception).
     *
     * @param string   $method    HTTP method to use
     * @param string   $urlSuffix Suffix to append to all URLs tried
     * @param callable $callback  Callback to configure client (null for none)
     * @param bool     $cacheable Whether the request is cacheable
     *
     * @return string Response body
     *
     * @throws RemoteErrorException  SOLR signaled a server error (HTTP 5xx)
     * @throws RequestErrorException SOLR signaled a client error (HTTP 4xx)
     * @throws BackendException      Any other failure after all URLs were tried
     */
    protected function trySolrUrls(
        $method,
        $urlSuffix,
        $callback = null,
        bool $cacheable = false
    ) {
        // This exception should never get thrown; it's just a safety in case
        // something unanticipated occurs.
        $exception = new \Exception('Unexpected exception.');

        // Loop through all base URLs and try them in turn until one works.
        $cacheKey = null;
        foreach ((array)$this->url as $base) {
            $client = ($this->clientFactory)($base . $urlSuffix);
            $client->setMethod($method);
            if (is_callable($callback)) {
                $callback($client);
            }
            // Always create the cache key from the first server, and only after any
            // callback has been called above.
            if ($cacheable && $this->cache && null === $cacheKey) {
                $cacheKey = $this->getCacheKey($client);
                if ($result = $this->getCachedData($cacheKey)) {
                    return $result;
                }
            }
            try {
                $result = $this->send($client);
                if ($cacheKey) {
                    $this->putCachedData($cacheKey, $result);
                }
                return $result;
            } catch (\Exception $ex) {
                if ($this->isRethrowableSolrException($ex)) {
                    throw $this->forceToBackendException($ex);
                }
                // Remember the failure and fall through to the next URL.
                $exception = $ex;
            }
        }

        // If we got this far, everything failed -- throw a BackendException with
        // the most recent exception caught above set as the previous exception.
        throw $this->forceToBackendException($exception);
    }

    /**
     * Extract the Solr core from the connector's URL.
     *
     * The core is the last path segment of the URL. When several alternative
     * URLs are configured, the first one is used (NOTE(review): this assumes
     * all alternatives point at the same core -- confirm against configuration).
     *
     * @return string Core name, or an empty string if no URL is configured
     */
    public function getCore(): string
    {
        // getUrl() may return a string or an array of alternatives; normalise
        // to an array so rtrim() never receives an array.
        $urls = (array)$this->getUrl();
        $url = rtrim((string)reset($urls), '/');
        $parts = explode('/', $url);
        return array_pop($parts);
    }

    /**
     * Send request the SOLR and return the response.
     *
     * Records the request URI as the last URL and logs the request, the
     * response status and the elapsed time at debug level.
     *
     * @param HttpClient $client Prepared HTTP client
     *
     * @return string Response body
     *
     * @throws RemoteErrorException  SOLR signaled a server error (HTTP 5xx)
     * @throws RequestErrorException SOLR signaled a client error (HTTP 4xx)
     */
    protected function send(HttpClient $client)
    {
        $this->debug(
            sprintf('=> %s %s', $client->getMethod(), $client->getUri())
        );

        $this->lastUrl = $client->getUri();

        $time = microtime(true);
        $response = $client->send();
        $time = microtime(true) - $time;

        $this->debug(
            sprintf(
                '<= %s %s',
                $response->getStatusCode(),
                $response->getReasonPhrase()
            ),
            ['time' => $time]
        );

        if (!$response->isSuccess()) {
            throw HttpErrorException::createFromResponse($response);
        }
        return $response->getBody();
    }
}