Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
85.95% |
104 / 121 |
|
66.67% |
12 / 18 |
CRAP | |
0.00% |
0 / 1 |
Connector | |
85.95% |
104 / 121 |
|
66.67% |
12 / 18 |
49.37 | |
0.00% |
0 / 1 |
__construct | |
71.43% |
5 / 7 |
|
0.00% |
0 / 1 |
2.09 | |||
getUrl | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getMap | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getUniqueKey | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getLastUrl | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
resetLastUrl | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
retrieve | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
similar | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
search | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
terms | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
write | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
3.03 | |||
query | |
72.73% |
8 / 11 |
|
0.00% |
0 / 1 |
2.08 | |||
callWithHttpOptions | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
3 | |||
isRethrowableSolrException | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
3 | |||
forceToBackendException | |
50.00% |
3 / 6 |
|
0.00% |
0 / 1 |
6.00 | |||
trySolrUrls | |
100.00% |
20 / 20 |
|
100.00% |
1 / 1 |
10 | |||
getCore | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
send | |
96.43% |
27 / 28 |
|
0.00% |
0 / 1 |
4 |
1 | <?php |
2 | |
3 | /** |
4 | * SOLR connector. |
5 | * |
6 | * PHP version 8 |
7 | * |
8 | * Copyright (C) Villanova University 2010. |
9 | * |
10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2, |
12 | * as published by the Free Software Foundation. |
13 | * |
14 | * This program is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | * GNU General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU General Public License |
20 | * along with this program; if not, write to the Free Software |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 | * |
23 | * @category VuFind |
24 | * @package Search |
25 | * @author Andrew S. Nagy <vufind-tech@lists.sourceforge.net> |
26 | * @author David Maus <maus@hab.de> |
27 | * @author Demian Katz <demian.katz@villanova.edu> |
28 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
29 | * @link https://vufind.org |
30 | */ |
31 | |
32 | namespace VuFindSearch\Backend\Solr; |
33 | |
34 | use Laminas\Http\Client\Adapter\Exception\TimeoutException; |
35 | use Laminas\Http\Client as HttpClient; |
36 | use Laminas\Http\Request; |
37 | use Laminas\Uri\Http; |
38 | use VuFindSearch\Backend\Exception\BackendException; |
39 | use VuFindSearch\Backend\Exception\HttpErrorException; |
40 | use VuFindSearch\Backend\Exception\RemoteErrorException; |
41 | use VuFindSearch\Backend\Exception\RequestErrorException; |
42 | use VuFindSearch\Backend\Solr\Document\DocumentInterface; |
43 | use VuFindSearch\Exception\InvalidArgumentException; |
44 | use VuFindSearch\ParamBag; |
45 | |
46 | use function call_user_func_array; |
47 | use function count; |
48 | use function is_callable; |
49 | use function sprintf; |
50 | use function strlen; |
51 | |
52 | /** |
53 | * SOLR connector. |
54 | * |
55 | * @category VuFind |
56 | * @package Search |
57 | * @author Andrew S. Nagy <vufind-tech@lists.sourceforge.net> |
58 | * @author David Maus <maus@hab.de> |
59 | * @author Demian Katz <demian.katz@villanova.edu> |
60 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
61 | * @link https://vufind.org |
62 | */ |
63 | class Connector implements \Laminas\Log\LoggerAwareInterface |
64 | { |
65 | use \VuFind\Log\LoggerAwareTrait; |
66 | use \VuFindSearch\Backend\Feature\ConnectorCacheTrait; |
67 | |
/**
 * Maximum length of the encoded request parameters sent via GET.
 *
 * query() compares the length of the serialized parameter string (not the
 * complete target URL) against this limit and switches to POST when the
 * parameter string is longer.
 *
 * @see \VuFindSearch\Backend\Solr\Connector::query()
 *
 * @var int
 */
public const MAX_GET_URL_LENGTH = 2048;

/**
 * HTTP client factory; invoked with the full request URL whenever a request
 * is about to be sent and expected to return an HTTP client.
 *
 * @var callable
 */
protected $clientFactory;

/**
 * URL of the SOLR core, or an array of alternative URLs that are tried in
 * order.
 *
 * @var string|array
 */
protected $url;

/**
 * Handler map (maps connector functions to SOLR request handlers).
 *
 * @var HandlerMap
 */
protected $map;

/**
 * Solr field used to store unique identifier
 *
 * @var string
 */
protected $uniqueKey;

/**
 * URL of the most recent request (null when no request has been sent yet or
 * after resetLastUrl() has been called).
 *
 * @var ?Http
 */
protected $lastUrl = null;
113 | |
/**
 * Constructor
 *
 * @param string|array        $url       SOLR core URL or an array of alternative
 * URLs
 * @param HandlerMap          $map       Handler map
 * @param callable|HttpClient $cf        HTTP client factory (called with the
 * request URL, must return a client) or a client to clone for each request
 * @param string              $uniqueKey Solr field used to store unique
 * identifier
 */
public function __construct(
    $url,
    HandlerMap $map,
    $cf,
    $uniqueKey = 'id'
) {
    $this->url = $url;
    $this->map = $map;
    $this->uniqueKey = $uniqueKey;
    if ($cf instanceof HttpClient) {
        // The factory is always invoked with the target URL. A bare clone would
        // keep whatever URI the template client had, so the request would never
        // reach the intended Solr URL; point the clone at the requested URL.
        $this->clientFactory = static function (string $url) use ($cf) {
            $client = clone $cf;
            $client->setUri($url);
            return $client;
        };
    } else {
        $this->clientFactory = $cf;
    }
}
141 | |
142 | /// Public API |
143 | |
/**
 * Get the Solr URL(s) exactly as passed to the constructor.
 *
 * @return string|array A single core URL, or an array of alternative URLs
 */
public function getUrl()
{
    return $this->url;
}
153 | |
/**
 * Get the handler map this connector was constructed with.
 *
 * @return HandlerMap
 */
public function getMap()
{
    return $this->map;
}
163 | |
/**
 * Get the name of the Solr field used as the unique record identifier.
 *
 * @return string
 */
public function getUniqueKey()
{
    return $this->uniqueKey;
}
173 | |
/**
 * Get the URL of the most recently sent request.
 *
 * @return ?Http Null if no request has been sent since construction or since
 * the last call to resetLastUrl()
 */
public function getLastUrl()
{
    return $this->lastUrl;
}
183 | |
/**
 * Forget the URL of the most recently sent request.
 *
 * @return void
 */
public function resetLastUrl()
{
    $this->lastUrl = null;
}
193 | |
/**
 * Return document specified by id.
 *
 * The id is embedded in a quoted phrase query on the unique key field; the
 * resulting query replaces any existing 'q' parameter.
 *
 * @param string    $id     The document to retrieve from Solr
 * @param ?ParamBag $params Parameters (a new bag is created when omitted)
 *
 * @return string
 */
public function retrieve($id, ?ParamBag $params = null)
{
    $params ??= new ParamBag();

    // Inside a quoted Solr phrase both the double quote and the backslash are
    // significant. Escaping only the quote would let a backslash in the id
    // swallow the following character (or the closing quote) and break the query.
    $escapedId = addcslashes($id, '"\\');
    $params->set('q', sprintf('%s:"%s"', $this->uniqueKey, $escapedId));

    $handler = $this->map->getHandler(__FUNCTION__);
    $this->map->prepare(__FUNCTION__, $params);

    return $this->query($handler, $params, true);
}
213 | |
/**
 * Return records similar to a given record specified by id.
 *
 * Uses MoreLikeThis Request Component or MoreLikeThis Handler. When Solr
 * reports that it could not fetch the source document, an empty JSON object
 * is returned instead of propagating the error.
 *
 * @param string   $id     ID of given record (not currently used, but
 * retained for backward compatibility / extensibility).
 * @param ParamBag $params Parameters
 *
 * @return string
 *
 * @SuppressWarnings(PHPMD.UnusedFormalParameter)
 */
public function similar($id, ParamBag $params)
{
    $requestHandler = $this->map->getHandler(__FUNCTION__);
    $this->map->prepare(__FUNCTION__, $params);

    try {
        $response = $this->query($requestHandler, $params, true);
    } catch (RequestErrorException $error) {
        // Any other request error is a genuine failure and must surface.
        $recordMissing = str_contains(
            $error->getMessage(),
            'Could not fetch document with id'
        );
        if (!$recordMissing) {
            throw $error;
        }
        // Solr could not load the source record: behave as if nothing is similar.
        return '{}';
    }

    return $response;
}
242 | |
/**
 * Execute a search.
 *
 * Looks up the request handler mapped to this function, lets the handler map
 * prepare the parameters and sends the query as a cacheable request.
 *
 * @param ParamBag $params Parameters
 *
 * @return string
 */
public function search(ParamBag $params)
{
    $function = __FUNCTION__;
    $requestHandler = $this->map->getHandler($function);
    $this->map->prepare($function, $params);

    return $this->query($requestHandler, $params, true);
}
256 | |
/**
 * Extract terms from a SOLR index.
 *
 * Looks up the request handler mapped to this function, lets the handler map
 * prepare the parameters and sends the query as a cacheable request.
 *
 * @param ParamBag $params Parameters
 *
 * @return string
 */
public function terms(ParamBag $params)
{
    $function = __FUNCTION__;
    $requestHandler = $this->map->getHandler($function);
    $this->map->prepare($function, $params);

    return $this->query($requestHandler, $params, true);
}
271 | |
/**
 * Write to the SOLR index.
 *
 * The document is sent as the raw POST body; any parameters are appended to
 * the handler URL as a query string.
 *
 * @param DocumentInterface $document Document to write
 * @param string            $handler  Update handler
 * @param ?ParamBag         $params   Update handler parameters (none when
 * omitted)
 *
 * @return string Response body
 */
public function write(
    DocumentInterface $document,
    $handler = 'update',
    ?ParamBag $params = null
) {
    $params ??= new ParamBag();
    $urlSuffix = "/{$handler}";
    if (count($params) > 0) {
        $urlSuffix .= '?' . implode('&', $params->request());
    }
    // Configure each client created by trySolrUrls() with the document payload.
    $callback = function ($client) use ($document) {
        $client->setEncType($document->getContentType());
        $body = $document->getContent();
        $client->setRawBody($body);
        $client->getRequest()->getHeaders()
            ->addHeaderLine('Content-Length', strlen($body));
    };
    return $this->trySolrUrls('POST', $urlSuffix, $callback);
}
300 | |
301 | /// Internal API |
302 | |
/**
 * Send query to SOLR and return response body.
 *
 * The parameters are serialized once. If the serialized string is longer than
 * MAX_GET_URL_LENGTH it is sent as a URL-encoded POST body; otherwise it is
 * appended to the handler URL and sent via GET.
 *
 * @param string   $handler   SOLR request handler to use
 * @param ParamBag $params    Request parameters
 * @param bool     $cacheable Whether the query is cacheable
 *
 * @return string Response body
 */
public function query($handler, ParamBag $params, bool $cacheable = false)
{
    $encodedParams = implode('&', $params->request());
    $path = '/' . $handler;

    $tooLongForGet = strlen($encodedParams) > self::MAX_GET_URL_LENGTH;
    if (!$tooLongForGet) {
        $httpMethod = Request::METHOD_GET;
        $path .= '?' . $encodedParams;
        $configureClient = null;
    } else {
        $httpMethod = Request::METHOD_POST;
        // Move the parameters into the request body.
        $configureClient = function ($client) use ($encodedParams) {
            $client->setRawBody($encodedParams);
            $client->setEncType(HttpClient::ENC_URLENCODED);
            $client->setHeaders(['Content-Length' => strlen($encodedParams)]);
        };
    }

    $this->debug(sprintf('Query %s', $encodedParams));
    return $this->trySolrUrls($httpMethod, $path, $configureClient, $cacheable);
}
332 | |
/**
 * Call a public method of this connector with extra HTTP client options.
 *
 * While the method runs, the client factory is wrapped so that every client it
 * creates receives the given options; the original factory is restored
 * afterwards, even if the method throws. With empty options the method is
 * called directly.
 *
 * @param array  $options HTTP client options
 * @param string $method  Method to call
 * @param array  ...$args Method parameters
 *
 * @return mixed
 *
 * @throws InvalidArgumentException If the method is not public
 */
public function callWithHttpOptions(
    array $options,
    string $method,
    ...$args
) {
    $target = new \ReflectionMethod($this, $method);
    if (!$target->isPublic()) {
        throw new InvalidArgumentException("Method '$method' is not public");
    }
    if (empty($options)) {
        return $this->$method(...$args);
    }

    $previousFactory = $this->clientFactory;
    $this->clientFactory = function (string $url) use (
        $previousFactory,
        $options
    ) {
        $client = $previousFactory($url);
        $client->setOptions($options);
        return $client;
    };
    try {
        return $this->$method(...$args);
    } finally {
        // Always put the original factory back so later calls are unaffected.
        $this->clientFactory = $previousFactory;
    }
}
369 | |
/**
 * Decide whether a failed Solr request should abort immediately instead of
 * being retried against the next URL.
 *
 * Timeouts always abort. Request errors abort unless the status is 404.
 *
 * @param \Exception $ex Exception
 *
 * @return bool
 */
protected function isRethrowableSolrException($ex)
{
    if ($ex instanceof TimeoutException) {
        return true;
    }
    if (!($ex instanceof RequestErrorException)) {
        return false;
    }
    // Solr can return 404 when the instance hasn't completed startup, so allow
    // that to be retried:
    return $ex->getResponse()->getStatusCode() !== 404;
}
383 | |
/**
 * Normalize an exception for upstream handling.
 *
 * Remote, request and HTTP error exceptions are returned untouched; anything
 * else is wrapped in a generic BackendException that keeps the original
 * exception as its previous exception.
 *
 * @param \Exception $ex Exception
 *
 * @return \Exception
 */
protected function forceToBackendException($ex)
{
    $isSpecificBackendError = $ex instanceof RemoteErrorException
        || $ex instanceof RequestErrorException
        || $ex instanceof HttpErrorException;

    return $isSpecificBackendError
        ? $ex
        : new BackendException('Problem connecting to Solr.', $ex->getCode(), $ex);
}
405 | |
/**
 * Try all Solr URLs until we find one that works (or throw an exception).
 *
 * For each base URL a client is created, configured and sent. The first
 * successful response is returned (and cached when caching applies). Failures
 * that are not retryable abort immediately; other failures fall through to the
 * next URL.
 *
 * @param string   $method    HTTP method to use
 * @param string   $urlSuffix Suffix to append to all URLs tried
 * @param callable $callback  Callback to configure client (null for none)
 * @param bool     $cacheable Whether the request is cacheable
 *
 * @return string Response body
 *
 * @throws RemoteErrorException  SOLR signaled a server error (HTTP 5xx)
 * @throws RequestErrorException SOLR signaled a client error (HTTP 4xx)
 */
protected function trySolrUrls(
    $method,
    $urlSuffix,
    $callback = null,
    bool $cacheable = false
) {
    // Placeholder that is only thrown if the loop below never runs or never
    // records a real failure.
    $lastFailure = new \Exception('Unexpected exception.');

    $key = null;
    foreach ((array)$this->url as $baseUrl) {
        $httpClient = ($this->clientFactory)($baseUrl . $urlSuffix);
        $httpClient->setMethod($method);
        if (is_callable($callback)) {
            $callback($httpClient);
        }

        // The cache key is derived once, from the first server's fully
        // configured client, so every alternative URL shares the same entry.
        $needsCacheLookup = $cacheable && $this->cache && null === $key;
        if ($needsCacheLookup) {
            $key = $this->getCacheKey($httpClient);
            $cached = $this->getCachedData($key);
            if ($cached) {
                return $cached;
            }
        }

        try {
            $body = $this->send($httpClient);
            if ($key) {
                $this->putCachedData($key, $body);
            }
            return $body;
        } catch (\Exception $caught) {
            if (!$this->isRethrowableSolrException($caught)) {
                // Retryable: remember the failure and move on to the next URL.
                $lastFailure = $caught;
                continue;
            }
            throw $this->forceToBackendException($caught);
        }
    }

    // Every URL failed: report the most recent failure, wrapped if necessary.
    throw $this->forceToBackendException($lastFailure);
}
463 | |
/**
 * Extract the Solr core from the connector's URL.
 *
 * The core is the last path segment of the URL (ignoring trailing slashes).
 * When several alternative URLs are configured, the first one is used.
 *
 * @return string Core name; empty string if no URL is configured
 */
public function getCore(): string
{
    // The URL may be a string or an array of alternatives; rtrim() on an array
    // would raise a TypeError, so normalize to the first configured URL.
    $urls = (array)$this->getUrl();
    $firstUrl = $urls === [] ? '' : (string)reset($urls);

    $parts = explode('/', rtrim($firstUrl, '/'));
    return (string)array_pop($parts);
}
475 | |
/**
 * Send a request to SOLR and return the response.
 *
 * Records the request URI as the last URL, logs the request and the response
 * status (with elapsed time) and converts unsuccessful responses into
 * exceptions. For a 400 response the error message reported in the JSON body,
 * if any, is appended to the exception message.
 *
 * @param HttpClient $client Prepared HTTP client
 *
 * @return string Response body
 *
 * @throws RemoteErrorException  SOLR signaled a server error (HTTP 5xx)
 * @throws RequestErrorException SOLR signaled a client error (HTTP 4xx)
 */
protected function send(HttpClient $client)
{
    $this->debug(
        sprintf('=> %s %s', $client->getMethod(), $client->getUri())
    );

    $this->lastUrl = $client->getUri();

    $startedAt = microtime(true);
    $response = $client->send();
    $elapsed = microtime(true) - $startedAt;

    $statusLine = sprintf(
        '<= %s %s',
        $response->getStatusCode(),
        $response->getReasonPhrase()
    );
    $this->debug($statusLine, ['time' => $elapsed]);

    if ($response->isSuccess()) {
        return $response->getBody();
    }

    if ($response->getStatusCode() !== 400) {
        throw HttpErrorException::createFromResponse($response);
    }

    // Return a more detailed error message for a 400 error:
    $decoded = json_decode($response->getBody(), true);
    $messageParts = ['400', $response->getReasonPhrase()];
    $detail = $decoded['error']['msg'] ?? '';
    if ($detail) {
        $messageParts[] = $detail;
    }
    throw new RequestErrorException(
        implode(' ', $messageParts),
        400,
        $response
    );
}
525 | } |