Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
76.67% |
230 / 300 |
|
40.00% |
4 / 10 |
CRAP | |
0.00% |
0 / 1 |
Connector | |
76.67% |
230 / 300 |
|
40.00% |
4 / 10 |
173.64 | |
0.00% |
0 / 1 |
__construct | |
90.00% |
9 / 10 |
|
0.00% |
0 / 1 |
5.03 | |||
query | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
2 | |||
performSearch | |
51.47% |
35 / 68 |
|
0.00% |
0 / 1 |
89.83 | |||
call | |
86.36% |
19 / 22 |
|
0.00% |
0 / 1 |
8.16 | |||
process | |
90.57% |
96 / 106 |
|
0.00% |
0 / 1 |
27.61 | |||
getRecord | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
3 | |||
getRecords | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
12 | |||
getInstitutionCode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
processHighlighting | |
80.49% |
33 / 41 |
|
0.00% |
0 / 1 |
9.60 | |||
processDescription | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | |
3 | /** |
4 | * Primo Central connector. |
5 | * |
6 | * PHP version 8 |
7 | * |
8 | * Copyright (C) Villanova University 2010. |
9 | * |
10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2, |
12 | * as published by the Free Software Foundation. |
13 | * |
14 | * This program is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | * GNU General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU General Public License |
20 | * along with this program; if not, write to the Free Software |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 | * |
23 | * @category VuFind |
24 | * @package Search |
25 | * @author Spencer Lamm <slamm1@swarthmore.edu> |
26 | * @author Anna Headley <aheadle1@swarthmore.edu> |
27 | * @author Chelsea Lobdell <clobdel1@swarthmore.edu> |
28 | * @author Demian Katz <demian.katz@villanova.edu> |
29 | * @author Ere Maijala <ere.maijala@helsinki.fi> |
30 | * @author Oliver Goldschmidt <o.goldschmidt@tuhh.de> |
31 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
32 | * @link https://vufind.org |
33 | */ |
34 | |
35 | namespace VuFindSearch\Backend\Primo; |
36 | |
37 | use Laminas\Http\Client as HttpClient; |
38 | |
39 | use function array_key_exists; |
40 | use function count; |
41 | use function in_array; |
42 | use function is_array; |
43 | use function strlen; |
44 | |
45 | /** |
46 | * Primo Central connector. |
47 | * |
48 | * @category VuFind |
49 | * @package Search |
50 | * @author Spencer Lamm <slamm1@swarthmore.edu> |
51 | * @author Anna Headley <aheadle1@swarthmore.edu> |
52 | * @author Chelsea Lobdell <clobdel1@swarthmore.edu> |
53 | * @author Demian Katz <demian.katz@villanova.edu> |
54 | * @author Ere Maijala <ere.maijala@helsinki.fi> |
55 | * @author Oliver Goldschmidt <o.goldschmidt@tuhh.de> |
56 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
57 | * @link https://vufind.org |
58 | * |
59 | * @deprecated Use RestConnector instead |
60 | */ |
61 | class Connector implements ConnectorInterface, \Laminas\Log\LoggerAwareInterface |
62 | { |
63 | use \VuFind\Log\LoggerAwareTrait; |
64 | use \VuFindSearch\Backend\Feature\ConnectorCacheTrait; |
65 | |
66 | /** |
67 | * HTTP client used for API transactions |
68 | * |
69 | * @var HttpClient |
70 | */ |
71 | public $client; |
72 | |
73 | /** |
74 | * Institution code |
75 | * |
76 | * @var string |
77 | */ |
78 | protected $inst; |
79 | |
80 | /** |
81 | * Base URL for API |
82 | * |
83 | * @var string |
84 | */ |
85 | protected $host; |
86 | |
87 | /** |
88 | * Response for an empty search |
89 | * |
90 | * @var array |
91 | */ |
92 | protected static $emptyQueryResponse = [ |
93 | 'recordCount' => 0, |
94 | 'documents' => [], |
95 | 'facets' => [], |
96 | 'error' => 'empty_search_disallowed', |
97 | ]; |
98 | |
99 | /** |
100 | * Regular expression to match highlighted terms |
101 | * |
102 | * @var string |
103 | */ |
104 | protected $highlightRegEx = '{<span[^>]*>([^<]*?)</span>}si'; |
105 | |
/**
 * Constructor
 *
 * Sets up the Primo API client and derives the base request URL.
 *
 * The stored base URL always ends with "?" (or "&" when the configured URL
 * already carries a query string) so that request parameters can be appended
 * to it directly.
 *
 * @param string     $url    Primo API URL (either a host name and port or a full
 * path to the brief search including a trailing question mark)
 * @param string     $inst   Institution code
 * @param HttpClient $client HTTP client
 *
 * @throws \InvalidArgumentException If no host can be determined from $url
 */
public function __construct($url, $inst, $client)
{
    $parts = parse_url((string)$url);
    // parse_url() returns false for seriously malformed URLs and omits the
    // host for values it can only interpret as a path; neither can be turned
    // into a usable endpoint.
    if (!is_array($parts) || empty($parts['host'])) {
        throw new \InvalidArgumentException("Invalid Primo API URL: $url");
    }

    // Fall back to the standard brief search path when none is configured:
    $path = $parts['path'] ?? '';
    if ('' === $path || '/' === $path) {
        $path = '/PrimoWebServices/xservice/search/brief';
    }

    // A "host:port" style value has no scheme component; default to http
    // instead of reading an undefined array key.
    $scheme = $parts['scheme'] ?? 'http';
    $port = !empty($parts['port']) ? ':' . $parts['port'] : '';

    $this->host = $scheme . '://' . $parts['host'] . $port . $path . '?';
    if (!empty($parts['query'])) {
        // Keep preconfigured parameters and leave the URL ready for more:
        $this->host .= $parts['query'] . '&';
    }

    $this->inst = $inst;
    $this->client = $client;
}
132 | |
/**
 * Execute a search: merge the caller's options over the defaults and hand
 * everything to performSearch(), returning its parsed response.
 *
 * @param string $institution Institution
 * @param array  $terms       Associative array:
 *     index       string: primo index to search (default "any")
 *     lookfor     string: actual search terms
 * @param array  $params      Associative array of optional arguments:
 *     phrase         bool:   true if it's a quoted phrase (default false)
 *     onCampus       bool:   (default true)
 *     didYouMean     bool:   (default false)
 *     filterList     array:  (field, value) pairs to filter results (def null)
 *     pcAvailability bool:   (default false)
 *     pageNumber     string: index of first record (default 1)
 *     limit          string: number of records to return (default 20)
 *     sort           string: value to be used by for sorting (default null)
 *     highlight      bool:   whether to highlight search term matches in records
 *     highlightStart string: Prefix for a highlighted term
 *     highlightEnd   string: Suffix for a highlighted term
 *
 * Note: some input parameters accepted by Primo are not implemented here:
 *  - more (get more)
 *  - lang (specify input language so engine can do lang. recognition)
 *  - displayField (has to do with highlighting somehow)
 *
 * @throws \Exception
 * @return array             An array of query results
 *
 * @link http://www.exlibrisgroup.org/display/PrimoOI/Brief+Search
 */
public function query($institution, $terms, $params = null)
{
    // Baseline values for every supported option
    $defaults = [
        'phrase' => false,
        'onCampus' => true,
        'didYouMean' => false,
        'filterList' => null,
        'pcAvailability' => false,
        'pageNumber' => 1,
        'limit' => 20,
        'sort' => null,
        'highlight' => false,
        'highlightStart' => '',
        'highlightEnd' => '',
    ];

    // Caller-provided values win over the defaults
    $args = isset($params) ? array_merge($defaults, $params) : $defaults;

    return $this->performSearch($institution, $terms, $args);
}
188 | |
/**
 * Support method for query() -- perform inner search logic
 *
 * Builds the Primo query string by hand (rather than through the HTTP
 * client's parameter API) because the same parameter name, e.g. "query" or
 * "query_inc", may legitimately occur several times in one request.
 *
 * Every caller-supplied value is URL-encoded before it is added to the
 * query string. The index and the precision are determined separately for
 * each term, so a term without a recognised index or without an operator
 * uses the defaults instead of inheriting the previous term's values.
 *
 * @param string $institution Institution
 * @param array  $terms       List of associative arrays:
 *     index       string: search handler name (unknown/missing => "any")
 *     lookfor     string: actual search terms
 *     op          string: optional precision overriding the default
 * @param array  $args        Associative array of optional arguments (see query
 * method for more information)
 *
 * @throws \Exception
 * @return array             An array of query results
 */
protected function performSearch($institution, $terms, $args)
{
    // Search handler => Primo index; anything else searches "any".
    $indexMap = [
        'AllFields' => 'any',
        'Title' => 'title',
        'Author' => 'creator',
        'Subject' => 'sub',
        'Abstract' => 'desc',
        'ISSN' => 'issn',
    ];

    // Primo interprets quoted phrases correctly by itself, so "exact" is
    // only used when explicitly requested via the phrase flag or a term's op.
    $defaultPrecision = !empty($args['phrase']) ? 'exact' : 'contains';

    $qs = [];
    $lookfor = '';

    // QUERYSTRING: query (search terms)
    if (is_array($terms)) {
        foreach ($terms as $thisTerm) {
            $lookin = $indexMap[$thisTerm['index'] ?? ''] ?? 'any';

            // Commas separate the parts of a Primo query clause, so they
            // cannot appear inside the search terms themselves.
            $lookfor = str_replace(',', ' ', $thisTerm['lookfor'] ?? '');

            $precision = $defaultPrecision;
            if (array_key_exists('op', $thisTerm) && !empty($thisTerm['op'])) {
                $precision = $thisTerm['op'];
            }

            $qs[] = "query=$lookin," . urlencode($precision) . ','
                . urlencode($lookfor);
        }
    }

    // Primo rejects requests without search terms, so do not even try.
    // (As before, only the last term's text is checked.)
    if (strlen($lookfor) === 0) {
        return self::$emptyQueryResponse;
    }

    // QUERYSTRING: institution
    $qs[] = 'institution=' . urlencode((string)$institution);

    // QUERYSTRING: onCampus
    $qs[] = 'onCampus=' . (!empty($args['onCampus']) ? 'true' : 'false');

    // QUERYSTRING: didYouMean
    $qs[] = 'dym=' . (!empty($args['didYouMean']) ? 'true' : 'false');

    // QUERYSTRING: query (filter list)
    // Date-related TODO:
    //   - provide additional support / processing for [x to y] limits?
    //   - sys/Summon.php messes with publication date to enable date
    //     range facet control in the interface. look for injectPubDate
    if (!empty($args['filterList'])) {
        $encodeValue = static function ($value) {
            return urlencode(str_replace(',', ' ', $value));
        };
        foreach ($args['filterList'] as $current) {
            $facet = urlencode($current['field']);
            $facetOp = $current['facetOp'] ?? null;
            $values = array_map($encodeValue, $current['values']);
            if ('OR' === $facetOp) {
                // One clause listing all alternatives:
                $qs[] = "query_inc=facet_$facet,exact," . implode(',', $values);
            } elseif ('NOT' === $facetOp) {
                $qs[] = "query_exc=facet_$facet,exact," . implode(',', $values);
            } else {
                // AND: one clause per required value
                foreach ($values as $value) {
                    $qs[] = "query_inc=facet_$facet,exact,$value";
                }
            }
        }
    }

    // QUERYSTRING: pcAvailability
    // By default PrimoCentral only returns matches that are available via
    // the Holdingsfile. Setting pcAvailability to true ALSO returns matches
    // that are not available there (yes, that's ExLibris logic).
    if (!empty($args['pcAvailability'])) {
        $qs[] = 'pcAvailability=true';
    }

    // QUERYSTRING: indx (start record, 1-based)
    $recordStart = ($args['pageNumber'] - 1) * $args['limit'] + 1;
    $qs[] = "indx=$recordStart";

    // TODO: put bulksize in conf file?  set a reasonable cap...
    //   or is it better to grab each set of 20 through this api module?
    //   Look at how vufind/Summon does this...
    // QUERYSTRING: bulkSize (limit, # of records to return)
    $qs[] = 'bulkSize=' . urlencode((string)$args['limit']);

    // QUERYSTRING: sort
    // Looks like the possible values are "popularity" or "scdate";
    // omit the field for default (relevance) sorting
    if (isset($args['sort']) && ($args['sort'] != 'relevance')) {
        $qs[] = 'sortField=' . urlencode((string)$args['sort']);
    }

    // Highlighting
    $qs[] = 'highlight=' . (empty($args['highlight']) ? 'false' : 'true');

    // QUERYSTRING: loc
    // all primocentral queries need this
    $qs[] = 'loc=adaptor,primo_central_multiple_fe';

    // Send Request
    return $this->call(implode('&', $qs), $args);
}
355 | |
/**
 * Small wrapper around the HTTP request and process() to simplify error
 * handling and caching.
 *
 * Only GET requests are supported; any other method is rejected before the
 * HTTP client is touched, so an unsupported method can never result in a
 * request being sent to a stale or empty URI.
 *
 * @param string $qs        Query string
 * @param array  $params    Request parameters (passed through to process())
 * @param string $method    HTTP method
 * @param bool   $cacheable Whether the request is cacheable
 *
 * @return array The parsed primo data
 * @throws \Exception If the method is unsupported or the request fails
 */
protected function call($qs, $params = [], $method = 'GET', $cacheable = true)
{
    if ('GET' !== $method) {
        throw new \Exception("$method not supported");
    }

    $url = $this->host . $qs;
    $this->debug("{$method}: {$url}");

    $this->client->resetParameters();
    $this->client->setUri($url);
    $this->client->setMethod($method);

    // Check cache:
    $resultBody = null;
    $cacheKey = null;
    if ($cacheable && $this->cache) {
        $cacheKey = $this->getCacheKey($this->client);
        $resultBody = $this->getCachedData($cacheKey);
    }

    if (null === $resultBody) {
        // Nothing cached; send request:
        $result = $this->client->send();
        $resultBody = $result->getBody();
        if (!$result->isSuccess()) {
            throw new \Exception($resultBody);
        }
        // Only successful responses are stored:
        if ($cacheKey) {
            $this->putCachedData($cacheKey, $resultBody);
        }
    }

    return $this->process($resultBody, $params);
}
400 | |
/**
 * Translate Primo's XML into array of arrays.
 *
 * @param string $data   The raw xml from Primo
 * @param array  $params Request parameters (used for highlighting)
 *
 * @return array The processed response from Primo with the keys
 * recordCount, documents, facets and didYouMean
 * @throws \Exception If the response is empty, cannot be parsed as XML or
 * does not contain a TOTALHITS attribute
 */
protected function process($data, $params = [])
{
    // make sure data exists
    if (strlen((string)$data) === 0) {
        throw new \Exception('Primo did not return any data');
    }

    // Load API content as XML objects. The SimpleXMLElement constructor
    // throws on unparsable input (it never returns false), so translate
    // that failure into our own error message.
    try {
        $sxe = new \SimpleXMLElement($data);
    } catch (\Exception $e) {
        throw new \Exception('Error while parsing the document', 0, $e);
    }

    // some useful data about these results
    $totalhitsarray = $sxe->xpath('//@TOTALHITS');

    // if totalhits is missing but we have a message, this is an error
    // situation.
    if (!isset($totalhitsarray[0])) {
        $messages = $sxe->xpath('//@MESSAGE');
        throw new \Exception(
            isset($messages[0])
                ? (string)$messages[0] : 'TOTALHITS attribute missing.'
        );
    }
    $totalhits = (int)$totalhitsarray[0];
    // TODO: would these be useful?
    //$firsthit = $sxe->xpath('//@FIRSTHIT');
    //$lasthit = $sxe->xpath('//@LASTHIT');

    // Register the 'sear' namespace at the top level to avoid problems:
    $sxe->registerXPathNamespace(
        'sear',
        'http://www.exlibrisgroup.com/xsd/jaguar/search'
    );

    // Get the available namespaces. The Primo API uses multiple namespaces.
    // Will be used to navigate the DOM for elements that have namespaces.
    // A null namespace makes children() return the un-namespaced children.
    $namespaces = $sxe->getNamespaces(true);
    $searNs = $namespaces['sear'] ?? null;

    // Citation identifiers require the cdi_ prefix in search, so it is
    // added right away:
    $processCitations = static function ($data): array {
        $result = [];
        foreach ($data as $citation) {
            $result[] = 'cdi_' . (string)$citation;
        }
        return $result;
    };

    // Get results set data and add to $items array
    $items = [];

    $docset = $sxe->xpath('//sear:DOC');
    if (empty($docset) && isset($sxe->JAGROOT->RESULT->DOCSET->DOC)) {
        $docset = $sxe->JAGROOT->RESULT->DOCSET->DOC;
    }

    foreach ($docset as $doc) {
        $item = [];
        // Due to a bug in the primo API, the first result has
        // a namespace (prim:) while the rest of the results do not.
        // Those child elements do not get added to $doc, so they are
        // fetched through the namespace whenever it is declared.
        // NOTE(review): the comparison against the string 'true' looks like
        // it was meant to be an existence check -- confirm before changing.
        $prefix = $doc;
        if ($doc->PrimoNMBib != 'true' && isset($namespaces['prim'])) {
            $prefix = $doc->children($namespaces['prim']);
        }

        // Now, navigate the DOM and set values to the array
        // cast to (string) to get the element's value not an XML object
        $record = $prefix->PrimoNMBib->record;
        $display = $record->display;

        // The first three characters of the record id are dropped:
        $item['recordid'] = substr((string)$record->control->recordid, 3);
        $item['title'] = (string)$display->title;
        $item['format'] = [(string)$display->type];
        // creators
        $creator = trim((string)$display->creator);
        if (strlen($creator) > 0) {
            $item['creator'] = array_map('trim', explode(';', $creator));
        }
        // subjects
        $subject = trim((string)$display->subject);
        if (strlen($subject) > 0) {
            $item['subjects'] = explode(';', $subject);
        }
        $item['ispartof'] = (string)$display->ispartof;
        // description is sort of complicated and will be processed after
        // highlighting tags are handled.
        $item['description'] = isset($display->description)
            ? (string)$display->description
            : (string)$record->search->description;
        // and the rest!
        $item['language'] = (string)$display->language;
        $item['source'] = implode('; ', (array)$display->source);
        $item['identifier'] = (string)$display->identifier;
        $item['fulltext'] = (string)$record->delivery->fulltext;

        $item['issn'] = [];
        foreach ($record->search->issn as $issn) {
            $item['issn'][] = (string)$issn;
        }

        //Are these two needed?
        //$item['publisher'] = (string)$display->publisher;
        //$item['peerreviewed'] = (string)$display->lds50;

        // Get the URL, which has a separate namespace. Prefer the OpenURL
        // link and fall back to the GetIt2 attribute (empty string if the
        // GETIT element is missing as well).
        $sear = $doc->children($searNs);
        $item['url'] = !empty($sear->LINKS->openurl)
            ? (string)$sear->LINKS->openurl
            : (string)($sear->GETIT->attributes()->GetIt2 ?? '');

        // Container data
        $addata = $record->addata;
        $item['container_title'] = (string)$addata->jtitle;
        $item['container_volume'] = (string)$addata->volume;
        $item['container_issue'] = (string)$addata->issue;
        $item['container_start_page'] = (string)$addata->spage;
        $item['container_end_page'] = (string)$addata->epage;
        // Merge in ISSNs from addata without creating duplicates. Strict
        // comparison keeps numeric-looking strings such as "01234567" and
        // "1234567" apart.
        foreach ($addata->eissn as $eissn) {
            if (!in_array((string)$eissn, $item['issn'], true)) {
                $item['issn'][] = (string)$eissn;
            }
        }
        foreach ($addata->issn as $issn) {
            if (!in_array((string)$issn, $item['issn'], true)) {
                $item['issn'][] = (string)$issn;
            }
        }
        foreach ($addata->doi as $doi) {
            $item['doi_str_mv'][] = (string)$doi;
        }

        $item['cites'] = $processCitations($display->cites ?? []);
        $item['cited_by'] = $processCitations($display->citedby ?? []);

        // Remove dash-less ISSNs if there are corresponding dashed ones
        // (We could convert dash-less ISSNs to dashed ones, but try to stay
        // true to the metadata)
        $allIssns = $item['issn'];
        $item['issn'] = array_values(
            array_filter(
                $allIssns,
                static function ($issn) use ($allIssns) {
                    return strlen($issn) != 8
                        || !in_array(
                            substr($issn, 0, 4) . '-' . substr($issn, 4),
                            $allIssns,
                            true
                        );
                }
            )
        );

        // Always process highlighting data as it seems Primo sometimes returns
        // it (e.g. for CDI search) even if highlight parameter is set to false.
        $this->processHighlighting($item, $params);

        // Fix description now that highlighting is done:
        $item['description'] = $this->processDescription($item['description']);

        $item['fullrecord'] = $record->asXML();
        $items[] = $item;
    }

    // Get facet data and add to multidimensional $facets array
    // Start by getting XML for each FACET element,
    // which has the name of the facet as an attribute.
    // We only get the first level of elements
    // because child elements have a namespace prefix
    $facets = [];

    $facetSet = $sxe->xpath('//sear:FACET');
    if (empty($facetSet) && !empty($sxe->JAGROOT->RESULT->FACETLIST)) {
        $facetSet = $sxe->JAGROOT->RESULT->FACETLIST->children($searNs);
    }

    foreach ($facetSet as $facetlist) {
        // Set first level of array with the facet name
        $facet_name = (string)$facetlist->attributes()->NAME;

        // Use the namespace prefix to get second level child elements
        // (the facet values) out of $facetlist.
        foreach ($facetlist->children($searNs) as $facetvalues) {
            // Second level of the array is facet values and their counts
            $facet_key = (string)$facetvalues->attributes()->KEY;
            $facets[$facet_name][$facet_key]
                = (string)$facetvalues->attributes()->VALUE;
        }
    }

    // Spelling suggestions:
    $didYouMean = [];
    foreach ($sxe->xpath('//sear:QUERYTRANSFORMS') as $suggestion) {
        $didYouMean[] = (string)$suggestion->attributes()->QUERY;
    }

    return [
        'recordCount' => $totalhits,
        'documents' => $items,
        'facets' => $facets,
        'didYouMean' => $didYouMean,
    ];
}
633 | |
/**
 * Retrieves a document specified by the ID.
 *
 * An empty ID short-circuits to the static "empty search" response without
 * contacting Primo.
 *
 * @param string  $recordId  The document to retrieve from the Primo API
 * @param ?string $inst_code Institution code (optional)
 * @param bool    $onCampus  Whether the user is on campus
 *
 * @throws \Exception
 * @return array             An array of query results
 */
public function getRecord(string $recordId, $inst_code = null, $onCampus = false)
{
    if ('' === $recordId) {
        return self::$emptyQueryResponse;
    }

    // Query String Parameters
    $qs = [
        // There is currently (at 2015-12-17) a problem with Primo fetching
        // records that have colons in the id (e.g.
        // doaj_xmloai:doaj.org/article:94935655971c4917aab4fcaeafeb67b9).
        // According to Ex Libris support we must use contains search without
        // quotes for the time being.
        // Escaping the - character causes problems getting records like
        // wj10.1111/j.1475-679X.2011.00421.x
        'query=rid,contains,' . urlencode(addcslashes($recordId, '":()')),
        // Encoded so that reserved characters in the code cannot break or
        // extend the query string; null becomes an empty value.
        'institution=' . urlencode((string)$inst_code),
        'onCampus=' . ($onCampus ? 'true' : 'false'),
        'indx=1',
        'bulkSize=1',
        'loc=adaptor,primo_central_multiple_fe',
        // pcAvailability=true is needed for records, which
        // are NOT in the PrimoCentral Holdingsfile.
        // It won't hurt to have this parameter always set to true.
        // But it'd hurt to have it not set in case you want to get
        // a record, which is not in the Holdingsfile.
        'pcAvailability=true',
    ];

    // Send Request
    return $this->call(implode('&', $qs));
}
677 | |
/**
 * Retrieves multiple documents specified by the ID.
 *
 * The IDs are combined into a single "rid contains a OR b OR ..." query and
 * the requested result size equals the number of IDs. An empty ID list
 * short-circuits to the static "empty search" response.
 *
 * @param array   $recordIds The documents to retrieve from the Primo API
 * @param ?string $inst_code Institution code (optional)
 * @param bool    $onCampus  Whether the user is on campus
 *
 * @throws \Exception
 * @return array             An array of query results
 */
public function getRecords($recordIds, $inst_code = null, $onCampus = false)
{
    if (!$recordIds) {
        return self::$emptyQueryResponse;
    }

    // Escape the characters that have a special meaning in Primo queries:
    $escapedIds = array_map(
        static function ($id) {
            return addcslashes($id, '":()');
        },
        $recordIds
    );

    // Query String Parameters
    $qs = [
        'query=rid,contains,' . urlencode(implode(' OR ', $escapedIds)),
        // Encoded so that reserved characters in the code cannot break or
        // extend the query string; null becomes an empty value.
        'institution=' . urlencode((string)$inst_code),
        'onCampus=' . ($onCampus ? 'true' : 'false'),
        'indx=1',
        'bulkSize=' . count($escapedIds),
        'loc=adaptor,primo_central_multiple_fe',
        // pcAvailability=true is needed for records, which
        // are NOT in the PrimoCentral Holdingsfile.
        // It won't hurt to have this parameter always set to true.
        // But it'd hurt to have it not set in case you want to get
        // a record, which is not in the Holdingsfile.
        'pcAvailability=true',
    ];

    // Send Request
    return $this->call(implode('&', $qs));
}
720 | |
/**
 * Get the institution code stored on this connector (the value that was
 * passed to the constructor).
 *
 * @return string
 */
public function getInstitutionCode()
{
    return $this->inst;
}
731 | |
/**
 * Process highlighting tags of the record fields
 *
 * Strips the <span> highlighting markup from every field of the record (in
 * place). For the title, creator and description fields, the values that
 * contained highlighting are additionally collected with the matched terms
 * wrapped in the configured start/end tags; when highlighting is enabled in
 * $params the collection is stored in $record['highlightDetails'] (keyed by
 * title, author and description).
 *
 * @param array $record Record data (modified in place)
 * @param array $params Request params (highlight, highlightStart,
 * highlightEnd)
 *
 * @return void
 */
protected function processHighlighting(&$record, $params)
{
    $highlight = !empty($params['highlight']);
    $startTag = $params['highlightStart'] ?? '';
    $endTag = $params['highlightEnd'] ?? '';

    // Record field => key used in the highlighting details
    $highlightFields = [
        'title' => 'title',
        'creator' => 'author',
        'description' => 'description',
    ];

    // A callback is used instead of a replacement string so that tags
    // containing "$" or "\" followed by digits (or an end tag starting with
    // a digit) are inserted literally instead of being interpreted as
    // backreferences.
    $wrapMatch = static function (array $match) use ($startTag, $endTag) {
        return $startTag . $match[1] . $endTag;
    };

    $highlightDetails = [];
    foreach ($record as $field => $fieldData) {
        $values = (array)$fieldData;

        // Collect highlighting details:
        if (isset($highlightFields[$field])) {
            $highlightedValues = [];
            foreach ($values as $value) {
                $count = 0;
                $marked = preg_replace_callback(
                    $this->highlightRegEx,
                    $wrapMatch,
                    $value,
                    -1,
                    $count
                );
                if ($count) {
                    // Account for double tags. Yes, it's possible.
                    $highlightedValues[] = preg_replace(
                        $this->highlightRegEx,
                        '$1',
                        $marked
                    );
                }
            }
            if ($highlightedValues) {
                $highlightDetails[$highlightFields[$field]] = $highlightedValues;
            }
        }

        // Strip highlighting tags from all fields; the second pass accounts
        // for double tags.
        foreach ($values as $index => $value) {
            $stripped = preg_replace($this->highlightRegEx, '$1', $value);
            $values[$index]
                = preg_replace($this->highlightRegEx, '$1', $stripped);
        }

        // Scalars go back as scalars; a null field stays null.
        $record[$field] = is_array($fieldData) ? $values : ($values[0] ?? null);
    }

    // Store the details once, after all fields have been inspected:
    if ($highlight) {
        $record['highlightDetails'] = $highlightDetails;
    }
}
806 | |
/**
 * Fix the description field by removing tags etc.
 *
 * The text is cut to its first 2500 characters, split into paragraphs at
 * <p>/<P> tags, stripped of all remaining markup and re-joined with <br>.
 * Paragraphs that are empty after stripping are dropped.
 *
 * @param string $description Description
 *
 * @return string
 */
protected function processDescription($description)
{
    // Sometimes the entire article is in the description, so just take a chunk
    // from the beginning.
    $description = trim(mb_substr((string)$description, 0, 2500, 'UTF-8'));

    // These may contain all kinds of metadata, and just stripping
    // tags mushes it all together confusingly.
    $description = str_replace('<P>', '<p>', $description);

    $paragraphs = [];
    foreach (explode('<p>', $description) as $paragraph) {
        $text = trim(strip_tags($paragraph));
        // Compare against the empty string explicitly: a truthiness check
        // would also throw away a paragraph consisting of just "0".
        if ('' !== $text) {
            $paragraphs[] = $text;
        }
    }

    // Now join paragraphs using line breaks
    return implode('<br>', $paragraphs);
}
832 | } |