Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
337 / 337 |
|
100.00% |
55 / 55 |
CRAP | |
100.00% |
1 / 1 |
EDS | |
100.00% |
337 / 337 |
|
100.00% |
55 / 55 |
128 | |
100.00% |
1 / 1 |
getUniqueID | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
getShortTitle | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
getSubtitle | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
getItemsAbstract | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getAccessLevel | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getItemsAuthors | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
getItemsAuthorsArray | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
getCustomLinks | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getFTCustomLinks | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getDbLabel | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getHTMLFullText | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
hasHTMLFullTextAvailable | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
itemIsExcluded | |
100.00% |
15 / 15 |
|
100.00% |
1 / 1 |
7 | |||
getItems | |
100.00% |
27 / 27 |
|
100.00% |
1 / 1 |
15 | |||
getPLink | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getPubType | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getPubTypeId | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
hasEbookAvailable | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
3 | |||
hasPdfAvailable | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
hasEpubAvailable | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
hasLinkedFullTextAvailable | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getEbookLink | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
5 | |||
getPdfLink | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getEpubLink | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getLinkedFullTextLink | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getItemsSubjects | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
getThumbnail | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
3 | |||
getItemsTitle | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getTitle | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
getPrimaryAuthors | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
getItemsTitleSource | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
linkUrls | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
2 | |||
toHTML | |
100.00% |
69 / 69 |
|
100.00% |
1 / 1 |
4 | |||
replaceBRWithCommas | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
getCleanDOI | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
getLanguages | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
1 | |||
getFilteredIdentifiers | |
100.00% |
15 / 15 |
|
100.00% |
1 / 1 |
4 | |||
getISSNs | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getISBNs | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getContainerTitle | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 | |||
getFilteredNumbering | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
4 | |||
getContainerIssue | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getContainerVolume | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getPublicationDates | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
2 | |||
getContainerStartPage | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
getContainerEndPage | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
5 | |||
getFormats | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
7 | |||
getPublishers | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
getPlacesOfPublication | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
getPublicationDetails | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
getRawEDSPublicationDetails | |
100.00% |
15 / 15 |
|
100.00% |
1 / 1 |
5 | |||
extractEbscoData | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
4 | |||
extractEbscoDataFromItems | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
extractEbscoDataFromRecordInfo | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
recurseIntoRecordInfo | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
6 |
1 | <?php |
2 | |
3 | /** |
4 | * Model for EDS records. |
5 | * |
6 | * PHP version 8 |
7 | * |
8 | * Copyright (C) Villanova University 2010. |
9 | * |
10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2, |
12 | * as published by the Free Software Foundation. |
13 | * |
14 | * This program is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | * GNU General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU General Public License |
20 | * along with this program; if not, write to the Free Software |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 | * |
23 | * @category VuFind |
24 | * @package RecordDrivers |
25 | * @author Demian Katz <demian.katz@villanova.edu> |
26 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
27 | * @link https://vufind.org/wiki/development:plugins:record_drivers Wiki |
28 | */ |
29 | |
30 | namespace VuFind\RecordDriver; |
31 | |
32 | use function count; |
33 | use function in_array; |
34 | use function is_array; |
35 | use function is_callable; |
36 | use function strlen; |
37 | |
38 | /** |
39 | * Model for EDS records. |
40 | * |
41 | * @category VuFind |
42 | * @package RecordDrivers |
43 | * @author Demian Katz <demian.katz@villanova.edu> |
44 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
45 | * @link https://vufind.org/wiki/development:plugins:record_drivers Wiki |
46 | */ |
47 | class EDS extends DefaultRecord |
48 | { |
/**
 * Document types that are treated as ePub links.
 *
 * Used as the type list for hasEpubAvailable() and getEpubLink(), which
 * compare it against the "Type" of each full text link.
 *
 * @var array
 */
protected $epubTypes = ['ebook-epub'];

/**
 * Document types that are treated as PDF links.
 *
 * Used as the type list for hasPdfAvailable() and getPdfLink(), which
 * compare it against the "Type" of each full text link.
 *
 * @var array
 */
protected $pdfTypes = ['ebook-pdf', 'pdflink'];
62 | |
/**
 * Return the unique identifier of this record, built by joining the
 * "DbId" and "An" values of the record header with a comma.
 *
 * @return string Unique identifier in "DbId,An" form.
 */
public function getUniqueID()
{
    return sprintf(
        '%s,%s',
        $this->fields['Header']['DbId'],
        $this->fields['Header']['An']
    );
}
76 | |
/**
 * Get the short title of the record: everything before the first colon of
 * the full title, with surrounding whitespace removed.
 *
 * @return string Short title, or an empty string when there is no title.
 */
public function getShortTitle()
{
    $fullTitle = $this->getTitle();
    // Loose comparison on purpose: null and '' both count as "no title".
    if (null == $fullTitle) {
        return '';
    }
    [$beforeColon] = explode(':', $fullTitle);
    return trim($beforeColon);
}
91 | |
/**
 * Get the subtitle of the record: everything after the first colon of the
 * full title, with surrounding whitespace removed.
 *
 * @return string Subtitle, or an empty string when the title has no colon
 * (or there is no title at all).
 */
public function getSubtitle()
{
    $fullTitle = $this->getTitle();
    // Loose comparison on purpose: null and '' both count as "no title".
    if (null == $fullTitle) {
        return '';
    }
    // Limit of 2 keeps any further colons inside the subtitle.
    $pieces = explode(':', $fullTitle, 2);
    return trim($pieces[1] ?? '');
}
106 | |
/**
 * Get the abstract (summary) of the record, taken from the first item in
 * the "Ab" group.
 *
 * @return string Abstract data, or an empty string when no such item exists.
 */
public function getItemsAbstract()
{
    return $this->getItems(null, null, 'Ab')[0]['Data'] ?? '';
}
117 | |
/**
 * Get the access level of the record, as stored in the record header.
 *
 * @return string If not empty, will contain a numerical value corresponding to these levels of access:
 * 0 - Not Available to search via Guest Access
 * 1 - Metadata is searched, but only a placeholder record is displayed
 * 2 - Display record in the results but no access to detailed record or full text
 * 3 - Full access: search/display all content to guests
 * 6 - Display full record but no access to full text
 */
public function getAccessLevel()
{
    $header = $this->fields['Header'] ?? [];
    return $header['AccessLevel'] ?? '';
}
132 | |
/**
 * Get the authors of the record as a single "; "-separated string.
 *
 * @return string Joined author data, or an empty string when there are no
 * author items.
 */
public function getItemsAuthors()
{
    $names = $this->getItemsAuthorsArray();
    if (empty($names)) {
        return '';
    }
    return implode('; ', $names);
}
143 | |
/**
 * Obtain an array of authors indicated on the record: the "Data" value of
 * every item in the "Au" group.
 *
 * @return array
 */
protected function getItemsAuthorsArray()
{
    $authorItems = $this->getItems(null, null, 'Au');
    return array_map(
        static fn ($item) => $item['Data'],
        $authorItems
    );
}
158 | |
/**
 * Get the custom links of the record (top-level "CustomLinks" field).
 *
 * @return array Custom links, or an empty array when the field is absent.
 */
public function getCustomLinks()
{
    if (isset($this->fields['CustomLinks'])) {
        return $this->fields['CustomLinks'];
    }
    return [];
}
168 | |
/**
 * Get the full text custom links of the record ("CustomLinks" inside the
 * "FullText" field).
 *
 * @return array Custom links, or an empty array when the field is absent.
 */
public function getFTCustomLinks()
{
    $fullText = $this->fields['FullText'] ?? [];
    return $fullText['CustomLinks'] ?? [];
}
178 | |
/**
 * Get the database label of the record, as stored in the record header.
 *
 * @return string Label, or an empty string when the header has none.
 */
public function getDbLabel()
{
    $header = $this->fields['Header'] ?? [];
    return $header['DbLabel'] ?? '';
}
188 | |
/**
 * Get the full text of the record, converted to HTML by toHTML().
 *
 * @return string HTML for the full text value (toHTML() output for an empty
 * string when the record carries no full text value).
 */
public function getHTMLFullText()
{
    $rawText = $this->fields['FullText']['Text']['Value'] ?? '';
    return $this->toHTML($rawText);
}
198 | |
/**
 * Get the full text availability of the record.
 *
 * @return bool True when the full text "Availability" flag loosely equals
 * '1'; a missing flag is treated as '0'.
 */
public function hasHTMLFullTextAvailable()
{
    $availability = $this->fields['FullText']['Text']['Availability'] ?? '0';
    // Loose comparison kept so that e.g. integer 1 also counts as available.
    return $availability == '1';
}
208 | |
/**
 * Support method for getItems, used to apply filters.
 *
 * The global filter section is always consulted; 'result-list' and 'core'
 * contexts each add one extra section. An item is excluded as soon as its
 * label or group appears in any consulted section.
 *
 * @param array  $item    Item to check (its 'Label' and 'Group' are read)
 * @param string $context The context in which items are being retrieved
 * (used for context-sensitive filtering)
 *
 * @return bool True if the item should be filtered out
 */
protected function itemIsExcluded($item, $context)
{
    // Work out which config sections apply to this context:
    $sectionNames = ['ItemGlobalFilter'];
    if ($context == 'result-list') {
        $sectionNames[] = 'ItemResultListFilter';
    } elseif ($context == 'core') {
        $sectionNames[] = 'ItemCoreFilter';
    }

    foreach ($sectionNames as $sectionName) {
        $settings = [];
        if (isset($this->recordConfig->$sectionName)) {
            $settings = $this->recordConfig->$sectionName->toArray();
        }
        // Casting lets a single configured value behave like a list.
        $excludedLabels = (array)($settings['excludeLabel'] ?? []);
        $excludedGroups = (array)($settings['excludeGroup'] ?? []);
        if (in_array($item['Label'], $excludedLabels)) {
            return true;
        }
        if (in_array($item['Group'], $excludedGroups)) {
            return true;
        }
    }

    // No section matched, so the item stays:
    return false;
}
246 | |
/**
 * Get the items of the record.
 *
 * Items are optionally re-ordered according to the ItemGlobalOrder
 * configuration, normalized to Label/Group/Name/Data entries (Data is
 * converted with toHTML()), and then filtered by the requested
 * label/group/name and by the configured exclusion filters.
 *
 * @param string $context     The context in which items are being retrieved
 * (used for context-sensitive filtering)
 * @param string $labelFilter A specific label to retrieve (filter out others;
 * null for no filter)
 * @param string $groupFilter A specific group to retrieve (filter out others;
 * null for no filter)
 * @param string $nameFilter  A specific name to retrieve (filter out others;
 * null for no filter)
 *
 * @return array
 */
public function getItems(
    $context = null,
    $labelFilter = null,
    $groupFilter = null,
    $nameFilter = null
) {
    if (!is_array($this->fields['Items'] ?? null)) {
        return [];
    }

    $origItems = $this->fields['Items'];
    $itemGlobalOrderConfig = $this->recordConfig?->ItemGlobalOrder?->toArray() ?? [];
    // Only sort by label if we have a sort config and we're fetching multiple labels:
    if (!empty($itemGlobalOrderConfig) && $labelFilter === null) {
        // We want unassigned labels to appear AFTER configured labels:
        $nextPos = max(array_keys($itemGlobalOrderConfig));
        foreach (array_keys($origItems) as $key) {
            $label = $origItems[$key]['Label'] ?? '';
            $configuredPos = array_search($label, $itemGlobalOrderConfig);
            $origItems[$key]['Pos'] = $configuredPos === false
                ? ++$nextPos : $configuredPos;
        }
        $positions = array_column($origItems, 'Pos');
        array_multisort($positions, SORT_ASC, $origItems);
    }

    $items = [];
    foreach ($origItems as $item) {
        $label = $item['Label'] ?? '';
        $group = $item['Group'] ?? '';
        $name = $item['Name'] ?? '';

        // Apply the cheap caller-supplied filters first so that the costly
        // HTML conversion only runs for items that can still be returned.
        if (
            ($labelFilter !== null && $label !== $labelFilter)
            || ($groupFilter !== null && $group !== $groupFilter)
            || ($nameFilter !== null && $name !== $nameFilter)
        ) {
            continue;
        }

        $nextItem = [
            'Label' => $label,
            'Group' => $group,
            'Name' => $name,
            // Use the defaulted group so that an item without a "Group" key
            // does not trigger an undefined index warning.
            'Data' => isset($item['Data'])
                ? $this->toHTML($item['Data'], $group) : '',
        ];
        if (!$this->itemIsExcluded($nextItem, $context)) {
            $items[] = $nextItem;
        }
    }
    return $items;
}
305 | |
/**
 * Get the "PLink" URL of the record.
 *
 * @return string URL, or an empty string when the field is absent.
 */
public function getPLink()
{
    if (isset($this->fields['PLink'])) {
        return $this->fields['PLink'];
    }
    return '';
}
315 | |
/**
 * Get the publication type of the record, as stored in the record header.
 *
 * @return string Publication type, or an empty string when absent.
 */
public function getPubType()
{
    $header = $this->fields['Header'] ?? [];
    return $header['PubType'] ?? '';
}
325 | |
/**
 * Get the publication type id of the record, as stored in the record
 * header.
 *
 * @return string Publication type id, or an empty string when absent.
 */
public function getPubTypeId()
{
    $header = $this->fields['Header'] ?? [];
    return $header['PubTypeId'] ?? '';
}
335 | |
/**
 * Check whether the record has at least one full text link whose type is
 * in the supplied list.
 *
 * @param array $types Types that we are interested in checking for
 *
 * @return bool
 */
protected function hasEbookAvailable(array $types)
{
    $links = $this->fields['FullText']['Links'] ?? [];
    foreach ($links as $candidate) {
        $linkType = $candidate['Type'] ?? '';
        if (in_array($linkType, $types)) {
            return true;
        }
    }
    return false;
}
352 | |
/**
 * Get the PDF availability of the record (any full text link whose type
 * is one of the configured PDF types).
 *
 * @return bool
 */
public function hasPdfAvailable()
{
    $wantedTypes = $this->pdfTypes;
    return $this->hasEbookAvailable($wantedTypes);
}
362 | |
/**
 * Get the ePub availability of the record (any full text link whose type
 * is one of the configured ePub types).
 *
 * @return bool
 */
public function hasEpubAvailable()
{
    $wantedTypes = $this->epubTypes;
    return $this->hasEbookAvailable($wantedTypes);
}
372 | |
/**
 * Get the linked full text availability of the record (any full text link
 * of type "other").
 *
 * @return bool
 */
public function hasLinkedFullTextAvailable()
{
    $wantedTypes = ['other'];
    return $this->hasEbookAvailable($wantedTypes);
}
382 | |
/**
 * Get the URL of the first full text link that has a non-empty type and
 * URL and whose type is in the supplied list.
 *
 * @param array $types Types that we are interested in checking for
 *
 * @return string|false URL of the matching link, or false if none matches
 */
public function getEbookLink(array $types)
{
    $links = $this->fields['FullText']['Links'] ?? [];
    foreach ($links as $candidate) {
        // Links lacking a type or a URL can never be returned:
        if (empty($candidate['Type']) || empty($candidate['Url'])) {
            continue;
        }
        if (in_array($candidate['Type'], $types)) {
            return $candidate['Url'];
        }
    }
    return false;
}
402 | |
/**
 * Get the PDF url of the record.
 *
 * @return string|false URL, or false if the record has no usable PDF link
 */
public function getPdfLink()
{
    $wantedTypes = $this->pdfTypes;
    return $this->getEbookLink($wantedTypes);
}
412 | |
/**
 * Get the ePub url of the record.
 *
 * @return string|false URL, or false if the record has no usable ePub link
 */
public function getEpubLink()
{
    $wantedTypes = $this->epubTypes;
    return $this->getEbookLink($wantedTypes);
}
422 | |
/**
 * Get the linked full text url of the record (first usable link of type
 * "other").
 *
 * @return string|false URL, or false if the record has no such link
 */
public function getLinkedFullTextLink()
{
    $wantedTypes = ['other'];
    return $this->getEbookLink($wantedTypes);
}
432 | |
/**
 * Get the subject data of the record as a single ", "-separated string
 * built from the "Data" of every item in the "Su" group.
 *
 * @return string Joined subjects, or an empty string when there are none
 */
public function getItemsSubjects()
{
    $subjectData = [];
    foreach ($this->getItems(null, null, 'Su') as $subjectItem) {
        $subjectData[] = $subjectItem['Data'];
    }
    // Joining an empty list yields '', so no separate empty check is needed.
    return implode(', ', $subjectData);
}
448 | |
/**
 * Return a URL to a thumbnail preview of the record, if available; false
 * otherwise. The first "ImageInfo" entry whose size matches is used.
 *
 * @param string $size Size of thumbnail (small, medium or large -- small is
 * default).
 *
 * @return string|false Target of the matching image (empty string if that
 * image has no target), or false when no image has the requested size
 */
public function getThumbnail($size = 'small')
{
    $images = $this->fields['ImageInfo'] ?? [];
    foreach ($images as $candidate) {
        $candidateSize = $candidate['Size'] ?? '';
        if ($size == $candidateSize) {
            return $candidate['Target'] ?? '';
        }
    }
    return false;
}
467 | |
/**
 * Get the title of the record, taken from the first item in the "Ti"
 * group.
 *
 * @return string Title data, or an empty string when no such item exists.
 */
public function getItemsTitle()
{
    return $this->getItems(null, null, 'Ti')[0]['Data'] ?? '';
}
478 | |
/**
 * Obtain the title of the record from the record info section: the
 * "TitleFull" of the first title entry whose type is "main".
 *
 * @return string Title, or an empty string when there is no main title (or
 * the main title entry carries no TitleFull value).
 */
public function getTitle()
{
    $list = $this->extractEbscoDataFromRecordInfo('BibRecord/BibEntity/Titles');
    foreach ($list as $titleRecord) {
        if ('main' == ($titleRecord['Type'] ?? '')) {
            // Default to '' so an incomplete title entry neither raises an
            // undefined index warning nor leaks null to callers.
            return $titleRecord['TitleFull'] ?? '';
        }
    }
    return '';
}
494 | |
/**
 * Obtain the authors of a record from the RecordInfo section (full names
 * of all contributor relationships), with empty values and duplicates
 * removed. Original array keys are preserved by the filtering.
 *
 * @return array
 */
public function getPrimaryAuthors()
{
    $namePath = 'BibRecord/BibRelationships/HasContributorRelationships/*/'
        . 'PersonEntity/Name/NameFull';
    $names = $this->extractEbscoDataFromRecordInfo($namePath);
    $nonEmptyNames = array_filter($names);
    return array_unique($nonEmptyNames);
}
508 | |
/**
 * Get the source of the record, taken from the first item in the "Src"
 * group.
 *
 * @return string Source data, or an empty string when no such item exists.
 */
public function getItemsTitleSource()
{
    return $this->getItems(null, null, 'Src')[0]['Data'] ?? '';
}
519 | |
/**
 * Turn every http(s) URL in a string into an anchor that uses the URL as
 * both target and (entity-encoded) text. When the whole string is a single
 * <link linkTarget="URL" ...> element, its linkTerm is used as the string
 * to process instead.
 *
 * @param string $string String to process
 *
 * @return string HTML string
 */
public function linkUrls($string)
{
    $linkElementPattern
        = '/^<link linkTarget="URL" linkTerm="([^"]+)"[^<]*<\/link>$/';
    if (preg_match($linkElementPattern, $string, $linkParts)) {
        // Unwrap the link element; only its target URL is kept.
        $string = $linkParts[1];
    }
    return preg_replace_callback(
        "/\b(https?):\/\/([-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|]*)\b/i",
        static fn ($hit) => "<a href='" . $hit[0] . "'>"
            . htmlentities($hit[0]) . '</a>',
        $string
    );
}
548 | |
/**
 * Convert EDS XML-style markup held in a string into an HTML string.
 *
 * The input is entity-decoded, known XML tags are swapped for HTML tags,
 * searchLink elements become search anchors (for the allowed groups) or
 * plain spans, bibliography anchors are rewritten, and finally line breaks
 * are replaced with commas for selected groups.
 *
 * @param string $data  Data to convert (passed to html_entity_decode())
 * @param string $group Group identifier
 *
 * @return string The HTML string
 */
protected function toHTML($data, $group = null)
{
    // Map xml tags to the HTML tags
    // This is just a small list, the total number of xml tags is far greater

    // Groups whose searchLink elements are transformed into HTML search links.
    // Other groups, not present here, won't be transformed to HTML links
    $allowed_searchlink_groups = ['au','su'];

    // NOTE: str_replace() applies these pairs in order, so later entries see
    // the output of earlier ones; keep the ordering intact when editing.
    $xml_to_html_tags = [
        '<jsection' => '<section',
        '</jsection' => '</section',
        '<highlight' => '<span class="highlight"',
        '<highligh' => '<span class="highlight"', // Temporary bug fix
        '</highlight>' => '</span>', // Temporary bug fix
        '</highligh' => '</span>',
        '<text' => '<div',
        '</text' => '</div',
        '<title' => '<h2',
        '</title' => '</h2',
        '<anid' => '<p',
        '</anid' => '</p',
        '<aug' => '<p class="aug"',
        '</aug' => '</p',
        '<hd' => '<h3',
        '</hd' => '</h3',
        '<linebr' => '<br',
        '</linebr' => '',
        '<olist' => '<ol',
        '</olist' => '</ol',
        '<reflink' => '<a',
        '</reflink' => '</a',
        '<blist' => '<p class="blist"',
        '</blist' => '</p',
        '<bibl' => '<a',
        '</bibl' => '</a',
        '<bibtext' => '<span',
        '</bibtext' => '</span',
        '<ref' => '<div class="ref"',
        '</ref' => '</div',
        '<ulink' => '<a',
        '</ulink' => '</a',
        '<superscript' => '<sup',
        '</superscript' => '</sup',
        '<relatesTo' => '<sup',
        '</relatesTo' => '</sup',
    ];

    // The XML data is escaped, let's unescape html entities (e.g. &lt; => <)
    $data = html_entity_decode($data, ENT_QUOTES, 'utf-8');

    // Start parsing the xml data
    if (!empty($data)) {
        // Replace the XML tags with HTML tags
        $search = array_keys($xml_to_html_tags);
        $replace = array_values($xml_to_html_tags);
        $data = str_replace($search, $replace, $data);

        // Temporary : fix unclosed tags
        // NOTE(review): '</highligh' was already replaced by the table above,
        // so the first pattern below looks like it can no longer match --
        // confirm before relying on it.
        $data = preg_replace('/<\/highlight/', '</span>', $data);
        $data = preg_replace('/<\/span>>/', '</span>', $data);
        $data = preg_replace('/<\/searchLink/', '</searchLink>', $data);
        $data = preg_replace('/<\/searchLink>>/', '</searchLink>', $data);

        //$searchBase = $this->url('eds-search');
        // Parse searchLinks
        if (!empty($group)) {
            // Lower-cased here so the group checks below (and in
            // replaceBRWithCommas()) are case-insensitive.
            $group = strtolower($group);
            if (in_array($group, $allowed_searchlink_groups)) {
                $type = strtoupper($group);
                $link_xml = '/<searchLink fieldCode="([^\"]*)" '
                    . 'term="(%22[^\"]*%22)">/';
                $link_html = '<a href="../EDS/Search?lookfor=$2&type='
                    . urlencode($type) . '">';
                $data = preg_replace($link_xml, $link_html, $data);
                $data = str_replace('</searchLink>', '</a>', $data);
            }
        }

        // Replace the rest of searchLinks with simple spans
        $link_xml = '/<searchLink fieldCode="([^\"]*)" term="%22([^\"]*)%22">/';
        $link_html = '<span>';
        $data = preg_replace($link_xml, $link_html, $data);
        $data = str_replace('</searchLink>', '</span>', $data);

        // Parse bibliography (anchors and links)
        $data = preg_replace('/<a idref="([^\"]*)"/', '<a href="#$1"', $data);
        $data = preg_replace(
            '/<a id="([^\"]*)" idref="([^\"]*)" type="([^\"]*)"/',
            '<a id="$1" href="#$2"',
            $data
        );

        $data = $this->replaceBRWithCommas($data, $group);
    }

    return $data;
}
656 | |
/**
 * Replace "<br />" tags that are embedded in data with ", " -- but only
 * for the "au" and "su" groups; data of other groups is returned as is.
 *
 * @param string $data  Data to process
 * @param string $group Group identifier
 *
 * @return string
 */
protected function replaceBRWithCommas($data, $group)
{
    if (!in_array($group, ['au','su'])) {
        return $data;
    }
    return preg_replace('/<br \/>/', ', ', $data);
}
675 | |
/**
 * Return the first DOI found in the record (false if none): the data of
 * the first item named "DOI" when present, otherwise the first identifier
 * of type "doi" from the record info.
 *
 * @return mixed
 */
public function getCleanDOI()
{
    return $this->getItems(null, null, null, 'DOI')[0]['Data']
        ?? $this->getFilteredIdentifiers(['doi'])[0]
        ?? false;
}
690 | |
/**
 * Get record languages, using the first of these sources that yields data:
 * the record info language texts, then the "Languages" items, then the
 * "Language" items.
 *
 * @return array
 */
public function getLanguages()
{
    $prioritizedSelectors = [
        'RecordInfo:BibRecord/BibEntity/Languages/*/Text',
        'Items:Languages',
        'Items:Language',
    ];
    return $this->extractEbscoData($prioritizedSelectors);
}
706 | |
/**
 * Retrieve identifiers from the EBSCO record and retrieve values filtered by
 * type. Identifiers of containing records (IsPartOfRelationships) come
 * first, followed by the record's own identifiers.
 *
 * @param array $filter Type values to retrieve (lowercase).
 *
 * @return array
 */
protected function getFilteredIdentifiers($filter)
{
    $raw = array_merge(
        $this->extractEbscoDataFromRecordInfo(
            'BibRecord/BibRelationships/IsPartOfRelationships/*'
            . '/BibEntity/Identifiers'
        ),
        $this->extractEbscoDataFromRecordInfo(
            'BibRecord/BibEntity/Identifiers'
        )
    );

    // When several relationships exist, the wildcard lookup yields one list
    // of identifiers per relationship rather than a flat list. Unwrap those
    // nested lists so their identifiers are not silently ignored.
    $records = [];
    foreach ($raw as $entry) {
        $isNestedList = is_array($entry)
            && !isset($entry['Type'])
            && !isset($entry['Value']);
        if ($isNestedList) {
            foreach ($entry as $child) {
                $records[] = $child;
            }
        } else {
            $records[] = $entry;
        }
    }

    $ids = [];
    foreach ($records as $data) {
        $type = strtolower($data['Type'] ?? '');
        if (isset($data['Value']) && in_array($type, $filter)) {
            $ids[] = $data['Value'];
        }
    }
    return $ids;
}
735 | |
/**
 * Get ISSNs: all identifiers of type "issn-print" or "issn-electronic".
 *
 * @return array
 */
public function getISSNs()
{
    $issnTypes = ['issn-print', 'issn-electronic'];
    return $this->getFilteredIdentifiers($issnTypes);
}
745 | |
/**
 * Get an array of ISBNs: all identifiers of type "isbn-print" or
 * "isbn-electronic".
 *
 * @return array
 */
public function getISBNs()
{
    $isbnTypes = ['isbn-print', 'isbn-electronic'];
    return $this->getFilteredIdentifiers($isbnTypes);
}
755 | |
/**
 * Get title of containing record: the full title of the first title of the
 * first IsPartOfRelationships entry, but only when the record has a
 * "Source" item.
 *
 * @return string Container title, or an empty string when unavailable
 */
public function getContainerTitle()
{
    // Without a source we do not want to identify a container (in this
    // situation, it is likely redundant data):
    $sources = $this->extractEbscoDataFromItems('Source');
    if (count($sources) === 0) {
        return '';
    }
    $titlePath = 'BibRecord/BibRelationships/IsPartOfRelationships/0'
        . '/BibEntity/Titles/0/TitleFull';
    $titles = $this->extractEbscoDataFromRecordInfo($titlePath);
    return $titles[0] ?? '';
}
774 | |
/**
 * Extract numbering data of a particular type from the containing
 * record(s): the first non-empty value whose type matches.
 *
 * @param string $type Numbering type to return, if present (lowercase).
 *
 * @return string Matching value, or an empty string if none is found
 */
protected function getFilteredNumbering($type)
{
    $numbering = $this->extractEbscoDataFromRecordInfo(
        'BibRecord/BibRelationships/IsPartOfRelationships/*/BibEntity/Numbering'
    );

    // When several relationships exist, the wildcard lookup yields one list
    // of numbering entries per relationship rather than a flat list. Unwrap
    // those nested lists so their entries are not silently ignored.
    $records = [];
    foreach ($numbering as $entry) {
        $isNestedList = is_array($entry)
            && !isset($entry['Type'])
            && !isset($entry['Value']);
        if ($isNestedList) {
            foreach ($entry as $child) {
                $records[] = $child;
            }
        } else {
            $records[] = $entry;
        }
    }

    foreach ($records as $data) {
        if (
            strtolower($data['Type'] ?? '') == $type
            && !empty($data['Value'])
        ) {
            return $data['Value'];
        }
    }
    return '';
}
797 | |
/**
 * Get issue of containing record (numbering entry of type "issue").
 *
 * @return string Issue, or an empty string when unavailable
 */
public function getContainerIssue()
{
    $numberingType = 'issue';
    return $this->getFilteredNumbering($numberingType);
}
807 | |
/**
 * Get volume of containing record (numbering entry of type "volume").
 *
 * @return string Volume, or an empty string when unavailable
 */
public function getContainerVolume()
{
    $numberingType = 'volume';
    return $this->getFilteredNumbering($numberingType);
}
817 | |
/**
 * Get the publication dates of the record. See also getDateSpan().
 *
 * Dates parsed from the raw EDS publication details win; when there are
 * none, the year of the first date of the first containing record is used.
 *
 * @return array
 */
public function getPublicationDates()
{
    $parsedDates = array_map(
        static fn ($detail) => $detail->getDate(),
        $this->getRawEDSPublicationDetails()
    );
    if (!empty($parsedDates)) {
        return $parsedDates;
    }
    return $this->extractEbscoDataFromRecordInfo(
        'BibRecord/BibRelationships/IsPartOfRelationships/0/BibEntity/Dates/0/Y'
    );
}
835 | |
/**
 * Get the start page of the record, read from the pagination data of its
 * physical description.
 *
 * @return string Start page, or an empty string when unavailable
 */
public function getContainerStartPage()
{
    $paginationPath = 'BibRecord/BibEntity/PhysicalDescription/Pagination';
    $pageInfo = $this->extractEbscoDataFromRecordInfo($paginationPath);
    return $pageInfo['StartPage'] ?? '';
}
848 | |
/**
 * Get the end page of the item that contains this record.
 *
 * EBSCO doesn't make this information readily available, but in some cases
 * it can be extracted from an OpenURL among the full text custom links
 * (a "&pages=<start>-<end>" parameter).
 *
 * @return string End page, or an empty string when it cannot be determined
 */
public function getContainerEndPage()
{
    $startPage = $this->getContainerStartPage();
    if (empty($startPage)) {
        return '';
    }
    // Quote the start page: values such as "e12", "12+" or "1/2" would
    // otherwise be interpreted as (or break) regular expression syntax.
    $regex = '/&pages=' . preg_quote((string)$startPage, '/') . '-(\d+)/';
    foreach ($this->getFTCustomLinks() as $link) {
        if (preg_match($regex, $link['Url'] ?? '', $matches)) {
            return $matches[1];
        }
    }
    return '';
}
871 | |
/**
 * Returns an array of formats based on publication type.
 *
 * The publication type is matched case-insensitively; unrecognized types
 * are returned unchanged as the only format.
 *
 * @return array
 */
public function getFormats()
{
    $pubType = $this->getPubType();
    return match (strtolower($pubType)) {
        // Add "article" format for better OpenURL generation
        'academic journal', 'periodical', 'report' => [$pubType, 'Article'],
        // Treat eBooks as both "Books" and "Electronic" items
        'ebook' => ['Book', 'Electronic'],
        // Simplify wording for consistency with other drivers
        'dissertation/thesis' => ['Thesis'],
        default => [$pubType],
    };
}
904 | |
/**
 * Get the publishers of the record: the name of each entry of the raw EDS
 * publication details.
 *
 * @return array
 */
public function getPublishers()
{
    $details = $this->getRawEDSPublicationDetails();
    return array_map(
        static fn ($detail) => $detail->getName(),
        $details
    );
}
919 | |
/**
 * Get the item's places of publication: the place of each entry of the raw
 * EDS publication details.
 *
 * @return array
 */
public function getPlacesOfPublication()
{
    $details = $this->getRawEDSPublicationDetails();
    return array_map(
        static fn ($detail) => $detail->getPlace(),
        $details
    );
}
934 | |
/**
 * Get an array of publication detail lines. Details built from raw EDS
 * data are preferred; the parent implementation is the fallback when EDS
 * yields none.
 *
 * @return array
 */
public function getPublicationDetails()
{
    $edsDetails = $this->getRawEDSPublicationDetails();
    if (empty($edsDetails)) {
        return parent::getPublicationDetails();
    }
    return $edsDetails;
}
946 | |
/**
 * Attempt to build up publication details from raw EDS data (the items
 * labelled "Publication Information").
 *
 * Each item is parsed as "place: publisher. year" when possible, then as
 * "place: publisher"; otherwise the whole text is used as the publisher.
 *
 * @return array Array of Response\PublicationDetails objects
 */
protected function getRawEDSPublicationDetails()
{
    $details = [];
    foreach ($this->getItems(null, 'Publication Information') as $item) {
        $text = $item['Data'];
        // Try to extract place, publisher and date:
        if (preg_match('/^(.+):(.*)\.\s*(\d{4})$/', $text, $matches)) {
            $place = trim($matches[1]);
            $publisher = trim($matches[2]);
            $date = $matches[3];
        } elseif (preg_match('/^(.+):(.*)$/', $text, $matches)) {
            $place = trim($matches[1]);
            $publisher = trim($matches[2]);
            $date = '';
        } else {
            $place = '';
            $publisher = $text;
            $date = '';
        }

        // In some cases, the place may have noise on the front that needs
        // to be removed: keep only the text after the last period, unless
        // that remainder is too short to be a plausible place.
        $segments = explode('.', $place);
        $lastSegment = end($segments);
        $details[] = new Response\PublicationDetails(
            strlen($lastSegment) > 5 ? $lastSegment : $place,
            $publisher,
            $date
        );
    }
    return $details;
}
977 | |
/**
 * Extract data from EBSCO API response using a prioritized list of selectors.
 * Selectors can be of the form Items:Label to invoke extractEbscoDataFromItems,
 * or RecordInfo:Path/To/Data/Element to invoke extractEbscoDataFromRecordInfo.
 * The result of the first selector that yields non-empty data is returned.
 *
 * @param array $selectors Array of selector strings for extracting data.
 *
 * @return array
 *
 * @throws \Exception When a selector names an extraction method that cannot
 * be called
 */
protected function extractEbscoData($selectors)
{
    $found = [];
    foreach ($selectors as $selector) {
        // Split "Source:argument"; only the first colon separates the two.
        [$source, $argument] = explode(':', $selector, 2);
        $handler = 'extractEbscoDataFrom' . ucwords($source);
        if (!is_callable([$this, $handler])) {
            throw new \Exception('Undefined method: ' . $handler);
        }
        $found = $this->$handler($argument);
        if (!empty($found)) {
            return $found;
        }
    }
    // Nothing matched: hand back the last (empty) result.
    return $found;
}
1003 | |
/**
 * Extract data from the record's "Items" array, based on a label: the
 * "Data" value of every item carrying that label.
 *
 * @param string $label Label to filter on.
 *
 * @return array
 */
protected function extractEbscoDataFromItems($label)
{
    $matchingItems = $this->getItems(null, $label);
    // array_values() keeps the result a plain list, whatever the item keys.
    return array_values(
        array_map(
            static fn ($item) => $item['Data'],
            $matchingItems
        )
    );
}
1020 | |
/**
 * Extract data from the record's "RecordInfo" array, based on a path. The
 * result is always cast to an array, so a single scalar match comes back
 * as a one-element array.
 *
 * @param string $path Path to select with (slash-separated element names,
 * with special * selector to iterate through all children).
 *
 * @return array
 */
protected function extractEbscoDataFromRecordInfo($path)
{
    $recordInfo = $this->fields['RecordInfo'] ?? [];
    $pathSegments = explode('/', $path);
    $selected = $this->recurseIntoRecordInfo($recordInfo, $pathSegments);
    return (array)$selected;
}
1036 | |
/**
 * Recursive support method for extractEbscoDataFromRecordInfo().
 *
 * Consumes one path segment per call. A "*" segment visits every child of
 * the current data; any other segment selects the child with that key.
 * When exactly one value is collected at a level, that value is returned
 * directly instead of being wrapped in a list.
 *
 * @param array $data Data to recurse into
 * @param array $path Array representing path into data
 *
 * @return mixed Selected value(s); an empty array when nothing matches
 */
protected function recurseIntoRecordInfo($data, $path)
{
    // Path segments can only be resolved against arrays. Without this guard
    // a "*" segment on a scalar is a fatal TypeError, and a numeric segment
    // on a string would silently pick out a single character.
    if (!is_array($data)) {
        return [];
    }
    $nextField = array_shift($path);
    $keys = $nextField === '*' ? array_keys($data) : [$nextField];
    $values = [];
    foreach ($keys as $key) {
        if (isset($data[$key])) {
            $values[] = empty($path)
                ? $data[$key]
                : $this->recurseIntoRecordInfo($data[$key], $path);
        }
    }
    return count($values) == 1 ? $values[0] : $values;
}
1059 | } |