Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
88 / 88 |
|
100.00% |
11 / 11 |
CRAP | |
100.00% |
1 / 1 |
RecordXmlFormatter | |
100.00% |
88 / 88 |
|
100.00% |
11 / 11 |
35 | |
100.00% |
1 / 1 |
__construct | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
3 | |||
fixNamespaces | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
7 | |||
createTag | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getIdAdditions | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
getHeaderSetAdditions | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
4 | |||
getHeaderAdditions | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
8 | |||
extractHigherLevelAttributes | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
3 | |||
performGlobalReplace | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
format | |
100.00% |
33 / 33 |
|
100.00% |
1 / 1 |
3 | |||
needsSetNames | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setSetNames | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | /** |
4 | * OAI-PMH XML Record Formatter |
5 | * |
6 | * PHP version 7 |
7 | * |
8 | * Copyright (c) Demian Katz 2016. |
9 | * |
10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2, |
12 | * as published by the Free Software Foundation. |
13 | * |
14 | * This program is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | * GNU General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU General Public License |
20 | * along with this program; if not, write to the Free Software |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 | * |
23 | * @category VuFind |
24 | * @package Harvest_Tools |
25 | * @author Demian Katz <demian.katz@villanova.edu> |
26 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
27 | * @link https://vufind.org/wiki/indexing:oai-pmh Wiki |
28 | */ |
29 | |
30 | namespace VuFindHarvest\OaiPmh; |
31 | |
32 | /** |
33 | * OAI-PMH XML Record Formatter |
34 | * |
35 | * @category VuFind |
36 | * @package Harvest_Tools |
37 | * @author Demian Katz <demian.katz@villanova.edu> |
38 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
39 | * @link https://vufind.org/wiki/indexing:oai-pmh Wiki |
40 | */ |
class RecordXmlFormatter
{
    /**
     * Search strings for global search-and-replace (regular expressions passed
     * to preg_replace).
     *
     * @var array
     */
    protected $globalSearch = [];

    /**
     * Replacement strings for global search-and-replace.
     *
     * @var array
     */
    protected $globalReplace = [];

    /**
     * Tag to use for injecting IDs into XML (false for none)
     *
     * @var string|bool
     */
    protected $injectId = false;

    /**
     * Tag to use for injecting setSpecs (false for none)
     *
     * @var string|bool
     */
    protected $injectSetSpec = false;

    /**
     * Tag to use for injecting set names (false for none)
     *
     * @var string|bool
     */
    protected $injectSetName = false;

    /**
     * Tag to use for injecting datestamp (false for none)
     *
     * @var string|bool
     */
    protected $injectDate = false;

    /**
     * List of header elements to copy into body
     *
     * @var array
     */
    protected $injectHeaderElements = [];

    /**
     * Associative array of setSpec => setName
     *
     * @var array
     */
    protected $setNames = [];

    /**
     * Constructor
     *
     * @param array $settings Configuration settings; recognized keys are
     * globalSearch, globalReplace, injectId, injectDate, injectHeaderElements,
     * injectSetName and injectSetSpec. Unrecognized keys are ignored.
     */
    public function __construct($settings = [])
    {
        // Settings that may be mapped directly from $settings to class properties:
        $mappableSettings = [
            'globalSearch', 'globalReplace',
            'injectId', 'injectDate', 'injectHeaderElements',
            'injectSetName', 'injectSetSpec',
        ];
        foreach ($mappableSettings as $current) {
            if (isset($settings[$current])) {
                $this->$current = $settings[$current];
            }
        }

        // Where appropriate, normalize elements to array format (so a single
        // string setting behaves like a one-element list):
        $this->globalSearch = (array)$this->globalSearch;
        $this->globalReplace = (array)$this->globalReplace;
        $this->injectHeaderElements = (array)$this->injectHeaderElements;
    }

    /**
     * Fix namespaces in the top tag of the XML document to compensate for bugs
     * in the SimpleXML library.
     *
     * For every prefix in $ns that is used in $xml but declared neither in $xml
     * nor in $attr, an xmlns declaration is appended to $attr; the resulting
     * attribute string is then inserted before the first '>' in $xml.
     *
     * @param string $xml  XML document to clean up
     * @param array  $ns   Namespaces to check (prefix => URI)
     * @param string $attr Attributes extracted from the <metadata> tag
     *
     * @return string
     */
    protected function fixNamespaces($xml, $ns, $attr = '')
    {
        foreach ($ns as $key => $val) {
            if (
                !empty($key)
                && strpos($xml, $key . ':') !== false
                && strpos($xml, 'xmlns:' . $key) === false
                && strpos($attr, 'xmlns:' . $key) === false
            ) {
                $attr .= ' xmlns:' . $key . '="' . $val . '"';
            }
        }
        if (!empty($attr)) {
            // Use a callback so that "$1" / "\1" sequences inside attribute
            // values are inserted literally rather than being interpreted as
            // backreferences by preg_replace.
            $xml = preg_replace_callback(
                '/>/',
                function () use ($attr) {
                    return ' ' . $attr . '>';
                },
                $xml,
                1
            );
        }
        return $xml;
    }

    /**
     * Format a line of XML.
     *
     * @param string $tag   Tag name (used as-is, not escaped)
     * @param string $value Content of tag (escaped with htmlspecialchars)
     *
     * @return string
     */
    protected function createTag($tag, $value)
    {
        return "<{$tag}>" . htmlspecialchars($value) . "</{$tag}>";
    }

    /**
     * Format the ID as an XML tag for inclusion in final record.
     *
     * @param string $id Record ID
     *
     * @return string Tag markup, or an empty string when ID injection is off
     */
    protected function getIdAdditions($id)
    {
        return $this->injectId ? $this->createTag($this->injectId, $id) : '';
    }

    /**
     * Format setSpec header element as XML tags for inclusion in final record.
     *
     * @param object $setSpec Header setSpec element (in SimpleXML format).
     *
     * @return string
     */
    protected function getHeaderSetAdditions($setSpec)
    {
        $insert = '';
        foreach ($setSpec as $current) {
            $set = (string)$current;
            if ($this->injectSetSpec) {
                $insert .= $this->createTag($this->injectSetSpec, $set);
            }
            if ($this->injectSetName) {
                // Fall back to the raw setSpec when no name has been provided:
                $name = $this->setNames[$set] ?? $set;
                $insert .= $this->createTag($this->injectSetName, $name);
            }
        }
        return $insert;
    }

    /**
     * Format header elements as XML tags for inclusion in final record.
     *
     * @param object $header Header element (in SimpleXML format).
     *
     * @return string
     */
    protected function getHeaderAdditions($header)
    {
        $insert = '';
        if ($this->injectDate) {
            $insert .= $this
                ->createTag($this->injectDate, (string)$header->datestamp);
        }
        if (
            isset($header->setSpec)
            && ($this->injectSetSpec || $this->injectSetName)
        ) {
            $insert .= $this->getHeaderSetAdditions($header->setSpec);
        }
        // Requested header elements are copied verbatim (as serialized XML):
        foreach ($this->injectHeaderElements as $element) {
            if (isset($header->$element)) {
                $insert .= $header->$element->asXML();
            }
        }
        return $insert;
    }

    /**
     * Extract attributes from a higher-level tag that need to be inserted
     * into the metadata record contained within the tag.
     *
     * @param string $raw     The full outer XML
     * @param string $tagName The name of the outermost tag in $raw
     * @param string $record  The metadata record with the outer <metadata> tag
     * stripped off.
     *
     * @return array Attribute strings (name="value") found on the outer tag that
     * do not already appear anywhere in $record; empty when $raw does not begin
     * with the expected tag.
     */
    protected function extractHigherLevelAttributes(
        string $raw,
        string $tagName,
        string $record
    ): array {
        $extractedNs = [];
        $pattern = '/^<' . preg_quote($tagName, '/') . '([^\>]*)>/';
        // If $raw does not start with the expected tag (for example, because
        // asXML() prepended an XML declaration to a root element), there is
        // nothing to extract.
        if (!preg_match($pattern, $raw, $extractedNs)) {
            return [];
        }
        $attributes = [];
        preg_match_all(
            '/(^| )([^"]*"?[^"]*"|[^\']*\'?[^\']*\')/',
            $extractedNs[1],
            $attributes
        );
        // Skip all attributes that already appear deeper in the xml; this
        // helps prevent fatal errors caused by the same namespace declaration
        // appearing twice in a single tag.
        $extractedAttributes = [];
        foreach ($attributes[0] as $attribute) {
            $attribute = trim($attribute);
            if (strpos($record, $attribute) === false) {
                $extractedAttributes[] = $attribute;
            }
        }
        return $extractedAttributes;
    }

    /**
     * Perform global search and replace.
     *
     * @param string $xml XML to update.
     *
     * @return string
     */
    protected function performGlobalReplace($xml)
    {
        return empty($this->globalSearch)
            ? $xml
            : preg_replace($this->globalSearch, $this->globalReplace, $xml);
    }

    /**
     * Format a harvested record as a standalone XML string: strip the outer
     * <metadata> wrapper, apply global search-and-replace, inject any configured
     * ID/header values inside the first tag, and repair namespace declarations.
     *
     * @param string $id        ID of record to format.
     * @param object $recordObj Record to format (in SimpleXML format).
     *
     * @return string
     *
     * @throws \Exception If the record has no metadata element.
     */
    public function format($id, $recordObj)
    {
        if (!isset($recordObj->metadata)) {
            throw new \Exception('Unexpected missing record metadata.');
        }

        $raw = trim($recordObj->metadata->asXML());

        // Extract the actual metadata from inside the <metadata></metadata> tags;
        // there is probably a cleaner way to do this, but this simple method avoids
        // the complexity of dealing with namespaces in SimpleXML.
        //
        // We should also apply global search and replace at this time, if
        // applicable.
        $record = $this->performGlobalReplace(
            preg_replace('/(^<metadata[^\>]*>)|(<\/metadata>$)/m', '', $raw)
        );

        // Collect attributes (for proper namespace resolution):
        $metadataAttributes = $this->extractHigherLevelAttributes(
            $raw,
            'metadata',
            $record
        );
        $recordAttributes = $this->extractHigherLevelAttributes(
            trim($recordObj->asXML()),
            'record',
            $record
        );
        $extraAttributes = implode(
            ' ',
            array_unique(
                array_merge($metadataAttributes, $recordAttributes)
            )
        );

        // If we are supposed to inject any values, do so now inside the first
        // tag of the file. A callback is used so that "$1" / "\1" sequences in
        // IDs or header values are inserted literally instead of being treated
        // as backreferences.
        $insert = $this->getIdAdditions($id)
            . $this->getHeaderAdditions($recordObj->header);
        $xml = $record;
        if (!empty($insert)) {
            $xml = preg_replace_callback(
                '/>/',
                function () use ($insert) {
                    return '>' . $insert;
                },
                $record,
                1
            );
        }

        // Build the final record:
        return trim(
            $this->fixNamespaces(
                $xml,
                $recordObj->getDocNamespaces(),
                $extraAttributes
            )
        );
    }

    /**
     * Do we need access to set information?
     *
     * @return bool True when a set name injection tag has been configured.
     */
    public function needsSetNames()
    {
        // injectSetName holds a tag name or false; report it as a real boolean.
        return (bool)$this->injectSetName;
    }

    /**
     * Inject set name information.
     *
     * @param array $names Associative array of setSpec => setName
     *
     * @return void
     */
    public function setSetNames($names)
    {
        $this->setNames = $names;
    }
}