Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 110 |
|
0.00% |
0 / 9 |
CRAP | |
0.00% |
0 / 1 |
Iso2709 | |
0.00% |
0 / 110 |
|
0.00% |
0 / 9 |
812 | |
0.00% |
0 / 1 |
canParse | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
canParseCollection | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
canParseCollectionFile | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
collectionFromString | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
fromString | |
0.00% |
0 / 39 |
|
0.00% |
0 / 1 |
72 | |||
toString | |
0.00% |
0 / 36 |
|
0.00% |
0 / 1 |
56 | |||
openCollectionFile | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
rewind | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getNextRecord | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | |
3 | /** |
4 | * ISO2709 MARC exchange format support class. |
5 | * |
6 | * PHP version 7 |
7 | * |
8 | * Copyright (C) The National Library of Finland 2020-2022. |
9 | * |
10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2, |
12 | * as published by the Free Software Foundation. |
13 | * |
14 | * This program is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | * GNU General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU General Public License |
20 | * along with this program; if not, write to the Free Software |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 | * |
23 | * @category VuFind |
24 | * @package MARC |
25 | * @author Ere Maijala <ere.maijala@helsinki.fi> |
26 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
27 | * @link https://vufind.org/wiki/development:plugins:record_drivers Wiki |
28 | */ |
29 | |
30 | namespace VuFind\Marc\Serialization; |
31 | |
32 | use function array_slice; |
33 | use function is_array; |
34 | use function strlen; |
35 | |
36 | /** |
37 | * ISO2709 exchange format support class. |
38 | * |
39 | * @category VuFind |
40 | * @package MARC |
41 | * @author Ere Maijala <ere.maijala@helsinki.fi> |
42 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
43 | * @link https://vufind.org/wiki/development:plugins:record_drivers Wiki |
44 | */ |
45 | class Iso2709 extends AbstractSerializationFile implements SerializationInterface |
46 | { |
/**
 * Delimiter that precedes every subfield code inside a data field
 */
public const SUBFIELD_INDICATOR = "\x1F";

/**
 * Terminator written after the directory and after every field
 */
public const END_OF_FIELD = "\x1E";

/**
 * Terminator written after a complete record
 */
public const END_OF_RECORD = "\x1D";

/**
 * Length of the record leader in bytes; the directory starts right after it
 */
public const LEADER_LEN = 24;

/**
 * Maximum number of bytes read for a single record from a collection file
 */
public const MAX_LENGTH = 99999;

/**
 * Serialized record file handle (null until a collection file has been
 * opened)
 *
 * @var resource|null
 */
protected $file = null;
59 | |
/**
 * Tell whether the given string looks like an ISO2709 record
 *
 * @param string $marc MARC
 *
 * @return bool
 */
public static function canParse(string $marc): bool
{
    // Only the first (at most) four bytes are inspected. This is a naïve
    // check, but it is enough to tell the different formats apart.
    $prefix = substr($marc, 0, 4);

    return ctype_digit($prefix);
}
72 | |
/**
 * Tell whether the given string looks like a collection of ISO2709 records
 *
 * @param string $marc MARC
 *
 * @return bool
 */
public static function canParseCollection(string $marc): bool
{
    // Only the first (at most) five bytes are inspected. This is a naïve
    // check, but it is enough to tell the different formats apart.
    $prefix = substr($marc, 0, 5);

    return ctype_digit($prefix);
}
85 | |
/**
 * Check if the serialization class can parse the given MARC collection file
 *
 * Reads just enough from the beginning of the file to have five bytes after
 * any leading whitespace, and passes that whitespace-free prefix on to
 * canParseCollection().
 *
 * @param string $file File name
 *
 * @return bool
 *
 * @throws \Exception If the file cannot be opened
 */
public static function canParseCollectionFile(string $file): bool
{
    if (false === ($f = fopen($file, 'rb'))) {
        throw new \Exception("Cannot open file '$file' for reading");
    }
    $s = '';
    do {
        $chunk = fgets($f, 10);
        if (false === $chunk) {
            // EOF or a read error: stop here so that a failing read can never
            // keep the loop spinning.
            break;
        }
        // Drop leading whitespace as we go so that the check below sees the
        // first significant bytes of the file:
        $s = ltrim($s . $chunk);
    } while (strlen($s) < 5 && !feof($f));
    fclose($f);

    return self::canParseCollection($s);
}
106 | |
/**
 * Split a string containing several ISO2709 records into an array of records
 *
 * @param string $collection MARC record collection in the format supported by
 * the serialization class
 *
 * @return array
 */
public static function collectionFromString(string $collection): array
{
    $segments = explode(self::END_OF_RECORD, $collection);
    // Whatever follows the last end-of-record marker is not a complete record:
    array_pop($segments);

    $records = [];
    foreach ($segments as $segment) {
        // Remove any padding left between records and restore the
        // end-of-record marker consumed by explode:
        $records[] = ltrim($segment, "\x00\x0a\x0d") . self::END_OF_RECORD;
    }
    return $records;
}
131 | |
/**
 * Convert an ISO2709 string into an array with the keys 'leader' and 'fields'
 *
 * A 'warnings' key is added when any field lacks its end-of-field marker.
 *
 * @param string $marc ISO2709
 *
 * @return array
 */
public static function fromString(string $marc): array
{
    $leader = substr($marc, 0, 24);
    // Leader bytes 12-16 hold the offset where field data begins:
    $dataStart = (int)substr($marc, 12, 5);
    // The directory ends with a one-byte terminator just before the data:
    $directoryEnd = $dataStart - 1;
    $fields = [];
    $terminatorMissing = false;

    // Every directory entry is 12 bytes: tag (3), length (4) and offset (5).
    // Byte-based substr is used for all positions.
    for ($pos = self::LEADER_LEN; $pos < $directoryEnd; $pos += 12) {
        $tag = substr($marc, $pos, 3);
        $length = (int)substr($marc, $pos + 3, 4);
        $start = $dataStart + (int)substr($marc, $pos + 7, 5);
        $content = substr($marc, $start, $length);

        if (substr($content, -1, 1) === self::END_OF_FIELD) {
            $content = substr($content, 0, -1);
        } else {
            $terminatorMissing = true;
        }

        if (ctype_digit($tag) && $tag < 10) {
            // Control field: stored as a plain string
            $fields[] = [$tag => $content];
            continue;
        }

        // Data field. mb_substr keeps multibyte characters intact, and the
        // appended space guarantees there is something to use as an
        // indicator:
        $ind1 = mb_substr($content . ' ', 0, 1, 'UTF-8');
        $ind2 = mb_substr($content . ' ', 1, 1, 'UTF-8');
        $parts = explode(
            self::SUBFIELD_INDICATOR,
            mb_substr($content, 3, null, 'UTF-8')
        );
        $subfields = [];
        foreach ($parts as $part) {
            if ('' !== $part) {
                // First character is the subfield code, the rest is the
                // value:
                $code = mb_substr($part, 0, 1, 'UTF-8');
                $subfields[] = [$code => mb_substr($part, 1, null, 'UTF-8')];
            }
        }
        $fields[] = [
            $tag => [
                'ind1' => $ind1,
                'ind2' => $ind2,
                'subfields' => $subfields,
            ],
        ];
    }

    $result = ['leader' => $leader, 'fields' => $fields];
    if ($terminatorMissing) {
        $result['warnings'] = ['Invalid MARC record (end of field not found)'];
    }
    return $result;
}
202 | |
/**
 * Convert record to an ISO2709 string
 *
 * Missing 'leader', 'fields', 'ind1' or 'ind2' entries are treated as empty
 * (a blank leader, no fields, blank indicators).
 *
 * @param array $record Record data with the keys 'leader' and 'fields'
 *
 * @return string ISO2709 record, or an empty string if a field or the whole
 * record is too long to be represented in the format
 */
public static function toString(array $record): string
{
    $directory = '';
    $data = '';
    $datapos = 0;
    foreach ($record['fields'] ?? [] as $fieldData) {
        $tag = (string)key($fieldData);
        $field = current($fieldData);
        if (is_array($field)) {
            // Exactly one character per indicator, a space when missing:
            $fieldStr = mb_substr(($field['ind1'] ?? '') . ' ', 0, 1, 'UTF-8')
                . mb_substr(($field['ind2'] ?? '') . ' ', 0, 1, 'UTF-8');
            foreach ((array)($field['subfields'] ?? []) as $subfield) {
                $subfieldCode = (string)key($subfield);
                $fieldStr .= self::SUBFIELD_INDICATOR
                    . $subfieldCode . current($subfield);
            }
        } else {
            $fieldStr = (string)$field;
        }
        $fieldStr .= self::END_OF_FIELD;
        $len = strlen($fieldStr);
        // A directory entry has four digits for the field length and five
        // for its offset:
        if ($len > 9999 || $datapos > self::MAX_LENGTH) {
            return '';
        }
        $directory .= $tag . str_pad((string)$len, 4, '0', STR_PAD_LEFT)
            . str_pad((string)$datapos, 5, '0', STR_PAD_LEFT);
        $datapos += $len;
        $data .= $fieldStr;
    }
    $directory .= self::END_OF_FIELD;
    $data .= self::END_OF_RECORD;
    $leader = str_pad(
        substr((string)($record['leader'] ?? ''), 0, self::LEADER_LEN),
        self::LEADER_LEN
    );
    $dataStart = self::LEADER_LEN + strlen($directory);
    $recordLen = $dataStart + strlen($data);
    if ($recordLen > self::MAX_LENGTH) {
        return '';
    }

    // Write the record length into leader bytes 0-4 and the data start
    // offset into bytes 12-16, keeping the rest of the leader as given:
    $leader = str_pad((string)$recordLen, 5, '0', STR_PAD_LEFT)
        . substr($leader, 5, 7)
        . str_pad((string)$dataStart, 5, '0', STR_PAD_LEFT)
        . substr($leader, 17);

    return $leader . $directory . $data;
}
258 | |
/**
 * Open a collection file
 *
 * The handle is stored only when opening succeeds. On failure the stored
 * handle is reset to null so that the "not open" checks of the other methods
 * keep working.
 *
 * @param string $file File name
 *
 * @return void
 *
 * @throws \Exception If the file cannot be opened
 */
public function openCollectionFile(string $file): void
{
    $handle = fopen($file, 'rb');
    if (false === $handle) {
        // Never keep fopen's false in the property: the other methods only
        // test it against null.
        $this->file = null;
        throw new \Exception("Cannot open file '$file' for reading");
    }
    $this->file = $handle;
}
274 | |
/**
 * Move the read position of the collection file back to its beginning
 *
 * @return void
 *
 * @throws \Exception If no collection file has been opened
 */
public function rewind(): void
{
    $handle = $this->file;
    if ($handle === null) {
        throw new \Exception('Collection file not open');
    }

    rewind($handle);
}
289 | |
/**
 * Get next record from the file or an empty string on EOF
 *
 * Segments that are empty after removing leading NUL, LF and CR bytes (e.g.
 * consecutive end-of-record markers) are skipped, so an empty string is
 * returned only when the file has no further records.
 *
 * @return string
 *
 * @throws \Exception If no collection file has been opened
 */
public function getNextRecord(): string
{
    if (null === $this->file) {
        throw new \Exception('Collection file not open');
    }
    while (true) {
        $chunk = stream_get_line(
            $this->file,
            self::MAX_LENGTH,
            self::END_OF_RECORD
        );
        if (false === $chunk) {
            // Nothing more to read
            return '';
        }
        $record = ltrim($chunk, "\x00\x0a\x0d");
        if ('' !== $record) {
            // Restore the end-of-record marker consumed by stream_get_line:
            return $record . self::END_OF_RECORD;
        }
        if (feof($this->file)) {
            // Only padding was left at the end of the file
            return '';
        }
    }
}
313 | } |