Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 42 |
|
0.00% |
0 / 6 |
CRAP | |
0.00% |
0 / 1 |
RecordWriter | |
0.00% |
0 / 42 |
|
0.00% |
0 / 6 |
342 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
12 | |||
extractID | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
normalizeDate | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
writeHarvestedIdsLog | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
20 | |||
getBasePath | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
write | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
42 |
1 | <?php |
2 | |
3 | /** |
4 | * OAI-PMH Record Writer |
5 | * |
6 | * PHP version 7 |
7 | * |
8 | * Copyright (c) Demian Katz 2016. |
9 | * |
10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2, |
12 | * as published by the Free Software Foundation. |
13 | * |
14 | * This program is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | * GNU General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU General Public License |
20 | * along with this program; if not, write to the Free Software |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 | * |
23 | * @category VuFind |
24 | * @package Harvest_Tools |
25 | * @author Demian Katz <demian.katz@villanova.edu> |
26 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
27 | * @link https://vufind.org/wiki/indexing:oai-pmh Wiki |
28 | */ |
29 | |
30 | namespace VuFindHarvest\OaiPmh; |
31 | |
32 | use VuFindHarvest\RecordWriterStrategy\RecordWriterStrategyInterface; |
33 | |
34 | use function strlen; |
35 | |
36 | /** |
37 | * OAI-PMH Record Writer |
38 | * |
39 | * @category VuFind |
40 | * @package Harvest_Tools |
41 | * @author Demian Katz <demian.katz@villanova.edu> |
42 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
43 | * @link https://vufind.org/wiki/indexing:oai-pmh Wiki |
44 | */ |
class RecordWriter
{
    /**
     * Filename for logging harvested IDs (false for none)
     *
     * @var string|bool
     */
    protected $harvestedIdLog = false;

    /**
     * OAI prefix to strip from ID values
     *
     * @var string
     */
    protected $idPrefix = '';

    /**
     * Regular expression searches
     *
     * @var array
     */
    protected $idSearch = [];

    /**
     * Replacements for regular expression matches
     *
     * @var array
     */
    protected $idReplace = [];

    /**
     * XML record formatter
     *
     * @var RecordXmlFormatter
     */
    protected $recordFormatter;

    /**
     * Writer strategy
     *
     * @var RecordWriterStrategyInterface
     */
    protected $strategy;

    /**
     * Constructor
     *
     * @param RecordWriterStrategyInterface $strategy  Writing strategy
     * @param RecordXmlFormatter            $formatter XML record formatter
     * @param array                         $settings  Configuration settings
     */
    public function __construct($strategy, $formatter, $settings = [])
    {
        $this->recordFormatter = $formatter;
        $this->strategy = $strategy;

        // Settings that may be mapped directly from $settings to class properties:
        $mappableSettings = [
            'harvestedIdLog', 'idPrefix', 'idReplace', 'idSearch',
        ];
        foreach ($mappableSettings as $current) {
            if (isset($settings[$current])) {
                $this->$current = $settings[$current];
            }
        }
    }

    /**
     * Extract the ID from a record object (support method for write()).
     *
     * The configured prefix (if any) is removed from the start of the
     * identifier, then the configured regular expression replacements (if any)
     * are applied.
     *
     * @param object $record SimpleXML record.
     *
     * @return string The ID value.
     */
    protected function extractID($record)
    {
        // Normalize to string:
        $id = (string)$record->header->identifier;

        // Strip prefix if found. The comparison must be byte-wise: a loose
        // "==" between two numeric-looking strings (e.g. "001" and "1.0") is
        // evaluated numerically by PHP and would report a false match.
        $prefix = (string)$this->idPrefix;
        $prefixLength = strlen($prefix);
        if ($prefixLength > 0 && strncmp($id, $prefix, $prefixLength) === 0) {
            $id = substr($id, $prefixLength);
        }

        // Apply regular expression matching:
        if (!empty($this->idSearch)) {
            $id = preg_replace($this->idSearch, $this->idReplace, $id);
        }

        // Return final value:
        return $id;
    }

    /**
     * Normalize a date to a Unix timestamp.
     *
     * @param string $date Date (ISO-8601 or YYYY-MM-DD HH:MM:SS)
     *
     * @return int|false Unix timestamp (or false if $date invalid)
     */
    protected function normalizeDate($date)
    {
        // Remove timezone markers -- we don't want PHP to outsmart us by adjusting
        // the time zone!
        $date = str_replace(['T', 'Z'], [' ', ''], (string)$date);

        // Translate to a timestamp:
        return strtotime($date);
    }

    /**
     * Write a log file of harvested IDs (if configured to do so).
     *
     * IDs are appended one per line to the configured log file, which is
     * resolved relative to the strategy's base path.
     *
     * @param array $harvestedIds Harvested IDs
     *
     * @return void
     * @throws \Exception
     */
    protected function writeHarvestedIdsLog($harvestedIds)
    {
        // Do we have IDs to log and a log filename? If so, log them:
        if (!empty($this->harvestedIdLog) && !empty($harvestedIds)) {
            $file = fopen($this->getBasePath() . $this->harvestedIdLog, 'a');
            if (!$file) {
                throw new \Exception("Problem opening {$this->harvestedIdLog}.");
            }
            fwrite($file, implode(PHP_EOL, $harvestedIds) . PHP_EOL);
            fclose($file);
        }
    }

    /**
     * Get base path for writes.
     *
     * @return string
     */
    public function getBasePath()
    {
        return $this->strategy->getBasePath();
    }

    /**
     * Save harvested records to disk and return the end date.
     *
     * Each record is handed to the writer strategy either as a deleted record
     * (header status "deleted", case-insensitive) or as a formatted regular
     * record. IDs of regular records are logged afterwards if a harvested ID
     * log is configured.
     *
     * @param object $records SimpleXML records.
     *
     * @return int Timestamp of the most recent record seen (0 if none had a
     * parseable datestamp)
     * @throws \Exception If a record has no header, or the ID log cannot be
     * opened
     */
    public function write($records)
    {
        // Array for tracking successfully harvested IDs:
        $harvestedIds = [];

        // Date of most recent record encountered:
        $endDate = 0;

        $this->strategy->beginWrite();

        // Loop through the records:
        foreach ($records as $record) {
            // Die if the record is missing its header:
            if (empty($record->header)) {
                throw new \Exception('Unexpected missing record header.');
            }

            // Get the ID of the current record:
            $id = $this->extractID($record);

            // Save the current record, either as a deleted or as a regular file:
            $attribs = $record->header->attributes();
            $status = strtolower((string)($attribs['status'] ?? ''));
            if ($status === 'deleted') {
                $this->strategy->addDeletedRecord($id);
            } else {
                $recordXML = $this->recordFormatter->format($id, $record);
                $this->strategy->addRecord($id, $recordXML);
                $harvestedIds[] = $id;
            }

            // If the current record's date is newer than the previous end date,
            // remember it for future reference:
            $date = $this->normalizeDate((string)$record->header->datestamp);
            if ($date && $date > $endDate) {
                $endDate = $date;
            }
        }

        $this->strategy->endWrite();

        $this->writeHarvestedIdsLog($harvestedIds);

        return $endDate;
    }
}