Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
82.05% covered (warning)
82.05%
32 / 39
66.67% covered (warning)
66.67%
4 / 6
CRAP
0.00% covered (danger)
0.00%
0 / 1
RecordWriter
82.05% covered (warning)
82.05%
32 / 39
66.67% covered (warning)
66.67%
4 / 6
19.87
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
3
 extractID
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
3
 normalizeDate
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 writeHarvestedIdsLog
16.67% covered (danger)
16.67%
1 / 6
0.00% covered (danger)
0.00%
0 / 1
13.26
 getBasePath
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 write
89.47% covered (warning)
89.47%
17 / 19
0.00% covered (danger)
0.00%
0 / 1
6.04
1<?php
2
3/**
4 * OAI-PMH Record Writer
5 *
6 * PHP version 7
7 *
8 * Copyright (c) Demian Katz 2016.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2,
12 * as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
22 *
23 * @category VuFind
24 * @package  Harvest_Tools
25 * @author   Demian Katz <demian.katz@villanova.edu>
26 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
27 * @link     https://vufind.org/wiki/indexing:oai-pmh Wiki
28 */
29
30namespace VuFindHarvest\OaiPmh;
31
32use VuFindHarvest\RecordWriterStrategy\RecordWriterStrategyInterface;
33
34use function strlen;
35
/**
 * OAI-PMH Record Writer
 *
 * Receives harvested OAI-PMH records, normalizes their identifiers, passes each
 * record to a writer strategy (as either a regular or a deleted record) and,
 * when configured, appends the IDs of the regular records to a log file.
 *
 * @category VuFind
 * @package  Harvest_Tools
 * @author   Demian Katz <demian.katz@villanova.edu>
 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
 * @link     https://vufind.org/wiki/indexing:oai-pmh Wiki
 */
class RecordWriter
{
    /**
     * Filename for logging harvested IDs (false for none); it is appended to
     * the base path reported by the writer strategy.
     *
     * @var string|bool
     */
    protected $harvestedIdLog = false;

    /**
     * OAI prefix to strip from ID values
     *
     * @var string
     */
    protected $idPrefix = '';

    /**
     * Regular expression searches (passed as the pattern to preg_replace())
     *
     * @var array
     */
    protected $idSearch = [];

    /**
     * Replacements for regular expression matches (passed as the replacement
     * to preg_replace())
     *
     * @var array
     */
    protected $idReplace = [];

    /**
     * XML record formatter
     *
     * @var RecordXmlFormatter
     */
    protected $recordFormatter;

    /**
     * Writer strategy
     *
     * @var RecordWriterStrategyInterface
     */
    protected $strategy;

    /**
     * Constructor
     *
     * Recognized keys in $settings are 'harvestedIdLog', 'idPrefix',
     * 'idReplace' and 'idSearch'; any other keys are ignored.
     *
     * @param RecordWriterStrategyInterface $strategy  Writing strategy
     * @param RecordXmlFormatter            $formatter XML record formatter
     * @param array                         $settings  Configuration settings
     */
    public function __construct($strategy, $formatter, $settings = [])
    {
        $this->recordFormatter = $formatter;
        $this->strategy = $strategy;

        // Settings that may be mapped directly from $settings to class properties:
        $mappableSettings = [
            'harvestedIdLog', 'idPrefix', 'idReplace', 'idSearch',
        ];
        foreach ($mappableSettings as $current) {
            // isset() means a null value leaves the property's default in place.
            if (isset($settings[$current])) {
                $this->$current = $settings[$current];
            }
        }
    }

    /**
     * Extract the ID from a record object (support method for write()).
     *
     * The configured prefix is removed when the identifier starts with it, and
     * the configured regular expression replacements are applied afterwards.
     *
     * @param object $record SimpleXML record.
     *
     * @return string        The ID value.
     */
    protected function extractID($record)
    {
        // Normalize to string:
        $id = (string)$record->header->identifier;

        // Strip prefix if found. The check is a byte-wise strncmp() rather than
        // a loose == comparison: == compares two numeric-looking strings as
        // numbers (e.g. "0e1" == "0e2"), which would strip a prefix that is
        // not actually present.
        $prefix = (string)$this->idPrefix;
        $prefixLength = strlen($prefix);
        if ($prefixLength > 0 && strncmp($id, $prefix, $prefixLength) === 0) {
            $id = substr($id, $prefixLength);
        }

        // Apply regular expression matching:
        if (!empty($this->idSearch)) {
            $id = preg_replace($this->idSearch, $this->idReplace, $id);
        }

        // Return final value:
        return $id;
    }

    /**
     * Normalize a date to a Unix timestamp.
     *
     * @param string $date Date (ISO-8601 or YYYY-MM-DD HH:MM:SS)
     *
     * @return int|false   Unix timestamp (or false if $date invalid)
     */
    protected function normalizeDate($date)
    {
        // Remove timezone markers -- we don't want PHP to outsmart us by adjusting
        // the time zone!
        $date = str_replace(['T', 'Z'], [' ', ''], $date);

        // Translate to a timestamp:
        return strtotime($date);
    }

    /**
     * Write a log file of harvested IDs (if configured to do so).
     *
     * IDs are appended one per line to the configured log file, which is
     * located under the base path returned by getBasePath().
     *
     * @param array $harvestedIds Harvested IDs
     *
     * @return void
     * @throws \Exception If the log file cannot be opened or written.
     */
    protected function writeHarvestedIdsLog($harvestedIds)
    {
        // Nothing to do unless we have both a log filename and IDs to log:
        if (empty($this->harvestedIdLog) || empty($harvestedIds)) {
            return;
        }

        $file = fopen($this->getBasePath() . $this->harvestedIdLog, 'a');
        if (!$file) {
            throw new \Exception("Problem opening {$this->harvestedIdLog}.");
        }

        // Always release the handle, even if the write itself blows up:
        try {
            $written = fwrite($file, implode(PHP_EOL, $harvestedIds) . PHP_EOL);
        } finally {
            fclose($file);
        }

        // A failed write must not go unnoticed, or the log silently loses IDs:
        if ($written === false) {
            throw new \Exception("Problem writing to {$this->harvestedIdLog}.");
        }
    }

    /**
     * Get base path for writes (delegates to the writer strategy).
     *
     * @return string
     */
    public function getBasePath()
    {
        return $this->strategy->getBasePath();
    }

    /**
     * Save harvested records to disk and return the end date.
     *
     * Each record is handed to the writer strategy as a deleted record when its
     * header status is "deleted" (case-insensitive), otherwise as a formatted
     * regular record. IDs of regular records are logged afterwards if a log
     * file is configured.
     *
     * @param object $records SimpleXML records.
     *
     * @return int Timestamp of the most recent record datestamp encountered
     * (0 if no record had a parseable datestamp).
     * @throws \Exception If a record has no header, or the ID log cannot be
     * written.
     */
    public function write($records)
    {
        // Array for tracking successfully harvested IDs:
        $harvestedIds = [];

        // Date of most recent record encountered:
        $endDate = 0;

        $this->strategy->beginWrite();

        // Loop through the records:
        foreach ($records as $record) {
            // Die if the record is missing its header:
            if (empty($record->header)) {
                throw new \Exception('Unexpected missing record header.');
            }

            // Get the ID of the current record:
            $id = $this->extractID($record);

            // Save the current record, either as a deleted or as a regular file:
            $attribs = $record->header->attributes();
            if (strtolower($attribs['status'] ?? '') === 'deleted') {
                $this->strategy->addDeletedRecord($id);
            } else {
                $recordXML = $this->recordFormatter->format($id, $record);
                $this->strategy->addRecord($id, $recordXML);
                $harvestedIds[] = $id;
            }

            // If the current record's date is newer than the previous end date,
            // remember it for future reference (unparseable dates are skipped):
            $date = $this->normalizeDate($record->header->datestamp);
            if ($date && $date > $endDate) {
                $endDate = $date;
            }
        }

        $this->strategy->endWrite();

        $this->writeHarvestedIdsLog($harvestedIds);

        return $endDate;
    }
}