Code Coverage
 
Classes and Traits
Functions and Methods
Lines
Total
0.00% covered (danger)
0.00%
0 / 1
66.67% covered (warning)
66.67%
4 / 6
CRAP
83.33% covered (warning)
83.33%
35 / 42
RecordWriter
0.00% covered (danger)
0.00%
0 / 1
66.67% covered (warning)
66.67%
4 / 6
19.50
83.33% covered (warning)
83.33%
35 / 42
 __construct
100.00% covered (success)
100.00%
1 / 1
3
100.00% covered (success)
100.00%
7 / 7
 extractID
100.00% covered (success)
100.00%
1 / 1
3
100.00% covered (success)
100.00%
6 / 6
 normalizeDate
100.00% covered (success)
100.00%
1 / 1
1
100.00% covered (success)
100.00%
2 / 2
 writeHarvestedIdsLog
0.00% covered (danger)
0.00%
0 / 1
9.83
28.57% covered (danger)
28.57%
2 / 7
 getBasePath
100.00% covered (success)
100.00%
1 / 1
1
100.00% covered (success)
100.00%
1 / 1
 write
0.00% covered (danger)
0.00%
0 / 1
6.04
89.47% covered (warning)
89.47%
17 / 19
<?php
/**
 * OAI-PMH Record Writer
 *
 * PHP version 7
 *
 * Copyright (c) Demian Katz 2016.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * @category VuFind
 * @package  Harvest_Tools
 * @author   Demian Katz <demian.katz@villanova.edu>
 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
 * @link     https://vufind.org/wiki/indexing:oai-pmh Wiki
 */
namespace VuFindHarvest\OaiPmh;
use VuFindHarvest\RecordWriterStrategy\RecordWriterStrategyInterface;
/**
 * OAI-PMH Record Writer
 *
 * @category VuFind
 * @package  Harvest_Tools
 * @author   Demian Katz <demian.katz@villanova.edu>
 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
 * @link     https://vufind.org/wiki/indexing:oai-pmh Wiki
 */
class RecordWriter
{
    /**
     * Filename for logging harvested IDs (false for none)
     *
     * @var string|bool
     */
    protected $harvestedIdLog = false;

    /**
     * OAI prefix to strip from ID values
     *
     * @var string
     */
    protected $idPrefix = '';

    /**
     * Regular expression searches (passed as the pattern argument of
     * preg_replace() when extracting IDs)
     *
     * @var array
     */
    protected $idSearch = [];

    /**
     * Replacements for regular expression matches (passed as the replacement
     * argument of preg_replace() when extracting IDs)
     *
     * @var array
     */
    protected $idReplace = [];

    /**
     * XML record formatter
     *
     * @var RecordXmlFormatter
     */
    protected $recordFormatter;

    /**
     * Writer strategy
     *
     * @var RecordWriterStrategyInterface
     */
    protected $strategy;

    /**
     * Constructor
     *
     * @param RecordWriterStrategyInterface $strategy  Writing strategy
     * @param RecordXmlFormatter            $formatter XML record formatter
     * @param array                         $settings  Configuration settings;
     * the keys harvestedIdLog, idPrefix, idReplace and idSearch are copied to the
     * properties of the same name when set, all other keys are ignored.
     */
    public function __construct($strategy, $formatter, $settings = [])
    {
        $this->recordFormatter = $formatter;
        $this->strategy = $strategy;

        // Settings that may be mapped directly from $settings to class properties:
        $mappableSettings = [
            'harvestedIdLog', 'idPrefix', 'idReplace', 'idSearch',
        ];
        foreach ($mappableSettings as $current) {
            if (isset($settings[$current])) {
                $this->$current = $settings[$current];
            }
        }
    }

    /**
     * Extract the ID from a record object (support method for write()).
     *
     * The header identifier is converted to a string, the configured prefix is
     * removed when the identifier starts with it, and the configured search /
     * replace patterns are then applied.
     *
     * @param object $record SimpleXML record.
     *
     * @return string        The ID value.
     * @throws \Exception    If the search/replace patterns cannot be applied.
     */
    protected function extractID($record)
    {
        // Normalize to string:
        $id = (string)$record->header->identifier;

        // Strip prefix if found. The comparison must be byte-exact: a loose ==
        // comparison treats numeric-looking strings as numbers (so '001' would
        // match the prefix '1.0') and would strip characters that are not the
        // prefix at all.
        $prefix = (string)$this->idPrefix;
        $prefixLength = strlen($prefix);
        if ($prefixLength > 0 && strncmp($id, $prefix, $prefixLength) === 0) {
            $id = (string)substr($id, $prefixLength);
        }

        // Apply regular expression matching:
        if (!empty($this->idSearch)) {
            $id = preg_replace($this->idSearch, $this->idReplace, $id);
            // preg_replace() returns null on failure; do not let a null ID be
            // handed on to the writer strategy.
            if ($id === null) {
                throw new \Exception('Problem applying ID search/replace patterns.');
            }
        }

        // Return final value:
        return $id;
    }

    /**
     * Normalize a date to a Unix timestamp.
     *
     * @param string $date Date (ISO-8601 or YYYY-MM-DD HH:MM:SS)
     *
     * @return int|false   Unix timestamp (or false if $date invalid)
     */
    protected function normalizeDate($date)
    {
        // Remove timezone markers -- we don't want PHP to outsmart us by adjusting
        // the time zone! ('T' becomes a space and 'Z' is dropped.)
        $date = str_replace(['T', 'Z'], [' ', ''], $date);

        // Translate to a timestamp:
        return strtotime($date);
    }

    /**
     * Write a log file of harvested IDs (if configured to do so).
     *
     * IDs are appended, one per line, to the configured log file inside the
     * base path. Nothing happens when no log file is configured or the list of
     * IDs is empty.
     *
     * @param array $harvestedIds Harvested IDs
     *
     * @return void
     * @throws \Exception If the log file cannot be opened or written.
     */
    protected function writeHarvestedIdsLog($harvestedIds)
    {
        // Do we have IDs to log and a log filename?  If not, we are done:
        if (empty($this->harvestedIdLog) || empty($harvestedIds)) {
            return;
        }

        $file = fopen($this->getBasePath() . $this->harvestedIdLog, 'a');
        if (!$file) {
            throw new \Exception("Problem opening {$this->harvestedIdLog}.");
        }

        // Close the handle before reporting a failed write so it is not leaked:
        $written = fputs($file, implode(PHP_EOL, $harvestedIds) . PHP_EOL);
        fclose($file);
        if ($written === false) {
            throw new \Exception("Problem writing to {$this->harvestedIdLog}.");
        }
    }

    /**
     * Get base path for writes (as reported by the writer strategy).
     *
     * @return string
     */
    public function getBasePath()
    {
        return $this->strategy->getBasePath();
    }

    /**
     * Pass harvested records to the writer strategy and return the end date.
     *
     * Records whose header carries status="deleted" (case-insensitive) are
     * reported through addDeletedRecord(); all others are formatted and passed to
     * addRecord(), and their IDs are written to the harvested ID log (if one is
     * configured).
     *
     * @param object $records SimpleXML records.
     *
     * @return int Most recent datestamp encountered, as a Unix timestamp (0 if
     * no record had a parseable datestamp).
     * @throws \Exception If a record has no header, an ID cannot be extracted,
     * or the harvested ID log cannot be written.
     */
    public function write($records)
    {
        // Array for tracking successfully harvested IDs:
        $harvestedIds = [];

        // Date of most recent record encountered:
        $endDate = 0;

        $this->strategy->beginWrite();

        // Loop through the records:
        foreach ($records as $record) {
            // Die if the record is missing its header:
            if (empty($record->header)) {
                throw new \Exception('Unexpected missing record header.');
            }

            // Get the ID of the current record:
            $id = $this->extractID($record);

            // Save the current record, either as a deleted or as a regular file.
            // The status attribute is optional, so only read it when present
            // (passing null to strtolower() is deprecated in newer PHP versions).
            $attribs = $record->header->attributes();
            $status = isset($attribs['status']) ? (string)$attribs['status'] : '';
            if (strtolower($status) === 'deleted') {
                $this->strategy->addDeletedRecord($id);
            } else {
                $recordXML = $this->recordFormatter->format($id, $record);
                $this->strategy->addRecord($id, $recordXML);
                $harvestedIds[] = $id;
            }

            // If the current record's date is newer than the previous end date,
            // remember it for future reference:
            $date = $this->normalizeDate((string)$record->header->datestamp);
            if ($date && $date > $endDate) {
                $endDate = $date;
            }
        }

        $this->strategy->endWrite();

        $this->writeHarvestedIdsLog($harvestedIds);

        return $endDate;
    }
}