Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
100.00% covered (success)
100.00%
59 / 59
100.00% covered (success)
100.00%
3 / 3
CRAP
100.00% covered (success)
100.00%
1 / 1
VuFindWorkKeys
100.00% covered (success)
100.00%
59 / 59
100.00% covered (success)
100.00%
3 / 3
19
100.00% covered (success)
100.00%
1 / 1
 getWorkKeys
100.00% covered (success)
100.00%
49 / 49
100.00% covered (success)
100.00%
1 / 1
14
 deDom
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 normalize
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
4
1<?php
2
3/**
4 * XSLT importer support methods for work key generation.
5 *
6 * PHP version 8
7 *
8 * Copyright (c) Demian Katz 2020.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2,
12 * as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
22 *
23 * @category VuFind
24 * @package  Import_Tools
25 * @author   Demian Katz <demian.katz@villanova.edu>
26 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
27 * @link     https://vufind.org/wiki/indexing Wiki
28 */
29
30namespace VuFind\XSLT\Import;
31
32use DOMDocument;
33use Normalizer;
34
35use function in_array;
36
37/**
38 * XSLT importer support methods for work key generation.
39 *
40 * @category VuFind
41 * @package  Import_Tools
42 * @author   Demian Katz <demian.katz@villanova.edu>
43 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
44 * @link     https://vufind.org/wiki/indexing Wiki
45 */
class VuFindWorkKeys
{
    /**
     * Get all work identification keys for the record.
     *
     * Two kinds of keys are generated, each as a <workKey> element appended to
     * the returned document:
     *  - "UT <title>" for every uniform title with a non-empty normalized form;
     *  - "AT <author> <title>" for every combination of a distinct normalized
     *    title (regular and trimmed titles combined) and a non-empty normalized
     *    author.
     *
     * @param Iterable $uniformTitles       Uniform title(s) for the work
     * @param Iterable $titles              Other title(s) for the work
     * @param Iterable $trimmedTitles       Title(s) with leading articles, etc.,
     * removed
     * @param Iterable $authors             Author(s) for the work
     * @param string   $includeRegEx        Regular expression defining characters to
     * keep
     * @param string   $excludeRegEx        Regular expression defining characters to
     * remove
     * @param string   $transliteratorRules Optional ICU transliteration rules to be
     * applied before the include and exclude regex's. See
     * https://unicode-org.github.io/icu/userguide/transforms/general/
     * #icu-transliterators for more information on the transliteration rules.
     *
     * @return DOMDocument
     */
    public static function getWorkKeys(
        $uniformTitles,
        $titles,
        $trimmedTitles,
        $authors,
        $includeRegEx = '',
        $excludeRegEx = '',
        $transliteratorRules = ''
    ) {
        $transliterator = $transliteratorRules
            ? \Transliterator::createFromRules(
                $transliteratorRules,
                \Transliterator::FORWARD
            ) : null;

        $dom = new DOMDocument('1.0', 'utf-8');

        foreach (self::toList($uniformTitles) as $uniformTitle) {
            $normalizedTitle = self::normalize(
                $uniformTitle,
                $includeRegEx,
                $excludeRegEx,
                $transliterator
            );
            if ($normalizedTitle !== '') {
                self::appendWorkKey($dom, 'UT ' . $normalizedTitle);
            }
        }

        // Normalize every author exactly once, up front. This avoids repeating
        // the work for each title and keeps one-shot iterables (generators)
        // usable. Authors with nothing left after normalization are dropped so
        // that we never emit a malformed "AT  <title>" key.
        $normalizedAuthors = [];
        foreach (self::toList($authors) as $author) {
            $normalizedAuthor = self::normalize(
                $author,
                $includeRegEx,
                $excludeRegEx,
                $transliterator
            );
            if ($normalizedAuthor !== '') {
                $normalizedAuthors[] = $normalizedAuthor;
            }
        }

        // Exit early if there are no authors, since we can't make author/title keys:
        if ($normalizedAuthors === []) {
            return $dom;
        }

        $normalizedTitles = [];
        $allTitles = array_merge(
            self::toList($titles),
            self::toList($trimmedTitles)
        );
        foreach ($allTitles as $title) {
            $normalizedTitle = self::normalize(
                $title,
                $includeRegEx,
                $excludeRegEx,
                $transliterator
            );
            // Skip empties and duplicates. The comparison must be strict:
            // loosely compared, numeric strings such as "1e3" and "1000" are
            // equal and would be treated as duplicates of each other.
            if (
                $normalizedTitle === ''
                || in_array($normalizedTitle, $normalizedTitles, true)
            ) {
                continue;
            }
            $normalizedTitles[] = $normalizedTitle;
            foreach ($normalizedAuthors as $normalizedAuthor) {
                self::appendWorkKey(
                    $dom,
                    'AT ' . $normalizedAuthor . ' ' . $normalizedTitle
                );
            }
        }

        return $dom;
    }

    /**
     * Convert an input value (scalar, array or Traversable) to a plain list.
     *
     * Keys are discarded on purpose: preserving them could make values overwrite
     * each other (duplicate iterator keys, or identical string keys when lists
     * are merged later).
     *
     * @param mixed $values Value(s) to convert
     *
     * @return array
     */
    private static function toList($values): array
    {
        if ($values instanceof \Traversable) {
            return iterator_to_array($values, false);
        }
        return array_values((array)$values);
    }

    /**
     * Append a <workKey> element containing the given key to the document.
     *
     * The key is added as a text node rather than through the $value argument
     * of DOMDocument::createElement(), because that argument is not escaped and
     * a key containing "&" would trigger a warning and lose its content.
     *
     * @param DOMDocument $dom Document to append to
     * @param string      $key Key text
     *
     * @return void
     */
    private static function appendWorkKey(DOMDocument $dom, string $key): void
    {
        $element = $dom->createElement('workKey');
        $element->appendChild($dom->createTextNode($key));
        $dom->appendChild($element);
    }

    /**
     * Force a value to a string, even if it's a DOMElement.
     *
     * @param string|\DOMElement $string String to normalize
     *
     * @return string
     */
    protected static function deDom($string): string
    {
        // Objects exposing textContent (DOM nodes) yield their text; for anything
        // else the null coalescing operator falls through to a string cast.
        return $string->textContent ?? (string)$string;
    }

    /**
     * Create a key string.
     *
     * The value is transliterated (or, when no transliterator is given, Unicode
     * normalized to form KC), filtered through the include and exclude regular
     * expressions, lowercased and truncated to 255 characters. An empty string
     * is returned when nothing usable remains or normalization fails.
     *
     * @param string|\DOMElement   $rawString      String to normalize
     * @param string               $includeRegEx   Regular expression defining
     * characters to keep
     * @param string               $excludeRegEx   Regular expression defining
     * characters to remove
     * @param \Transliterator|null $transliterator Transliterator
     *
     * @return string
     */
    protected static function normalize(
        $rawString,
        $includeRegEx,
        $excludeRegEx,
        $transliterator
    ) {
        // Handle strings and/or DOM elements:
        $string = self::deDom($rawString);
        $normalized = $transliterator
            ? $transliterator->transliterate($string)
            : Normalizer::normalize($string, Normalizer::FORM_KC);
        if (!is_string($normalized)) {
            // Both calls return false on failure (e.g. malformed UTF-8 input);
            // there is nothing meaningful to build a key from in that case.
            return '';
        }
        if (!empty($includeRegEx)) {
            preg_match_all($includeRegEx, $normalized, $matches);
            $normalized = implode('', $matches[0] ?? []);
        }
        if (!empty($excludeRegEx)) {
            // preg_replace() returns null on error; treat that as "no key".
            $normalized = preg_replace($excludeRegEx, '', $normalized) ?? '';
        }
        return mb_substr(mb_strtolower($normalized, 'UTF-8'), 0, 255, 'UTF-8');
    }
}