Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
10 / 10 |
|
100.00% |
2 / 2 |
CRAP | |
100.00% |
1 / 1 |
DefaultSpellingNormalizer | |
100.00% |
10 / 10 |
|
100.00% |
2 / 2 |
4 | |
100.00% |
1 / 1 |
__invoke | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
stripDiacritics | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | /** |
4 | * Default text normalizer for spellcheck text replacement. |
5 | * |
6 | * PHP version 8 |
7 | * |
8 | * Copyright (C) Villanova University 2021. |
9 | * |
10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2, |
12 | * as published by the Free Software Foundation. |
13 | * |
14 | * This program is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | * GNU General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU General Public License |
20 | * along with this program; if not, write to the Free Software |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 | * |
23 | * @category VuFind |
24 | * @package Normalizer |
25 | * @author Demian Katz <demian.katz@villanova.edu> |
26 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
27 | * @link https://vufind.org Main Page |
28 | */ |
29 | |
30 | namespace VuFind\Normalizer; |
31 | |
32 | use function in_array; |
33 | |
34 | /** |
35 | * Default text normalizer for spellcheck text replacement. |
36 | * |
37 | * @category VuFind |
38 | * @package Normalizer |
39 | * @author Demian Katz <demian.katz@villanova.edu> |
40 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
41 | * @link https://vufind.org Main Page |
42 | */ |
class DefaultSpellingNormalizer
{
    /**
     * Apply normalization to a string.
     *
     * Diacritics are stripped, runs of whitespace are collapsed to a single
     * space, and every whitespace-delimited token is lowercased -- except for
     * tokens that are exactly the uppercase Boolean operators AND, OR or NOT,
     * which are passed through untouched.
     *
     * @param string $text String to normalize.
     *
     * @return string
     */
    public function __invoke($text)
    {
        // The input to the function may be a Solr query with Boolean operators
        // in it; we want to be careful not to turn this into something invalid.
        $booleans = ['AND', 'OR', 'NOT'];
        $tokens = preg_split('/\s+/', $this->stripDiacritics($text));
        $normalized = array_map(
            // Strict comparison: only an exact, case-sensitive operator match
            // is exempt from lowercasing.
            static fn ($token) => in_array($token, $booleans, true)
                ? $token
                : mb_strtolower($token, 'UTF-8'),
            $tokens
        );
        return implode(' ', $normalized);
    }

    /**
     * Remove diacritics (accents, umlauts, etc.) from a string
     *
     * If the transliterator cannot be created, or transliteration itself fails,
     * the input is returned unchanged rather than propagating a non-string value.
     *
     * @param string $string The text where we would like to remove diacritics
     *
     * @return string The input text with diacritics removed
     */
    protected function stripDiacritics($string)
    {
        // See http://userguide.icu-project.org/transforms/general for
        // an explanation of this: decompose (NFD), drop the combining marks,
        // then recompose (NFC).
        $transliterator = \Transliterator::createFromRules(
            ':: NFD; :: [:Nonspacing Mark:] Remove; :: NFC;',
            \Transliterator::FORWARD
        );
        // createFromRules() may yield null and transliterate() may yield false;
        // neither is a usable string, so fall back to the original text.
        $result = $transliterator?->transliterate($string);
        return is_string($result) ? $result : $string;
    }
}