Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
63 / 63 |
|
100.00% |
2 / 2 |
CRAP | |
100.00% |
1 / 1 |
Utils | |
100.00% |
63 / 63 |
|
100.00% |
2 / 2 |
28 | |
100.00% |
1 / 1 |
parseRange | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
sanitizeDate | |
100.00% |
59 / 59 |
|
100.00% |
1 / 1 |
26 |
1 | <?php |
2 | |
3 | /** |
4 | * Solr Utility Functions |
5 | * |
6 | * PHP version 8 |
7 | * |
8 | * Copyright (C) Andrew Nagy 2009. |
9 | * |
10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2, |
12 | * as published by the Free Software Foundation. |
13 | * |
14 | * This program is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | * GNU General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU General Public License |
20 | * along with this program; if not, write to the Free Software |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 | * |
23 | * @category VuFind |
24 | * @package Solr |
25 | * @author Demian Katz <demian.katz@villanova.edu> |
26 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
27 | * @link https://vufind.org/wiki/development Wiki |
28 | */ |
29 | |
30 | namespace VuFind\Solr; |
31 | |
32 | use function extension_loaded; |
33 | use function strlen; |
34 | |
35 | /** |
36 | * Solr Utility Functions |
37 | * |
38 | * This class is designed to hold Solr-related support methods that may |
39 | * be called statically. This allows sharing of some Solr-related logic |
40 | * between the Solr and Summon classes. |
41 | * |
42 | * @category VuFind |
43 | * @package Solr |
44 | * @author Demian Katz <demian.katz@villanova.edu> |
45 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
46 | * @link https://vufind.org/wiki/development Wiki |
47 | */ |
class Utils
{
    /**
     * Parse "from" and "to" values out of a range query (or return false if the
     * query is not a range).
     *
     * The first bracketed "[x TO y]" expression found anywhere in the query is
     * used; both endpoints are returned with surrounding whitespace trimmed.
     *
     * @param string $query Solr query to parse.
     *
     * @return array|bool Array with 'from' and 'to' values extracted from range
     * or false if the provided query is not a range.
     */
    public static function parseRange($query)
    {
        $regEx = '/\[([^\]]+)\s+TO\s+([^\]]+)\]/';
        if (!preg_match($regEx, $query, $matches)) {
            return false;
        }
        return ['from' => trim($matches[1]), 'to' => trim($matches[2])];
    }

    /**
     * Convert a raw string date (as, for example, from a MARC record) into a legal
     * Solr date string. Return null if conversion is impossible.
     *
     * Missing or illegal month/day parts are replaced with defaults: the start
     * of the year/month normally, or the end of the year/month when $rangeEnd
     * is true. The time portion is 00:00:00Z, or 23:59:59Z for a range end.
     *
     * @param string $date     Date to convert.
     * @param bool   $rangeEnd Is this the end of a range?
     *
     * @return string|null
     */
    public static function sanitizeDate($date, $rangeEnd = false)
    {
        // Strip brackets; we'll assume guesses are correct.
        $date = str_replace(['[', ']'], '', $date);

        // Special case -- first four characters are not a year:
        if (!preg_match('/^[0-9]{4}/', $date)) {
            // 'n.d.' means no date known -- give up!
            if (preg_match('/^n\.?\s*d\.?$/', $date)) {
                return null;
            }

            // Check for month/year or month-year formats. The month is matched
            // greedily as one or two digits so that "12/2005" yields month 12
            // (a one-digit-first match would only capture the trailing "2").
            // The lookbehind keeps us from treating the tail of a longer number
            // (e.g. the "99" in "c1999-2000") as a month.
            $monthYearRegex = '/(?<![0-9])([0-9]{1,2})[-\/]([0-9]{4})/';
            if (preg_match($monthYearRegex, $date, $matches)) {
                $month = $matches[1];
                $year = $matches[2];
                $date = "$year-$month";
            } else {
                // strtotime can only handle a limited range of dates; let's extract
                // a year from the string and temporarily replace it with a known
                // good year; we'll swap it back after the conversion.
                $goodYear = '1999';

                $year = preg_match('/[0-9]{4}/', $date, $matches)
                    ? $matches[0] : false;
                if (false !== $year) {
                    // Use a leap year as the stand-in when the real year is one,
                    // so that February 29 survives the round trip:
                    if (extension_loaded('intl')) {
                        $calendar = new \IntlGregorianCalendar();
                        if ($calendar->isLeapYear((int)$year)) {
                            $goodYear = '1996';
                        }
                    }
                    $date = str_replace($year, $goodYear, $date);
                }
                $time = @strtotime($date);
                if ($time) {
                    $date = @date('Y-m-d', $time);
                    if ($year) {
                        $date = str_replace($goodYear, $year, $date);
                    }
                } elseif ($year) {
                    // If the best we can do is extract a 4-digit year, that's better
                    // than nothing....
                    $date = $year;
                } else {
                    return null;
                }
            }
        }

        // If we've gotten this far, we at least know that we have a valid year.
        $year = substr($date, 0, 4);

        // Let's get rid of punctuation and normalize separators:
        $date = str_replace(['.', ' ', '?'], '', $date);
        $date = str_replace(['/', '--', '-0'], '-', $date);

        // If multiple dates are &'ed together, take just the first:
        [$date] = explode('&', $date);

        // Extract whatever month/day parts are present; null means "not found".
        $month = $day = null;
        if (strlen($date) >= 8) {
            // Long enough for year + month + day:
            $ymdRegex = '/^[0-9]{4}-([0-9]{1,2})-([0-9]{1,2})/';
            if (preg_match($ymdRegex, $date, $matches)) {
                $month = str_pad($matches[1], 2, '0', STR_PAD_LEFT);
                $day = str_pad($matches[2], 2, '0', STR_PAD_LEFT);
            }
        } elseif (strlen($date) >= 5) {
            // Long enough for year + month only:
            if (preg_match('/^[0-9]{4}-([0-9]{1,2})/', $date, $matches)) {
                $month = str_pad($matches[1], 2, '0', STR_PAD_LEFT);
            }
        }

        // Determine correct values for month and/or day if we don't have valid ones:
        if (null === $month && null === $day) {
            // Year only: default to start or end of the year.
            $month = $rangeEnd ? '12' : '01';
            $day = $rangeEnd ? '31' : '01';
        } elseif (null === $day) {
            // Fallback also covers an illegal month, where no candidate below
            // can match; the legality check further down then resets the month.
            $day = '01';
            if ($rangeEnd) {
                // Find the last legal day of the month:
                foreach (['31', '30', '29', '28'] as $dayCandidate) {
                    if (checkdate((int)$month, (int)$dayCandidate, (int)$year)) {
                        $day = $dayCandidate;
                        break;
                    }
                }
            }
        }

        // Make sure month/day/year combination is legal. Make it legal if it isn't.
        if (!checkdate((int)$month, (int)$day, (int)$year)) {
            $day = '01';
            if (!checkdate((int)$month, (int)$day, (int)$year)) {
                $month = '01';
            }
        }

        return "{$year}-{$month}-{$day}T" . ($rangeEnd ? '23:59:59Z' : '00:00:00Z');
    }
}