Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
10.37% |
14 / 135 |
|
5.88% |
1 / 17 |
CRAP | |
0.00% |
0 / 1 |
Generator | |
10.37% |
14 / 135 |
|
5.88% |
1 / 17 |
1998.98 | |
0.00% |
0 / 1 |
__construct | |
91.67% |
11 / 12 |
|
0.00% |
0 / 1 |
3.01 | |||
setVerbose | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
verboseMsg | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
setBaseUrl | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
setBaseSitemapUrl | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
setFileLocation | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getTime | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
generate | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
generateWithPlugins | |
0.00% |
0 / 42 |
|
0.00% |
0 / 1 |
240 | |||
getWarnings | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
buildIndex | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
42 | |||
getNewSitemapIndex | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getNewSitemap | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getFilenameForPage | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
12 | |||
getBaseSitemapIndexUrl | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getPlugin | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
2 | |||
getSitemapLanguages | |
8.33% |
2 / 24 |
|
0.00% |
0 / 1 |
57.30 |
1 | <?php |
2 | |
3 | /** |
4 | * VuFind Sitemap |
5 | * |
6 | * PHP version 8 |
7 | * |
8 | * Copyright (C) Villanova University 2010. |
9 | * |
10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2, |
12 | * as published by the Free Software Foundation. |
13 | * |
14 | * This program is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | * GNU General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU General Public License |
20 | * along with this program; if not, write to the Free Software |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 | * |
23 | * @category VuFind |
24 | * @package Sitemap |
25 | * @author Demian Katz <demian.katz@villanova.edu> |
26 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
27 | * @link https://vufind.org Main Page |
28 | */ |
29 | |
30 | namespace VuFind\Sitemap; |
31 | |
32 | use Laminas\Config\Config; |
33 | |
34 | use function call_user_func; |
35 | use function in_array; |
36 | use function is_callable; |
37 | use function is_string; |
38 | |
/**
 * Class for generating sitemaps
 *
 * Collects URLs from the configured generator plugins, writes them to one or
 * more sitemap files and, when an index file name is configured, writes a
 * sitemap index listing the generated files.
 *
 * @category VuFind
 * @package  Sitemap
 * @author   Demian Katz <demian.katz@villanova.edu>
 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
 * @link     https://vufind.org Main Page
 */
class Generator
{
    /**
     * Base URL for site
     *
     * @var string
     */
    protected $baseUrl;

    /**
     * Base URL for sitemap
     *
     * @var string
     */
    protected $baseSitemapUrl;

    /**
     * Languages enabled for sitemaps (sitemap language code => VuFind locale)
     *
     * @var array
     */
    protected $languages;

    /**
     * Sitemap configuration (sitemap.ini)
     *
     * @var Config
     */
    protected $config;

    /**
     * Generator plugin manager
     *
     * @var PluginManager
     */
    protected $pluginManager;

    /**
     * Frequency of URL updates (always, daily, weekly, monthly, yearly, never)
     *
     * @var string
     */
    protected $frequency;

    /**
     * URL entries per sitemap
     *
     * @var int
     */
    protected $countPerPage;

    /**
     * Output file path
     *
     * @var string
     */
    protected $fileLocation;

    /**
     * Base filename of sitemap files (without page suffix or extension)
     *
     * @var string
     */
    protected $fileStart;

    /**
     * Filename of sitemap index (false when no index file is configured)
     *
     * @var string|false
     */
    protected $indexFile = false;

    /**
     * Warnings collected during sitemap generation
     *
     * @var array
     */
    protected $warnings = [];

    /**
     * Verbose callback (null when verbose output is disabled)
     *
     * @var callable|null
     */
    protected $verbose = null;

    /**
     * Constructor
     *
     * Settings missing from the configuration fall back to these defaults:
     * frequency 'weekly', 10000 URLs per sitemap, file location '/tmp', base
     * filename 'sitemap'. The base sitemap URL falls back to the site base URL.
     *
     * @param string        $baseUrl VuFind base URL
     * @param Config        $config  Sitemap configuration settings
     * @param array         $locales Enabled locales
     * @param PluginManager $pm      Generator plugin manager
     */
    public function __construct(
        $baseUrl,
        Config $config,
        array $locales,
        PluginManager $pm
    ) {
        // Save incoming parameters:
        $this->baseUrl = $baseUrl;
        $this->config = $config;
        $this->pluginManager = $pm;

        // This must run after $this->config is set because it reads the
        // indexLanguageVersions setting:
        $this->languages = $this->getSitemapLanguages($locales);

        $this->baseSitemapUrl = empty($this->config->SitemapIndex->baseSitemapUrl)
            ? $this->baseUrl : $this->config->SitemapIndex->baseSitemapUrl;

        $this->frequency = $this->config->Sitemap->frequency ?? 'weekly';
        $this->countPerPage = $this->config->Sitemap->countPerPage ?? 10000;
        $this->fileLocation = $this->config->Sitemap->fileLocation ?? '/tmp';
        $this->fileStart = $this->config->Sitemap->fileName ?? 'sitemap';
        if (isset($this->config->SitemapIndex->indexFileName)) {
            $this->indexFile = $this->config->SitemapIndex->indexFileName . '.xml';
        }
    }

    /**
     * Get/set verbose callback
     *
     * @param callable|null $newMode Callback for writing verbose messages; when
     * null (the default), the current callback is left unchanged and simply
     * returned.
     *
     * @return callable|null Current verbose callback (null if none was set)
     */
    public function setVerbose($newMode = null)
    {
        if (null !== $newMode) {
            $this->verbose = $newMode;
        }
        return $this->verbose;
    }

    /**
     * Write a verbose message (if configured to do so)
     *
     * Does nothing unless a callable verbose callback has been set.
     *
     * @param string $msg Message to display
     *
     * @return void
     */
    protected function verboseMsg($msg)
    {
        if (is_callable($this->verbose)) {
            call_user_func($this->verbose, $msg);
        }
    }

    /**
     * Get/set base url
     *
     * @param string|null $newUrl New base url (null to leave it unchanged)
     *
     * @return string Current or new base url
     */
    public function setBaseUrl($newUrl = null)
    {
        if (null !== $newUrl) {
            $this->baseUrl = $newUrl;
        }
        return $this->baseUrl;
    }

    /**
     * Get/set base sitemap url
     *
     * @param string|null $newUrl New base sitemap url (null to leave it
     * unchanged)
     *
     * @return string Current or new base sitemap url
     */
    public function setBaseSitemapUrl($newUrl = null)
    {
        if (null !== $newUrl) {
            $this->baseSitemapUrl = $newUrl;
        }
        return $this->baseSitemapUrl;
    }

    /**
     * Get/set output file path
     *
     * @param string|null $newLocation New path (null to leave it unchanged)
     *
     * @return string Current or new path
     */
    public function setFileLocation(?string $newLocation = null): string
    {
        if (null !== $newLocation) {
            $this->fileLocation = $newLocation;
        }
        return $this->fileLocation;
    }

    /**
     * Get the current Unix timestamp with microseconds as a float.
     *
     * @return float
     */
    protected function getTime()
    {
        return microtime(true);
    }

    /**
     * Generate the sitemaps based on settings established by the constructor.
     *
     * @return void
     *
     * @throws \Exception If a sitemap or the sitemap index cannot be written
     */
    public function generate()
    {
        // Start timer:
        $startTime = $this->getTime();

        // Write the sitemap files, then the index that refers to them:
        $this->buildIndex($this->generateWithPlugins());

        // Display total elapsed time in verbose mode:
        $this->verboseMsg(
            'Elapsed time (in seconds): ' . round($this->getTime() - $startTime)
        );
    }

    /**
     * Generate sitemaps from all mandatory and configured plugins
     *
     * Plugins reporting the same sitemap name share one sitemap. Whenever adding
     * a URL would exceed the configured per-page count, the sitemap is written
     * to disk and emptied before the URL is added.
     *
     * @return array Names (without path) of the sitemap files that were written
     *
     * @throws \Exception If a sitemap file cannot be written
     */
    protected function generateWithPlugins(): array
    {
        $sitemapFiles = [];
        $sitemapIndexes = [];
        // Writes one page of a sitemap, numbering pages per sitemap name and
        // recording the written file name:
        $writeMap = function (
            $sitemap,
            $name
        ) use (
            &$sitemapFiles,
            &$sitemapIndexes
        ) {
            $index = ($sitemapIndexes[$name] ?? 0) + 1;
            $sitemapIndexes[$name] = $index;
            $pageName = empty($name) ? $index : "$name-$index";
            $filePath = $this->getFilenameForPage($pageName);
            if (false === $sitemap->write($filePath)) {
                throw new \Exception("Problem writing $filePath.");
            }
            $sitemapFiles[] = $this->getFilenameForPage($pageName, false);
        };

        // If no plugins are defined, use the Index plugin by default:
        $plugins = isset($this->config->Sitemap->plugins)
            ? $this->config->Sitemap->plugins->toArray() : ['Index'];
        $pluginSitemaps = [];
        foreach ($plugins as $pluginName) {
            $plugin = $this->getPlugin($pluginName);
            $sitemapName = $plugin->getSitemapName();
            $msgName = empty($sitemapName)
                ? 'core sitemap' : "sitemap '$sitemapName'";
            $this->verboseMsg(
                "Generating $msgName with '$pluginName'"
            );
            $pluginSitemaps[$sitemapName] ??= $this->getNewSitemap();
            $languages = $plugin->supportsVuFindLanguages()
                ? $this->languages : [];
            $frequency = $plugin->getFrequency();
            // Sitemaps are objects, so this is a handle to the shared instance;
            // no reference is needed to modify it in place:
            $sitemap = $pluginSitemaps[$sitemapName];
            $count = $sitemap->getCount();
            foreach ($plugin->getUrls() as $url) {
                ++$count;
                if ($count > $this->countPerPage) {
                    // Write the current sitemap and clear all entries from it:
                    $writeMap($sitemap, $sitemapName);
                    $sitemap->clear();
                    $count = 1;
                }
                // Plain string URLs are wrapped with the plugin's languages and
                // frequency when either is set; other values pass through:
                $dataToAdd = (($languages || $frequency) && is_string($url))
                    ? compact('url', 'languages', 'frequency') : $url;
                $sitemap->addUrl($dataToAdd);
            }
        }
        // Write remaining sitemaps:
        foreach ($pluginSitemaps as $sitemapName => $sitemap) {
            if (!$sitemap->isEmpty()) {
                $writeMap($sitemap, $sitemapName);
            }
        }
        return $sitemapFiles;
    }

    /**
     * Get array of warning messages collected during build.
     *
     * @return array
     */
    public function getWarnings()
    {
        return $this->warnings;
    }

    /**
     * Write a sitemap index if requested.
     *
     * @param array $sitemaps Sitemap file names to add to the index.
     *
     * @return void
     *
     * @throws \Exception If the index file cannot be written
     */
    protected function buildIndex(array $sitemaps)
    {
        // Only build index file if requested:
        if ($this->indexFile === false) {
            return;
        }

        $smf = $this->getNewSitemapIndex();
        $baseUrl = $this->getBaseSitemapIndexUrl();

        // Add a <sitemap /> group for a static sitemap file.
        // See sitemap.ini for more information on this option.
        $baseSitemapFileName = $this->config->SitemapIndex->baseSitemapFileName
            ?? '';
        if ($baseSitemapFileName) {
            $baseSitemapFileName .= '.xml';
            $baseSitemapFilePath = $this->fileLocation . '/'
                . $baseSitemapFileName;
            // Only add the <sitemap /> group if the file exists
            // in the directory where the other sitemap files
            // are saved, i.e. ['Sitemap']['fileLocation']
            if (file_exists($baseSitemapFilePath)) {
                $smf->addUrl($baseUrl . '/' . $baseSitemapFileName);
            } else {
                $this->warnings[] = "WARNING: Can't open file "
                    . $baseSitemapFilePath . '. '
                    . 'The sitemap index will be generated '
                    . 'without this sitemap file.';
            }
        }

        foreach ($sitemaps as $sitemap) {
            $smf->addUrl($baseUrl . '/' . $sitemap);
        }

        if (
            false === $smf->write($this->fileLocation . '/' . $this->indexFile)
        ) {
            throw new \Exception("Problem writing $this->indexFile.");
        }
    }

    /**
     * Get a fresh SitemapIndex object.
     *
     * @return SitemapIndex
     */
    protected function getNewSitemapIndex()
    {
        return new SitemapIndex();
    }

    /**
     * Get a fresh Sitemap object using the configured default frequency.
     *
     * @return Sitemap
     */
    protected function getNewSitemap()
    {
        return new Sitemap($this->frequency);
    }

    /**
     * Get the filename for the specified page number or name.
     *
     * Page 1 gets no suffix (e.g. "sitemap.xml"); any other page is appended
     * after a dash (e.g. "sitemap-2.xml").
     *
     * @param int|string $page        Page number or name
     * @param bool       $includePath Whether to include the path name
     *
     * @return string
     */
    protected function getFilenameForPage($page, $includePath = true)
    {
        $path = $includePath ? $this->fileLocation . '/' : '';
        // Loose comparison kept on purpose so that both 1 and '1' count as the
        // first page:
        $suffix = $page == 1 ? '' : '-' . $page;
        return $path . $this->fileStart . $suffix . '.xml';
    }

    /**
     * Get the base URL for sitemap index files
     *
     * @return string
     */
    protected function getBaseSitemapIndexUrl()
    {
        // Pick the appropriate base URL based on the configuration files:
        return $this->baseSitemapUrl;
    }

    /**
     * Create and setup a plugin
     *
     * Fetches the plugin from the plugin manager and passes it the base URL,
     * the base sitemap URL and a callback that forwards to verboseMsg().
     *
     * @param string $pluginName Plugin name
     *
     * @return Plugin\GeneratorPluginInterface
     */
    protected function getPlugin(string $pluginName): Plugin\GeneratorPluginInterface
    {
        $plugin = $this->pluginManager->get($pluginName);
        $verboseCallback = function (string $msg): void {
            $this->verboseMsg($msg);
        };
        $plugin->setOptions(
            [
                'baseUrl' => $this->baseUrl,
                'baseSitemapUrl' => $this->baseSitemapUrl,
                'verboseMessageCallback' => $verboseCallback,
            ]
        );
        return $plugin;
    }

    /**
     * Get languages for a sitemap
     *
     * Returns an array with sitemap languages as keys and VuFind languages as
     * values. The indexLanguageVersions setting selects the locales: an empty
     * value disables language versions, '*' accepts every enabled locale, and
     * anything else is read as a comma-separated list of locales to keep.
     *
     * @param array $locales Enabled VuFind locales
     *
     * @return array
     */
    protected function getSitemapLanguages(array $locales): array
    {
        $setting = $this->config->Sitemap->indexLanguageVersions ?? '';
        if (empty($setting)) {
            return [];
        }
        $filter = trim($setting) === '*'
            ? [] : array_map('trim', explode(',', $setting));
        if ($filter) {
            $locales = array_intersect($locales, $filter);
        }

        $result = [];
        // Add languages and fallbacks for non-locale specific languages:
        foreach ($locales as $locale) {
            $parts = explode('-', $locale, 2);
            $langPart = $parts[0];
            $regionPart = $parts[1] ?? '';
            if (!$regionPart) {
                $result[$locale] = $locale;
                continue;
            }
            $sitemapLocale = $langPart . '-' . strtoupper($regionPart);
            $result[$sitemapLocale] = $locale;
            // If the fallback language is not enabled in VuFind, add the
            // locale-specific language as the fallback:
            if (!in_array($langPart, $locales, true)) {
                $result[$langPart] = $locale;
            }
        }
        // If any languages are active, add the sitemap default language without a
        // target language code to the list as well:
        if ($result) {
            $result['x-default'] = null;
        }

        return $result;
    }
}