Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 65 |
|
0.00% |
0 / 13 |
CRAP | |
0.00% |
0 / 1 |
HarvesterFactory | |
0.00% |
0 / 65 |
|
0.00% |
0 / 13 |
1190 | |
0.00% |
0 / 1 |
addAutoSslOptions | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
getClientOptions | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
56 | |||
configureClient | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
getBasePath | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
getCommunicator | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
getFormatter | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
getConsoleWriter | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
12 | |||
getResponseProcessor | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getSetLoader | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getStateManager | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getWriter | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getWriterStrategyFactory | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getHarvester | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | |
3 | /** |
4 | * Factory for OAI-PMH Harvest Tool |
5 | * |
6 | * PHP version 7 |
7 | * |
8 | * Copyright (c) Demian Katz 2010. |
9 | * |
10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2, |
12 | * as published by the Free Software Foundation. |
13 | * |
14 | * This program is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | * GNU General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU General Public License |
20 | * along with this program; if not, write to the Free Software |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 | * |
23 | * @category VuFind |
24 | * @package Harvest_Tools |
25 | * @author Demian Katz <demian.katz@villanova.edu> |
26 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
27 | * @link https://vufind.org/wiki/indexing:oai-pmh Wiki |
28 | */ |
29 | |
30 | namespace VuFindHarvest\OaiPmh; |
31 | |
32 | use Laminas\Http\Client; |
33 | use Symfony\Component\Console\Output\OutputInterface; |
34 | use VuFindHarvest\ConsoleOutput\ConsoleWriter; |
35 | use VuFindHarvest\RecordWriterStrategy\RecordWriterStrategyFactory; |
36 | use VuFindHarvest\RecordWriterStrategy\RecordWriterStrategyInterface; |
37 | use VuFindHarvest\ResponseProcessor\ResponseProcessorInterface; |
38 | use VuFindHarvest\ResponseProcessor\SimpleXmlResponseProcessor; |
39 | |
/**
 * Factory for OAI-PMH Harvest Tool
 *
 * Builds a Harvester together with all of its collaborators (HTTP client
 * configuration, communicator, response processor, record formatter, record
 * writer and state manager). Every collaborator is created by its own
 * protected method so that individual pieces can be overridden in a subclass.
 *
 * @category VuFind
 * @package  Harvest_Tools
 * @author   Demian Katz <demian.katz@villanova.edu>
 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
 * @link     https://vufind.org/wiki/indexing:oai-pmh Wiki
 *
 * @SuppressWarnings(PHPMD.CouplingBetweenObjects)
 */
class HarvesterFactory
{
    /**
     * Add SSL options to $options if standard files can be autodetected.
     *
     * The two checks are independent of each other, so both 'sslcafile' and
     * 'sslcapath' may end up being set on a system where both locations exist.
     *
     * @param array $options Options to modify (passed by reference).
     *
     * @return void
     */
    protected function addAutoSslOptions(&$options)
    {
        // RedHat/CentOS:
        if (file_exists('/etc/pki/tls/cert.pem')) {
            $options['sslcafile'] = '/etc/pki/tls/cert.pem';
        }
        // Debian/Ubuntu:
        if (file_exists('/etc/ssl/certs')) {
            $options['sslcapath'] = '/etc/ssl/certs';
        }
    }

    /**
     * Get HTTP client options from $settings array
     *
     * Recognized settings: 'timeout' (defaults to 60), 'autosslca',
     * 'sslcafile', 'sslcapath' and 'sslverifypeer'.
     *
     * @param array $settings Settings
     *
     * @return array
     */
    protected function getClientOptions(array $settings)
    {
        $options = [
            'timeout' => $settings['timeout'] ?? 60,
        ];

        // Autodetection runs first so that explicitly configured values
        // (copied below) take precedence over detected ones.
        if (!empty($settings['autosslca'])) {
            $this->addAutoSslOptions($options);
        }
        foreach (['sslcafile', 'sslcapath'] as $sslSetting) {
            if (isset($settings[$sslSetting])) {
                $options[$sslSetting] = $settings[$sslSetting];
            }
        }

        // Peer verification is only mentioned in the options when it has been
        // explicitly switched off; otherwise the client default is left alone.
        if (isset($settings['sslverifypeer']) && !$settings['sslverifypeer']) {
            $options['sslverifypeer'] = false;
        }

        return $options;
    }

    /**
     * Configure the HTTP client
     *
     * @param Client|null $client   HTTP client (null to create a new one)
     * @param array       $settings Settings
     *
     * @return Client
     *
     * @throws \Exception Nothing is thrown directly here; errors may propagate
     * from the client's setAuth()/setOptions() calls.
     */
    protected function configureClient(?Client $client, array $settings)
    {
        // getHarvester() passes null when the caller supplied no client, so
        // the parameter has to be nullable for this fallback to be reachable.
        $configuredClient = $client ?? new Client();

        // Set authentication, if necessary (both user and password required):
        if (!empty($settings['httpUser']) && !empty($settings['httpPass'])) {
            $configuredClient->setAuth($settings['httpUser'], $settings['httpPass']);
        }

        // Set up assorted client options from $settings array:
        $configuredClient->setOptions($this->getClientOptions($settings));

        return $configuredClient;
    }

    /**
     * Set up directory structure for harvesting.
     *
     * Only the final $target directory is created; $harvestRoot itself must
     * already exist (the mkdir call is not recursive).
     *
     * @param string $harvestRoot Root directory containing harvested data.
     * @param string $target      The OAI-PMH target directory to create inside
     * $harvestRoot.
     *
     * @return string Full path of the target directory, with a trailing slash.
     *
     * @throws \Exception If the directory does not exist and cannot be created.
     */
    protected function getBasePath($harvestRoot, $target)
    {
        // Build the full harvest path:
        $basePath = rtrim($harvestRoot, '/') . '/' . rtrim($target, '/') . '/';

        // Create the directory if it does not already exist. The final
        // is_dir() re-check covers the case where a concurrent process created
        // the directory between our first check and the mkdir() call; that
        // situation is not an error.
        if (!is_dir($basePath) && !mkdir($basePath) && !is_dir($basePath)) {
            throw new \Exception("Problem creating directory {$basePath}.");
        }

        return $basePath;
    }

    /**
     * Get the communicator.
     *
     * @param Client                     $client    HTTP client
     * @param array                      $settings  Additional settings
     * @param ResponseProcessorInterface $processor Response processor
     * @param string                     $target    Target being configured (used for
     * error messages)
     * @param OutputInterface|null       $output    Output interface (optional)
     *
     * @return Communicator
     *
     * @throws \Exception If $settings contains no (non-empty) 'url' entry.
     */
    protected function getCommunicator(
        Client $client,
        array $settings,
        ResponseProcessorInterface $processor,
        $target,
        ?OutputInterface $output = null
    ) {
        if (empty($settings['url'])) {
            throw new \Exception("Missing base URL for {$target}.");
        }
        $comm = new Communicator($settings['url'], $client, $processor);

        // We only want the communicator to output messages if we are in verbose
        // mode; communicator messages are considered verbose output.
        if ($settings['verbose'] ?? false) {
            $consoleWriter = $this->getConsoleWriter($output, $settings);
            if ($consoleWriter) {
                $comm->setOutputWriter($consoleWriter);
            }
        }

        return $comm;
    }

    /**
     * Get the record XML formatter.
     *
     * @param Communicator         $communicator Communicator
     * @param array                $settings     Additional settings
     * @param OutputInterface|null $output       Output interface (optional)
     *
     * @return RecordXmlFormatter
     */
    protected function getFormatter(
        Communicator $communicator,
        array $settings,
        ?OutputInterface $output = null
    ) {
        // Build the formatter:
        $formatter = new RecordXmlFormatter($settings);

        // Load set names if we're going to need them:
        if ($formatter->needsSetNames()) {
            $loader = $this->getSetLoader($communicator, $settings);
            $consoleWriter = $this->getConsoleWriter($output, $settings);
            if ($consoleWriter) {
                $loader->setOutputWriter($consoleWriter);
            }
            $formatter->setSetNames($loader->getNames());
        }

        return $formatter;
    }

    /**
     * Get console output writer (if applicable).
     *
     * @param OutputInterface|null $output   Output interface
     * @param array                $settings OAI-PMH settings
     *
     * @return ConsoleWriter|null Null when the 'silent' setting is truthy or
     * when no output interface is available.
     */
    protected function getConsoleWriter(?OutputInterface $output, $settings)
    {
        // Don't create a writer if we're in silent mode or have no
        // available output interface.
        if (($settings['silent'] ?? false) || $output === null) {
            return null;
        }
        return new ConsoleWriter($output);
    }

    /**
     * Get XML response processor.
     *
     * @param string $basePath Base path for harvest
     * @param array  $settings OAI-PMH settings
     *
     * @return SimpleXmlResponseProcessor
     */
    protected function getResponseProcessor($basePath, array $settings)
    {
        return new SimpleXmlResponseProcessor($basePath, $settings);
    }

    /**
     * Get the set loader (used to load set names).
     *
     * @param Communicator $communicator API communicator
     * @param array        $settings     OAI-PMH settings
     *
     * @return SetLoader
     */
    protected function getSetLoader(Communicator $communicator, array $settings)
    {
        return new SetLoader($communicator, $settings);
    }

    /**
     * Get state manager
     *
     * @param string $basePath Base path for harvest
     *
     * @return StateManager
     */
    protected function getStateManager($basePath)
    {
        return new StateManager($basePath);
    }

    /**
     * Build the writer support object.
     *
     * @param RecordWriterStrategyInterface $strategy  Writing strategy
     * @param RecordXmlFormatter            $formatter XML record formatter
     * @param array                         $settings  Configuration settings
     *
     * @return RecordWriter
     */
    protected function getWriter(
        RecordWriterStrategyInterface $strategy,
        RecordXmlFormatter $formatter,
        array $settings
    ) {
        return new RecordWriter($strategy, $formatter, $settings);
    }

    /**
     * Get the factory for record writer strategies.
     *
     * @return RecordWriterStrategyFactory
     */
    protected function getWriterStrategyFactory()
    {
        return new RecordWriterStrategyFactory();
    }

    /**
     * Get the harvester
     *
     * @param string               $target      Name of source being harvested (used
     * as directory name for storing harvested data inside $harvestRoot)
     * @param string               $harvestRoot Root directory containing harvested
     * data.
     * @param Client|null          $client      HTTP client (optional; a new one is
     * created when omitted)
     * @param array                $settings    Additional settings
     * @param OutputInterface|null $output      Output interface (optional)
     *
     * @return Harvester
     *
     * @throws \Exception
     */
    public function getHarvester(
        $target,
        $harvestRoot,
        ?Client $client = null,
        array $settings = [],
        ?OutputInterface $output = null
    ) {
        // The base path is resolved first because it creates the target
        // directory that the collaborators below are given.
        $basePath = $this->getBasePath($harvestRoot, $target);
        $responseProcessor = $this->getResponseProcessor($basePath, $settings);
        $communicator = $this->getCommunicator(
            $this->configureClient($client, $settings),
            $settings,
            $responseProcessor,
            $target,
            $output
        );
        $formatter = $this->getFormatter($communicator, $settings, $output);
        $strategy = $this->getWriterStrategyFactory()
            ->getStrategy($basePath, $settings);
        $recordWriter = $this->getWriter($strategy, $formatter, $settings);
        $stateManager = $this->getStateManager($basePath);
        $harvester = new Harvester(
            $communicator,
            $recordWriter,
            $stateManager,
            $settings
        );

        // Attach console output unless silenced or unavailable. This is kept
        // in its own variable so it is not confused with the record writer.
        $consoleWriter = $this->getConsoleWriter($output, $settings);
        if ($consoleWriter) {
            $harvester->setOutputWriter($consoleWriter);
        }

        return $harvester;
    }
}