Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 64
0.00% covered (danger)
0.00%
0 / 13
CRAP
0.00% covered (danger)
0.00%
0 / 1
HarvesterFactory
0.00% covered (danger)
0.00%
0 / 64
0.00% covered (danger)
0.00%
0 / 13
1190
0.00% covered (danger)
0.00%
0 / 1
 addAutoSslOptions
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
12
 getClientOptions
0.00% covered (danger)
0.00%
0 / 11
0.00% covered (danger)
0.00%
0 / 1
56
 configureClient
0.00% covered (danger)
0.00%
0 / 5
0.00% covered (danger)
0.00%
0 / 1
20
 getBasePath
0.00% covered (danger)
0.00%
0 / 5
0.00% covered (danger)
0.00%
0 / 1
12
 getCommunicator
0.00% covered (danger)
0.00%
0 / 7
0.00% covered (danger)
0.00%
0 / 1
20
 getFormatter
0.00% covered (danger)
0.00%
0 / 7
0.00% covered (danger)
0.00%
0 / 1
12
 getConsoleWriter
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
12
 getResponseProcessor
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 getSetLoader
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 getStateManager
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 getWriter
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 getWriterStrategyFactory
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 getHarvester
0.00% covered (danger)
0.00%
0 / 18
0.00% covered (danger)
0.00%
0 / 1
6
1<?php
2
3/**
4 * Factory for OAI-PMH Harvest Tool
5 *
6 * PHP version 7
7 *
8 * Copyright (c) Demian Katz 2010.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2,
12 * as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
22 *
23 * @category VuFind
24 * @package  Harvest_Tools
25 * @author   Demian Katz <demian.katz@villanova.edu>
26 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
27 * @link     https://vufind.org/wiki/indexing:oai-pmh Wiki
28 */
29
30namespace VuFindHarvest\OaiPmh;
31
32use Laminas\Http\Client;
33use Symfony\Component\Console\Output\OutputInterface;
34use VuFindHarvest\ConsoleOutput\ConsoleWriter;
35use VuFindHarvest\RecordWriterStrategy\RecordWriterStrategyFactory;
36use VuFindHarvest\RecordWriterStrategy\RecordWriterStrategyInterface;
37use VuFindHarvest\ResponseProcessor\ResponseProcessorInterface;
38use VuFindHarvest\ResponseProcessor\SimpleXmlResponseProcessor;
39
/**
 * Factory for OAI-PMH Harvest Tool
 *
 * Builds a Harvester together with its collaborators (HTTP client
 * configuration, communicator, response processor, record formatter, record
 * writer and state manager). Every collaborator is created by its own
 * protected method.
 *
 * @category VuFind
 * @package  Harvest_Tools
 * @author   Demian Katz <demian.katz@villanova.edu>
 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
 * @link     https://vufind.org/wiki/indexing:oai-pmh Wiki
 *
 * @SuppressWarnings(PHPMD.CouplingBetweenObjects)
 */
class HarvesterFactory
{
    /**
     * Add SSL options to $options if standard files can be autodetected.
     *
     * @param array $options Options to modify (by reference).
     *
     * @return void
     */
    protected function addAutoSslOptions(&$options)
    {
        // RedHat/CentOS:
        if (file_exists('/etc/pki/tls/cert.pem')) {
            $options['sslcafile'] = '/etc/pki/tls/cert.pem';
        }
        // Debian/Ubuntu:
        if (file_exists('/etc/ssl/certs')) {
            $options['sslcapath'] = '/etc/ssl/certs';
        }
    }

    /**
     * Get HTTP client options from $settings array
     *
     * Recognized keys: 'timeout' (defaults to 60), 'autosslca', 'sslcafile',
     * 'sslcapath' and 'sslverifypeer'.
     *
     * @param array $settings Settings
     *
     * @return array
     */
    protected function getClientOptions(array $settings)
    {
        $options = [
            'timeout' => $settings['timeout'] ?? 60,
        ];

        // Autodetect first so that explicit settings below can override:
        if (!empty($settings['autosslca'])) {
            $this->addAutoSslOptions($options);
        }
        foreach (['sslcafile', 'sslcapath'] as $sslSetting) {
            if (isset($settings[$sslSetting])) {
                $options[$sslSetting] = $settings[$sslSetting];
            }
        }

        // The option is only emitted to switch verification off; a truthy or
        // missing setting leaves the client's own default untouched.
        if (isset($settings['sslverifypeer']) && !$settings['sslverifypeer']) {
            $options['sslverifypeer'] = false;
        }

        return $options;
    }

    /**
     * Configure the HTTP client
     *
     * @param Client|null $client   HTTP client (a new one is created when null)
     * @param array       $settings Settings
     *
     * @return Client
     *
     * @throws \Exception
     */
    protected function configureClient(?Client $client, array $settings)
    {
        // getHarvester() forwards its optional client unchanged, so null must
        // be accepted here for the fallback to be reachable.
        $configuredClient = $client ?? new Client();

        // Set authentication, if necessary:
        if (!empty($settings['httpUser']) && !empty($settings['httpPass'])) {
            $configuredClient->setAuth($settings['httpUser'], $settings['httpPass']);
        }

        // Set up assorted client options from $settings array:
        $configuredClient->setOptions($this->getClientOptions($settings));

        return $configuredClient;
    }

    /**
     * Set up directory structure for harvesting.
     *
     * @param string $harvestRoot Root directory containing harvested data.
     * @param string $target      The OAI-PMH target directory to create inside
     * $harvestRoot.
     *
     * @return string Full path of the target directory, with a trailing slash.
     *
     * @throws \Exception If the directory does not exist and cannot be created.
     */
    protected function getBasePath($harvestRoot, $target)
    {
        // Build the full harvest path:
        $basePath = rtrim($harvestRoot, '/') . '/' . rtrim($target, '/') . '/';

        // Create the directory if it does not already exist. The second
        // is_dir() check tolerates another process creating the directory
        // between our first check and the mkdir() call.
        if (!is_dir($basePath) && !mkdir($basePath) && !is_dir($basePath)) {
            throw new \Exception("Problem creating directory {$basePath}.");
        }

        return $basePath;
    }

    /**
     * Get the communicator.
     *
     * @param Client                     $client    HTTP client
     * @param array                      $settings  Additional settings
     * @param ResponseProcessorInterface $processor Response processor
     * @param string                     $target    Target being configured (used for
     * error messages)
     * @param OutputInterface|null       $output    Output interface
     *
     * @return Communicator
     *
     * @throws \Exception If $settings contains no base URL.
     */
    protected function getCommunicator(
        Client $client,
        array $settings,
        ResponseProcessorInterface $processor,
        $target,
        ?OutputInterface $output = null
    ) {
        if (empty($settings['url'])) {
            throw new \Exception("Missing base URL for {$target}.");
        }
        $comm = new Communicator($settings['url'], $client, $processor);

        // We only want the communicator to output messages if we are in verbose
        // mode; communicator messages are considered verbose output.
        if ($settings['verbose'] ?? false) {
            $consoleWriter = $this->getConsoleWriter($output, $settings);
            if ($consoleWriter !== null) {
                $comm->setOutputWriter($consoleWriter);
            }
        }

        return $comm;
    }

    /**
     * Get the record XML formatter.
     *
     * @param Communicator         $communicator Communicator
     * @param array                $settings     Additional settings
     * @param OutputInterface|null $output       Output interface
     *
     * @return RecordXmlFormatter
     */
    protected function getFormatter(
        Communicator $communicator,
        array $settings,
        ?OutputInterface $output = null
    ) {
        // Build the formatter:
        $formatter = new RecordXmlFormatter($settings);

        // Load set names if we're going to need them:
        if ($formatter->needsSetNames()) {
            $loader = $this->getSetLoader($communicator, $settings);
            $consoleWriter = $this->getConsoleWriter($output, $settings);
            if ($consoleWriter !== null) {
                $loader->setOutputWriter($consoleWriter);
            }
            $formatter->setSetNames($loader->getNames());
        }

        return $formatter;
    }

    /**
     * Get console output writer (if applicable).
     *
     * @param OutputInterface|null $output   Output interface
     * @param array                $settings OAI-PMH settings
     *
     * @return ConsoleWriter|null Null in silent mode or when no output
     * interface is available.
     */
    protected function getConsoleWriter(?OutputInterface $output, $settings)
    {
        // Don't create a writer if we're in silent mode or have no
        // available output interface.
        if (($settings['silent'] ?? false) || $output === null) {
            return null;
        }
        return new ConsoleWriter($output);
    }

    /**
     * Get XML response processor.
     *
     * @param string $basePath Base path for harvest
     * @param array  $settings OAI-PMH settings
     *
     * @return SimpleXmlResponseProcessor
     */
    protected function getResponseProcessor($basePath, array $settings)
    {
        return new SimpleXmlResponseProcessor($basePath, $settings);
    }

    /**
     * Get the set loader (used to load set names).
     *
     * @param Communicator $communicator API communicator
     * @param array        $settings     OAI-PMH settings
     *
     * @return SetLoader
     */
    protected function getSetLoader(Communicator $communicator, array $settings)
    {
        return new SetLoader($communicator, $settings);
    }

    /**
     * Get state manager
     *
     * @param string $basePath Base path for harvest
     *
     * @return StateManager
     */
    protected function getStateManager($basePath)
    {
        return new StateManager($basePath);
    }

    /**
     * Build the writer support object.
     *
     * @param RecordWriterStrategyInterface $strategy  Writing strategy
     * @param RecordXmlFormatter            $formatter XML record formatter
     * @param array                         $settings  Configuration settings
     *
     * @return RecordWriter
     */
    protected function getWriter(
        RecordWriterStrategyInterface $strategy,
        RecordXmlFormatter $formatter,
        array $settings
    ) {
        return new RecordWriter($strategy, $formatter, $settings);
    }

    /**
     * Get the factory for record writer strategies.
     *
     * @return RecordWriterStrategyFactory
     */
    protected function getWriterStrategyFactory()
    {
        return new RecordWriterStrategyFactory();
    }

    /**
     * Get the harvester
     *
     * @param string               $target      Name of source being harvested (used
     * as directory name for storing harvested data inside $harvestRoot)
     * @param string               $harvestRoot Root directory containing harvested
     * data.
     * @param Client|null          $client      HTTP client (optional; a new one is
     * created when omitted)
     * @param array                $settings    Additional settings
     * @param OutputInterface|null $output      Output interface (optional)
     *
     * @return Harvester
     *
     * @throws \Exception
     */
    public function getHarvester(
        $target,
        $harvestRoot,
        ?Client $client = null,
        array $settings = [],
        ?OutputInterface $output = null
    ) {
        $basePath = $this->getBasePath($harvestRoot, $target);
        $responseProcessor = $this->getResponseProcessor($basePath, $settings);
        $communicator = $this->getCommunicator(
            $this->configureClient($client, $settings),
            $settings,
            $responseProcessor,
            $target,
            $output
        );
        $formatter = $this->getFormatter($communicator, $settings, $output);
        $strategy = $this->getWriterStrategyFactory()
            ->getStrategy($basePath, $settings);
        $recordWriter = $this->getWriter($strategy, $formatter, $settings);
        $stateManager = $this->getStateManager($basePath);
        $harvester = new Harvester(
            $communicator,
            $recordWriter,
            $stateManager,
            $settings
        );

        // Attach console output unless silenced or unavailable:
        $consoleWriter = $this->getConsoleWriter($output, $settings);
        if ($consoleWriter !== null) {
            $harvester->setOutputWriter($consoleWriter);
        }

        return $harvester;
    }
}