Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
97.83% covered (success)
97.83%
90 / 92
75.00% covered (warning)
75.00%
6 / 8
CRAP
0.00% covered (danger)
0.00%
0 / 1
Redi
97.83% covered (success)
97.83%
90 / 92
75.00% covered (warning)
75.00%
6 / 8
22
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 fetchLinks
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 parseLinks
88.89% covered (warning)
88.89%
8 / 9
0.00% covered (danger)
0.00%
0 / 1
2.01
 parseDOI
100.00% covered (success)
100.00%
20 / 20
100.00% covered (success)
100.00%
1 / 1
3
 parseRediInfo
88.89% covered (warning)
88.89%
8 / 9
0.00% covered (danger)
0.00%
0 / 1
4.02
 parseRediOpenURLs
100.00% covered (success)
100.00%
37 / 37
100.00% covered (success)
100.00%
1 / 1
6
 postProcessing
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
4
 removeDoubleAngleQuotationMarks
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
1<?php
2
3/**
4 * ReDi Link Resolver Driver
5 *
6 * PHP version 8
7 *
8 * Copyright (C) Leipzig University Library 2015
9 *
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2,
13 * as published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
23 *
24 * @category VuFind
25 * @package  Resolver_Drivers
26 * @author   André Lahmann <lahmann@ub.uni-leipzig.de>
27 * @author   Gregor Gawol <gawol@ub.uni-leipzig.de>
28 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
29 * @link     https://vufind.org/wiki/development:plugins:link_resolver_drivers Wiki
30 */
31
32namespace VuFind\Resolver\Driver;
33
34use DOMDocument;
35use Laminas\Dom\DOMXPath;
36
37use function chr;
38use function count;
39
40/**
41 * ReDi Link Resolver Driver
42 *
43 * @category VuFind
44 * @package  Resolver_Drivers
45 * @author   André Lahmann <lahmann@ub.uni-leipzig.de>
46 * @author   Gregor Gawol <gawol@ub.uni-leipzig.de>
47 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
48 * @link     https://vufind.org/wiki/development:plugins:link_resolver_drivers Wiki
49 */
class Redi extends AbstractBase
{
    /**
     * HTTP client used to contact the resolver
     *
     * @var \Laminas\Http\Client
     */
    protected $httpClient;

    /**
     * Parsed resolver links (filled by parseLinks(), refined by postProcessing())
     *
     * @var array
     */
    protected $links = [];

    /**
     * Constructor
     *
     * @param string               $baseUrl    Base URL for link resolver
     * @param \Laminas\Http\Client $httpClient HTTP client
     */
    public function __construct($baseUrl, \Laminas\Http\Client $httpClient)
    {
        parent::__construct($baseUrl);
        $this->httpClient = $httpClient;
    }

    /**
     * Fetch Links
     *
     * Fetches a set of links corresponding to an OpenURL
     *
     * @param string $openURL openURL (url-encoded)
     *
     * @return string         raw XML returned by resolver
     */
    public function fetchLinks($openURL)
    {
        $url = $this->getResolverUrl($openURL);
        return $this->httpClient->setUri($url)->send()->getBody();
    }

    /**
     * Parse Links
     *
     * Parses the (HTML) response returned by the link resolver and converts it
     * to a standardised format for display. An empty or unparsable response
     * yields an empty array.
     *
     * @param string $xmlstr Raw response returned by resolver
     *
     * @return array         Array of link arrays with the keys title, href,
     *                       access, coverage and service_type
     */
    public function parseLinks($xmlstr)
    {
        // DOMDocument::loadHTML() throws a ValueError for an empty string under
        // PHP 8; error suppression cannot intercept that, so handle it up front.
        if ((string)$xmlstr === '') {
            return [];
        }

        $xml = new DOMDocument();
        // Collect libxml parser warnings internally instead of silencing every
        // diagnostic with "@"; the previous setting is restored afterwards.
        $previousSetting = libxml_use_internal_errors(true);
        try {
            $loaded = $xml->loadHTML($xmlstr);
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousSetting);
        }
        if (!$loaded) {
            return [];
        }

        // parse the raw resolver-data
        $this->links = array_merge(
            $this->parseDOI($xml),
            $this->parseRediOpenURLs($xml)
        );

        // perform (individual) postprocessing on parsed resolver-data
        $this->postProcessing();

        return $this->links;
    }

    /**
     * Parse the Redi response and return array with DOI information.
     *
     * DOI terms (dt.doi_t) and definitions (dd.doi_d) are paired by position;
     * if their counts differ, nothing is returned.
     *
     * @param DOMDocument $xml Loaded document
     *
     * @return array Get back an array with title, URL and service_type
     */
    protected function parseDOI($xml)
    {
        $retval = [];

        $xpath = new DOMXPath($xml);

        $doiTerm = $xpath->query("//dt[@class='doi_t']");
        $doiDefinition = $xpath->query("//dd[@class='doi_d']");

        if ($doiTerm->length !== $doiDefinition->length) {
            return $retval;
        }

        for ($i = 0; $i < $doiTerm->length; $i++) {
            $term = $doiTerm->item($i);
            $definition = $doiDefinition->item($i);
            // first href attribute found anywhere below the definition; null
            // (without a warning) when the definition carries no link at all
            $href = $xpath->query('.//@href', $definition)
                ->item(0)?->textContent;
            $retval[] = [
                'title' => $term->textContent . $definition->textContent,
                'href' => $href,
                'access' => 'unknown',
                'coverage' => null,
                'service_type' => 'getDOI',
            ];
        }

        return $retval;
    }

    /**
     * Parse Redi additional information elements and return the one identified by
     * the infoToken provided (e.g. "*")
     *
     * @param DOMDocument $xml       Loaded document
     * @param string      $infoToken InfoToken to search for
     *
     * @return string Text of the paragraph containing the token, or an empty
     *                string if no paragraph carries the token
     */
    protected function parseRediInfo($xml, $infoToken)
    {
        $xpath = new DOMXPath($xml);

        // additional info nodes - marked by "<sup>*</sup>"
        $infoTokenNodes = $xpath->query("//div[@id='t_ezb']/div[@class='t']/p/sup");

        foreach ($infoTokenNodes as $tokenNode) {
            // tokens are labels, so compare them as strings (no numeric juggling)
            if ((string)$infoToken === $tokenNode->textContent) {
                // Take the paragraph directly from the matched node. Indexing a
                // separate ".../sup/.." query would drift as soon as one
                // paragraph holds several <sup> elements, because that node-set
                // contains every parent only once.
                return $tokenNode->parentNode->textContent;
            }
        }

        return '';
    }

    /**
     * Parse if the Redi snippet contains Redi urls.
     *
     * Result paragraphs and their links are paired by position; if their counts
     * differ, nothing is returned.
     *
     * @param DOMDocument $xml Loaded document
     *
     * @return array Get back Redi direct link to sources containing title, URL and
     *               service_type
     */
    protected function parseRediOpenURLs($xml)
    {
        $retval = [];

        $xpath = new DOMXPath($xml);

        $ezbResultsNodesText = $xpath
            ->query("//div[@class='t_ezb_result']/p");
        $ezbResultsNodesURL = $xpath
            ->query("//div[@class='t_ezb_result']/p/span[@class='t_link']/a");

        if ($ezbResultsNodesText->length !== $ezbResultsNodesURL->length) {
            return $retval;
        }

        // access level => class of the marker span; when several markers are
        // present, the entry listed last wins
        $accessClassMarkers = [
            'denied' => 't_ezb_red',
            'limited' => 't_ezb_yellow',
            'open' => 't_ezb_green',
        ];

        for ($i = 0; $i < $ezbResultsNodesText->length; $i++) {
            $paragraph = $ezbResultsNodesText->item($i);

            // All lookups below are relative to the current paragraph. A
            // document-wide "//div[...][n]" selects the n-th div per parent, not
            // the n-th result overall, and can point at a different result.
            $accessClass = 'unknown';
            foreach ($accessClassMarkers as $key => $cssClass) {
                $markers = $xpath
                    ->query("./span[@class='{$cssClass}']", $paragraph);
                if ($markers->length === 1) {
                    $accessClass = $key;
                }
            }

            // resolve the footnote token (if exactly one) to its explanation
            $itemInfo = '';
            $infoTokens = $xpath->query('./sup', $paragraph);
            if ($infoTokens->length === 1) {
                $itemInfo = $this->parseRediInfo(
                    $xml,
                    $infoTokens->item(0)->textContent
                );
            }

            $retval[] = [
                'title' => $paragraph->textContent,
                // null (without a warning) if the anchor has no href attribute
                'href' => $ezbResultsNodesURL->item($i)
                    ->attributes->getNamedItem('href')?->textContent,
                'access'       => $accessClass,
                'coverage'     => $itemInfo,
                'service_type' => 'getFullTxt',
            ];
        }

        return $retval;
    }

    /**
     * Hook for post processing of the parsed resolver response (e.g. by removing any
     * double angle quotation mark from each link['title']).
     *
     * Operates in place on $this->links: titles are stripped of "»" and
     * non-breaking spaces and trimmed; non-null coverage texts are trimmed.
     *
     * @return void
     */
    protected function postProcessing()
    {
        foreach ($this->links as &$link) {
            if (isset($link['title'])) {
                $link['title'] = trim(
                    $this->removeDoubleAngleQuotationMarks($link['title'])
                );
            }
            if (isset($link['coverage'])) {
                $link['coverage'] = trim($link['coverage']);
            }
        }
        // drop the reference so later writes to $link cannot alter the last entry
        unset($link);
    }

    /**
     * Helper function to remove hardcoded link-string "»" in Redi response
     *
     * @param string $string String to be manipulated
     *
     * @return string
     */
    protected function removeDoubleAngleQuotationMarks($string)
    {
        // chr(194) . chr(160) is the UTF-8 byte sequence of \u00a0 (no-break space)
        return str_replace(['»', chr(194) . chr(160)], '', $string);
    }
}