Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
97.83% |
90 / 92 |
|
75.00% |
6 / 8 |
CRAP | |
0.00% |
0 / 1 |
Redi | |
97.83% |
90 / 92 |
|
75.00% |
6 / 8 |
22 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
fetchLinks | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
parseLinks | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
2.01 | |||
parseDOI | |
100.00% |
20 / 20 |
|
100.00% |
1 / 1 |
3 | |||
parseRediInfo | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
4.02 | |||
parseRediOpenURLs | |
100.00% |
37 / 37 |
|
100.00% |
1 / 1 |
6 | |||
postProcessing | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
4 | |||
removeDoubleAngleQuotationMarks | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | /** |
4 | * ReDi Link Resolver Driver |
5 | * |
6 | * PHP version 8 |
7 | * |
8 | * Copyright (C) Leipzig University Library 2015 |
9 | * |
10 | * |
11 | * This program is free software; you can redistribute it and/or modify |
12 | * it under the terms of the GNU General Public License version 2, |
13 | * as published by the Free Software Foundation. |
14 | * |
15 | * This program is distributed in the hope that it will be useful, |
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
18 | * GNU General Public License for more details. |
19 | * |
20 | * You should have received a copy of the GNU General Public License |
21 | * along with this program; if not, write to the Free Software |
22 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
23 | * |
24 | * @category VuFind |
25 | * @package Resolver_Drivers |
26 | * @author André Lahmann <lahmann@ub.uni-leipzig.de> |
27 | * @author Gregor Gawol <gawol@ub.uni-leipzig.de> |
28 | * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License |
29 | * @link https://vufind.org/wiki/development:plugins:link_resolver_drivers Wiki |
30 | */ |
31 | |
32 | namespace VuFind\Resolver\Driver; |
33 | |
34 | use DOMDocument; |
35 | use Laminas\Dom\DOMXPath; |
36 | |
37 | use function chr; |
38 | use function count; |
39 | |
/**
 * ReDi Link Resolver Driver
 *
 * Fetches the HTML snippet returned by a ReDi link resolver and converts the
 * DOI entries and EZB result entries found in it into VuFind's standard link
 * array format.
 *
 * @category VuFind
 * @package  Resolver_Drivers
 * @author   André Lahmann <lahmann@ub.uni-leipzig.de>
 * @author   Gregor Gawol <gawol@ub.uni-leipzig.de>
 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
 * @link     https://vufind.org/wiki/development:plugins:link_resolver_drivers Wiki
 */
class Redi extends AbstractBase
{
    /**
     * HTTP client
     *
     * @var \Laminas\Http\Client
     */
    protected $httpClient;

    /**
     * Parsed resolver links (filled by parseLinks(), cleaned by postProcessing())
     *
     * @var array
     */
    protected $links = [];

    /**
     * Constructor
     *
     * @param string               $baseUrl    Base URL for link resolver
     * @param \Laminas\Http\Client $httpClient HTTP client
     */
    public function __construct($baseUrl, \Laminas\Http\Client $httpClient)
    {
        parent::__construct($baseUrl);
        $this->httpClient = $httpClient;
    }

    /**
     * Fetch Links
     *
     * Fetches a set of links corresponding to an OpenURL
     *
     * @param string $openURL openURL (url-encoded)
     *
     * @return string raw response body returned by resolver
     */
    public function fetchLinks($openURL)
    {
        $url = $this->getResolverUrl($openURL);
        return $this->httpClient->setUri($url)->send()->getBody();
    }

    /**
     * Parse Links
     *
     * Parses the markup returned by a link resolver and converts it to a
     * standardised format for display. DOI entries come first, followed by the
     * EZB result entries; all entries are then passed through postProcessing().
     *
     * @param string $xmlstr Raw markup returned by resolver
     *
     * @return array Array of values (empty if the input is empty or unparsable)
     */
    public function parseLinks($xmlstr)
    {
        // DOMDocument::loadHTML() throws a ValueError for an empty source under
        // PHP 8 (which error suppression cannot catch), so bail out early.
        $source = (string)$xmlstr;
        if ($source === '') {
            return [];
        }

        // Collect libxml parser complaints internally instead of silencing the
        // call with "@"; the previous libxml setting is restored afterwards.
        $xml = new DOMDocument();
        $previousSetting = libxml_use_internal_errors(true);
        try {
            $loaded = $xml->loadHTML($source);
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousSetting);
        }
        if (!$loaded) {
            return [];
        }

        // parse the raw resolver-data
        $this->links = array_merge(
            $this->parseDOI($xml),
            $this->parseRediOpenURLs($xml)
        );

        // perform (individual) postprocessing on parsed resolver-data
        $this->postProcessing();

        return $this->links;
    }

    /**
     * Parse the Redi response and return array with DOI information.
     *
     * Pairs every dt.doi_t element with the dd.doi_d element at the same
     * position; nothing is returned when the two lists differ in length.
     *
     * @param DOMDocument $xml Loaded document
     *
     * @return array List of links with title, href, access, coverage and
     * service_type ('getDOI')
     */
    protected function parseDOI($xml)
    {
        $xpath = new DOMXPath($xml);

        $doiTerm = $xpath->query("//dt[@class='doi_t']");
        $doiDefinition = $xpath->query("//dd[@class='doi_d']");

        // Terms and definitions are matched by position, so an unequal number
        // of them cannot be paired up reliably.
        if ($doiTerm->length !== $doiDefinition->length) {
            return [];
        }

        $retval = [];
        for ($i = 0; $i < $doiTerm->length; $i++) {
            $term = $doiTerm->item($i);
            $definition = $doiDefinition->item($i);
            // First href attribute anywhere below the definition; may be absent,
            // in which case href stays null instead of dereferencing null.
            $hrefNode = $xpath->query('.//@href', $definition)->item(0);
            $retval[] = [
                'title' => $term->textContent . $definition->textContent,
                'href' => $hrefNode?->textContent,
                'access' => 'unknown',
                'coverage' => null,
                'service_type' => 'getDOI',
            ];
        }

        return $retval;
    }

    /**
     * Parse Redi additional information elements and return the one identified by
     * the infoToken provided (e.g. "*")
     *
     * @param DOMDocument $xml       Loaded document
     * @param string      $infoToken InfoToken to search for
     *
     * @return string Text of the paragraph containing the token, or an empty
     * string if no paragraph carries that token
     */
    protected function parseRediInfo($xml, $infoToken)
    {
        $xpath = new DOMXPath($xml);

        // additional info nodes - marked by "<sup>*</sup>"
        $infoTokenNodes = $xpath->query("//div[@id='t_ezb']/div[@class='t']/p/sup");

        foreach ($infoTokenNodes as $tokenNode) {
            if ((string)$infoToken === $tokenNode->textContent) {
                // Take the paragraph from the matched node itself; looking it up
                // by index in a separate parent list breaks as soon as one
                // paragraph holds more than one <sup>.
                return $tokenNode->parentNode->textContent;
            }
        }

        return '';
    }

    /**
     * Parse the Redi (EZB) result entries of the response.
     *
     * Every result paragraph is paired with the link at the same position;
     * nothing is returned when the number of paragraphs and links differ.
     *
     * @param DOMDocument $xml Loaded document
     *
     * @return array Get back Redi direct link to sources containing title, URL and
     * service_type
     */
    protected function parseRediOpenURLs($xml)
    {
        $xpath = new DOMXPath($xml);

        $ezbResultsNodesText = $xpath
            ->query("//div[@class='t_ezb_result']/p");
        $ezbResultsNodesURL = $xpath
            ->query("//div[@class='t_ezb_result']/p/span[@class='t_link']/a");

        if ($ezbResultsNodesText->length !== $ezbResultsNodesURL->length) {
            return [];
        }

        // Marker span class => access level; a later match overrides an
        // earlier one.
        $accessClassMarkers = [
            'denied' => 't_ezb_red',
            'limited' => 't_ezb_yellow',
            'open' => 't_ezb_green',
        ];

        $retval = [];
        for ($i = 0; $i < $ezbResultsNodesText->length; $i++) {
            $paragraph = $ezbResultsNodesText->item($i);

            // All lookups below are evaluated relative to the current
            // paragraph. A positional predicate such as "//div[...][n]" counts
            // per parent element rather than in document order, so it can
            // address a different result than the one being processed.
            $accessClass = 'unknown';
            foreach ($accessClassMarkers as $key => $cssClass) {
                $markers = $xpath->query("span[@class='{$cssClass}']", $paragraph);
                if ($markers->length === 1) {
                    $accessClass = $key;
                }
            }

            // A single <sup> token in the paragraph refers to a footnote with
            // additional (coverage) information.
            $itemInfo = '';
            $tokens = $xpath->query('sup', $paragraph);
            if ($tokens->length === 1) {
                $itemInfo = $this->parseRediInfo(
                    $xml,
                    $tokens->item(0)->textContent
                );
            }

            $retval[] = [
                'title' => $paragraph->textContent,
                // null when the anchor carries no href attribute
                'href' => $ezbResultsNodesURL->item($i)
                    ->attributes->getNamedItem('href')?->textContent,
                'access' => $accessClass,
                'coverage' => $itemInfo,
                'service_type' => 'getFullTxt',
            ];
        }

        return $retval;
    }

    /**
     * Hook for post processing of the parsed resolver response (e.g. by removing any
     * double angle quotation mark from each link['title']).
     *
     * Titles are stripped of the link marker and trimmed; coverage texts are
     * trimmed. Entries without a title or with a null coverage are left alone.
     *
     * @return void
     */
    protected function postProcessing()
    {
        $total = count($this->links);
        for ($i = 0; $i < $total; $i++) {
            if (isset($this->links[$i]['title'])) {
                $this->links[$i]['title'] = trim(
                    $this->removeDoubleAngleQuotationMarks($this->links[$i]['title'])
                );
            }
            if (isset($this->links[$i]['coverage'])) {
                $this->links[$i]['coverage'] = trim($this->links[$i]['coverage']);
            }
        }
    }

    /**
     * Helper function to remove hardcoded link-string "»" in Redi response
     *
     * @param string $string String to be manipulated
     *
     * @return string Input without "»" and without non-breaking spaces
     */
    protected function removeDoubleAngleQuotationMarks($string)
    {
        // chr(194) . chr(160) is the byte sequence used here to match \u00a0
        // (non-breaking space), which a plain trim() would not remove.
        return str_replace(
            ['»', chr(194) . chr(160)],
            '',
            $string
        );
    }
}