* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License
 * @link https://vufind.org/wiki/development:testing:unit_tests Wiki
 */

namespace VuFind\Marc\Test;

use VuFind\Marc\MarcCollection;
use VuFind\Marc\MarcLint;
use VuFind\Marc\MarcReader;

/**
 * MarcLint Test Class
 *
 * Exercises individual MarcLint check methods (through the reflection helpers
 * callMethod/getProperty) as well as the public checkRecord entry point.
 *
 * NOTE(review): the inline record payloads in this file contain no markup
 * (several are concatenated with empty strings) although they are handed to
 * MarcReader. It looks like the record markup was stripped from this copy;
 * confirm against the canonical fixtures before relying on these tests.
 *
 * @category VuFind
 * @package Tests
 * @author Ere Maijala
 * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License
 * @link https://vufind.org/wiki/development:testing:unit_tests Wiki
 */
class MarcLintTest extends \PHPUnit\Framework\TestCase
{
    use Feature\FixtureTrait;
    use Feature\ReflectionTrait;

    /**
     * Test check020 method
     *
     * Builds a record from the input, runs check020 on its 020 field and
     * compares the collected warnings with the expected list.
     *
     * @param array  $expected Expected warnings
     * @param string $input    Record payload passed to MarcReader
     *
     * @dataProvider get020TestData
     *
     * @return void
     */
    public function testCheck020($expected, $input)
    {
        $record = new MarcReader("$input");
        $lint = new MarcLint();
        $this->callMethod($lint, 'check020', [$record->getField('020'), $record]);
        $this->assertEquals(
            $expected,
            $this->getProperty($lint, 'warnings')
        );
    }

    /**
     * Data provider for testCheck020
     *
     * Each data set is a pair: [expected warnings, record payload].
     *
     * @return array
     */
    public static function get020TestData()
    {
        return [
            [
                ['020: Subfield a has the wrong number of digits, 154879473.'],
                // too few digits
                '154879473',
            ],
            [
                ['020: Subfield a has bad checksum, 1548794743.'],
                // invalid checksum
                '1548794743',
            ],
            [
                ['020: Subfield a has the wrong number of digits, 15487947443.'],
                // 11 digits
                '15487947443',
            ],
            [
                ['020: Subfield a has the wrong number of digits, 15487947443324.'],
                // 14 digits
                '15487947443324',
            ],
            [
                [],
                // 13 digit valid
                '9781548794743',
            ],
            [
                ['020: Subfield a has bad checksum (13 digit), 9781548794745.'],
                // 13 digit invalid
                '9781548794745',
            ],
            [
                [],
                // 10 digit valid with qualifier
                '' .
                '1548794740 (10 : good checksum)',
            ],
            [
                ['020: Subfield a has bad checksum, 1548794745 (10 : bad checksum).'],
                // 10 digit invalid with qualifier
                '' .
                '1548794745 (10 : bad checksum)',
            ],
            [
                ['020: Subfield a may have invalid characters.'],
                // 10 digit with hyphens, good checksum and qualifier
                '' .
                '1-54879-474-0 (hyphens and good checksum)',
            ],
            [
                [
                    '020: Subfield a may have invalid characters.',
                    '020: Subfield a has bad checksum, 1-54879-474-5 (hyphens and bad checksum).',
                ],
                // 10 digit invalid with hyphens and qualifier
                '' .
                '1-54879-474-5 (hyphens and bad checksum)',
            ],
            [
                ['020: Subfield a qualifier must be preceded by space, 1548794740(10 : unspaced qualifier).'],
                // 10 valid without space before qualifier
                '' .
                '1548794740(10 : unspaced qualifier)',
            ],
            [
                [
                    '020: Subfield a qualifier must be preceded by space, 1548794745(10 : ' .
                    'unspaced qualifier : bad checksum).',
                    '020: Subfield a has bad checksum, 1548794745(10 : unspaced qualifier : bad checksum).',
                ],
                // 10 invalid without space before qualifier
                '' .
                '1548794745(10 : unspaced qualifier : bad checksum)',
            ],
            [
                [],
                // subfield z
                '1548794743',
            ],
            [
                ['020: Subfield z is numerically valid.'],
                // subfield z with valid checksum
                'ISBN 1548794740',
            ],
        ];
    }

    /**
     * Test check041 method
     *
     * Builds a record from the input, runs check041 on its 041 field and
     * compares the collected warnings with the expected list.
     *
     * @param array  $expected Expected warnings
     * @param string $input    Record payload passed to MarcReader
     *
     * @dataProvider get041TestData
     *
     * @return void
     */
    public function testCheck041($expected, $input)
    {
        $record = new MarcReader("$input");
        $lint = new MarcLint();
        $this->callMethod($lint, 'check041', [$record->getField('041'), $record]);
        $this->assertEquals(
            $expected,
            $this->getProperty($lint, 'warnings')
        );
    }

    /**
     * Data provider for testCheck041
     *
     * Each data set is a pair: [expected warnings, record payload].
     *
     * NOTE(review): the heredoc openers were truncated to "<<" in this copy and
     * have been rewritten as valid heredocs around the surviving payload; the
     * payload itself carries no markup -- confirm against the canonical source.
     *
     * @return array
     */
    public static function get041TestData()
    {
        return [
            [
                [
                    '041: Subfield _a, end (end), is not valid.',
                    '041: Subfield _a must be evenly divisible by 3 or exactly ' .
                    'three characters if ind2 is not 7, (span).',
                    '041: Subfield _h, far, may be obsolete.',
                ],
                <<<EOT
                end
                span
                far
                EOT,
            ],
            [
                [
                    '041: Subfield _a, endorviwo (end), is not valid.',
                    '041: Subfield _a, endorviwo (orv), is not valid.',
                    '041: Subfield _a, endorviwo (iwo), is not valid.',
                    '041: Subfield _a must be evenly divisible by 3 or exactly three characters if ind2 is not 7, ' .
                    '(spanowpalasba).',
                ],
                <<<EOT
                endorviwo
                spanowpalasba
                EOT,
            ],
        ];
    }

    /**
     * Test check043 method
     *
     * Runs check043 on the 043 field of an inline record and verifies the
     * warnings collected for the listed subfield values.
     *
     * @return void
     */
    public function testCheck043()
    {
        // NOTE(review): heredoc opener was truncated to "<<" in this copy; it is
        // rewritten as a valid heredoc around the surviving payload, which has
        // no markup -- confirm against the canonical source.
        $xml = <<<EOT
            n-----
            n-us----
            n-ma-us
            e-ur-ai
            us
            EOT;
        $record = new MarcReader($xml);
        $lint = new MarcLint();
        $this->callMethod($lint, 'check043', [$record->getField('043'), $record]);
        $this->assertEquals(
            [
                '043: Subfield _a must be exactly 7 characters, n-----',
                '043: Subfield _a must be exactly 7 characters, n-us----',
                '043: Subfield _a, n-ma-us, is not valid.',
                '043: Subfield _a, e-ur-ai, may be obsolete.',
            ],
            $this->getProperty($lint, 'warnings')
        );
    }

    /**
     * Test check245 method
     *
     * Builds a record from the input, runs check245 on its 245 field and
     * compares the collected warnings with the expected list.
     *
     * @param array  $expected Expected warnings
     * @param string $input    Record payload passed to MarcReader
     *
     * @dataProvider get245TestData
     *
     * @return void
     */
    public function testCheck245($expected, $input)
    {
        $record = new MarcReader("$input");
        $lint = new MarcLint();
        $this->callMethod($lint, 'check245', [$record->getField('245'), $record]);
        $this->assertEquals(
            $expected,
            $this->getProperty($lint, 'warnings')
        );
    }

    /**
     * Data provider for testCheck245
     *
     * Each data set is a pair: [expected warnings, record payload].
     *
     * @return array
     */
    public static function get245TestData()
    {
        return [
            [
                [],
                'Subfield a.',
            ],
            [
                [
                    '245: Must have a subfield _a.',
                    '245: First subfield must be _a, but it is _b',
                ],
                'no subfield a.',
            ],
            [
                ['245: Must end with . (period).'],
                'No period at end',
            ],
            [
                [
                    '245: MARC21 allows ? or ! as final punctuation but LCRI 1.0C, Nov. 2003 ' .
                    '(LCPS 1.7.1 for RDA records), requires period.',
                ],
                '' .
                'Other punctuation not followed by period!',
            ],
            [
                [
                    '245: MARC21 allows ? or ! as final punctuation but LCRI 1.0C, Nov. 2003 ' .
                    '(LCPS 1.7.1 for RDA records), requires period.',
                ],
                '' .
                'Other punctuation not followed by period?',
            ],
            [
                ['245: Subfield _c must be preceded by /'],
                '' .
                'Precedes sub cnot preceded by space-slash.' .
                '',
            ],
            [
                ['245: Subfield _c must be preceded by /'],
                'Precedes sub c/' .
                'not preceded by space-slash.',
            ],
            [
                ['245: Subfield _c initials should not have a space.'],
                'Precedes sub c /' .
                'initials in sub c B. B.',
            ],
            [
                [],
                'Precedes sub c /' .
                'initials in sub c B.B. (no warning).',
            ],
            [
                ['245: Subfield _b should be preceded by space-colon, space-semicolon, or space-equals sign.'],
                'Precedes sub b' .
                'not preceded by proper punctuation.',
            ],
            [
                ['245: Subfield _b should be preceded by space-colon, space-semicolon, or space-equals sign.'],
                'Precedes sub b=' .
                'not preceded by proper punctuation.',
            ],
            [
                ['245: Subfield _b should be preceded by space-colon, space-semicolon, or space-equals sign.'],
                'Precedes sub b:' .
                'not preceded by proper punctuation.',
            ],
            [
                ['245: Subfield _b should be preceded by space-colon, space-semicolon, or space-equals sign.'],
                'Precedes sub b;' .
                'not preceded by proper punctuation.',
            ],
            [
                [],
                'Precedes sub b =' .
                'preceded by proper punctuation.',
            ],
            [
                [],
                'Precedes sub b :' .
                'preceded by proper punctuation.',
            ],
            [
                [],
                'Precedes sub b ;' .
                'preceded by proper punctuation.',
            ],
            [
                ['245: Subfield _h should not be preceded by space.'],
                'Precedes sub h ' .
                '[videorecording].',
            ],
            [
                [],
                'Precedes sub h-- ' .
                '[videorecording] :' .
                'with elipses dash before h.',
            ],
            [
                ['245: Subfield _h must have matching square brackets, videorecording :.'],
                'Precedes sub h-- ' .
                'videorecording :' .
                'without brackets around GMD.',
            ],
            [
                [],
                'Precedes sub n.' .
                'Number 1.',
            ],
            [
                ['245: Subfield _n must be preceded by . (period).'],
                'Precedes sub n' .
                'Number 2.',
            ],
            [
                ['245: Subfield _p must be preceded by , (comma) when it follows subfield _n.'],
                'Precedes sub n.' .
                'Number 3.Sub n has period not comma.' .
                '',
            ],
            [
                [],
                'Precedes sub n.' .
                'Number 3,Sub n has comma.',
            ],
            [
                [],
                'Precedes sub p.' .
                'Sub a has period.',
            ],
            [
                ['245: Subfield _p must be preceded by . (period) when it follows a subfield other than _n.'],
                'Precedes sub p' .
                'Sub a has no period.',
            ],
            [
                ['245: Non-filing indicator is non-numeric'],
                '' .
                'Invalid filing indicator.',
            ],
            [
                ['245: First word, the, may be an article, check 2nd indicator (0).'],
                'The article.',
            ],
            [
                [],
                'The article.',
            ],
            [
                ['245: First word, an, may be an article, check 2nd indicator (2).'],
                'An article.',
            ],
            [
                ['245: First word, l, may be an article, check 2nd indicator (0).'],
                // Apostrophe escaped so it no longer terminates the literal.
                'L\'article.',
            ],
            [
                ['245: First word, a, does not appear to be an article, check 2nd indicator (2).'],
                'A la mode.',
            ],
            [
                [],
                '' .
                'The "quoted article".',
            ],
            [
                [],
                '' .
                'The (parenthetical article).',
            ],
            [
                [],
                '' .
                '(The) article in parentheses).',
            ],
            [
                [],
                '' .
                // Apostrophes escaped so they no longer terminate the literal.
                '"(The)" \'article\' in quotes and parentheses).' .
                '',
            ],
            [
                [],
                '' .
                '[The supplied title].',
            ],
            [
                [
                    '245: Must have a subfield _a.',
                    '245: Must end with . (period).',
                    '245: May have too few subfields.',
                ],
                'sub6',
            ],
            [
                [
                    '245: Must end with . (period).',
                    '245: First subfield must be _6, but it is a',
                ],
                'Subfield a.' .
                'sub6',
            ],
            [
                [
                    '245: Must have a subfield _a.',
                    '245: First subfield after subfield _6 must be _a, but it is _b',
                    '245: Subfield _b should be preceded by space-colon, space-semicolon, or space-equals sign.',
                ],
                'sub6' .
                'Subfield b.',
            ],
        ];
    }

    /**
     * Test field 880
     *
     * Runs checkRecord on the marc/lint/880.xml fixture and verifies the
     * returned warnings.
     *
     * @return void
     */
    public function test880()
    {
        $record = new MarcReader(
            $this->getFixture('marc/lint/880.xml')
        );
        $expected = [
            '245: Field is not repeatable.',
            '880: No subfield 6.',
        ];
        $lint = new MarcLint();
        $this->assertEquals($expected, $lint->checkRecord($record));
    }

    /**
     * Test records that cover the rest of the rules
     *
     * Checks every record of the camel.mrc collection, then the record2.xml
     * and record3.xml fixtures, all with the same MarcLint instance.
     *
     * @return void
     */
    public function testRecords()
    {
        $lint = new MarcLint();

        $collection = new MarcCollection(
            $this->getFixture('marc/lint/camel.mrc')
        );
        $expected = [
            '100: Indicator 1 must be 0, 1 or 3 but it\'s "2"',
        ];
        // Accumulate the warnings of all records in the collection.
        $warnings = [];
        foreach ($collection as $record) {
            $warnings = array_merge($warnings, $lint->checkRecord($record));
        }
        $this->assertEquals($expected, array_filter($warnings));

        $record = new MarcReader(
            $this->getFixture('marc/lint/record2.xml')
        );
        $expected = [
            '1XX: Only one 1XX tag is allowed, but I found 2 of them.',
            '041: Subfield _a, end (end), is not valid.',
            '041: Subfield _a must be evenly divisible by 3 or exactly three characters if ind2 is not 7, (fren).',
            '043: Subfield _a, n-us-pn, is not valid.',
            '082: Subfield _R is not allowed.',
            '100: Indicator 2 must be blank but it\'s "4"',
            '245: Indicator 1 must be 0 or 1 but it\'s "9"',
            '245: Subfield _a is not repeatable.',
            '260: Subfield _r is not allowed.',
            '856: Indicator 2 must be blank, 0, 1, 2 or 8 but it\'s "3"',
        ];
        $this->assertEquals($expected, $lint->checkRecord($record));

        $marc = $this->getFixture('marc/lint/record3.xml');
        $reader = new MarcReader($marc);
        $expected = [
            '1XX: Only one 1XX tag is allowed, but I found 4 of them.',
            '245: No 245 tag.',
            '009: Subfields are not allowed in fields lower than 010',
            '100: Subfield _a is not repeatable.',
            '100: Subfield _a has an invalid control character',
            '110: Field is not repeatable.',
            '130: Indicator 1 must be 0, 1, 2, 3, 4, 5, 6, 7, 8 or 9 but it\'s "blank"',
            '130: Indicator 2 must be blank but it\'s "1"',
            '240: Subfield _b is not allowed.',
        ];
        $this->assertEquals($expected, $lint->checkRecord($reader));
    }

    /**
     * Test checkArticle method
     *
     * Builds a record from $input['data'], runs checkArticle on the field
     * identified by $input['tag'] and compares the collected warnings.
     *
     * @param array $expected Expected warnings
     * @param array $input    Input with keys 'tag' (field tag) and 'data'
     * (record payload)
     *
     * @dataProvider getCheckArticleTestData
     *
     * @return void
     */
    public function testCheckArticle($expected, $input)
    {
        $record = new MarcReader("{$input['data']}");
        $lint = new MarcLint();
        $this->callMethod(
            $lint,
            'checkArticle',
            [
                $record->getField($input['tag']),
                $record,
            ]
        );
        $this->assertEquals(
            $expected,
            $this->getProperty($lint, 'warnings')
        );
    }

    /**
     * Data provider for testCheckArticle
     *
     * Each data set is a pair: [expected warnings, ['tag' => ..., 'data' => ...]].
     *
     * NOTE(review): the 'data' payloads of the data sets are identical in this
     * copy although the expected warnings differ; presumably the distinguishing
     * markup was lost -- confirm against the canonical source.
     *
     * @return array
     */
    public static function getCheckArticleTestData()
    {
        return [
            [
                [],
                [
                    'tag' => '130',
                    'data' => 'Foo',
                ],
            ],
            [
                ['130: Non-filing indicator is out of range'],
                [
                    'tag' => '130',
                    'data' => 'Foo',
                ],
            ],
            [
                ['130: Non-filing indicator is non-numeric'],
                [
                    'tag' => '130',
                    'data' => 'Foo',
                ],
            ],
            [
                ['Internal error: 650 is not a valid field for article checking'],
                [
                    'tag' => '650',
                    'data' => 'Foo',
                ],
            ],
        ];
    }

    /**
     * Test parsing of rules
     *
     * Feeds a rule group to processRuleGroup and verifies the entry stored
     * under tag 999 in the 'rules' property.
     *
     * @return void
     */
    public function testRuleParsing()
    {
        // Test rule parsing for a range of subfields:
        $ruleGroup = [
            '999 R LOCAL',
            'ind1 0-9 Undefined',
            'ind2 0-9 Undefined',
            'a-c R Undefined',
        ];
        $expected = [
            'repeatable' => '',
            'desc' => '',
            'ind1' => [
                'values' => '',
                'hr_values' => '',
                'desc' => '',
            ],
            'ind2' => [
                'values' => '',
                'hr_values' => '',
                'desc' => '',
            ],
            'suba' => [
                'repeatable' => '',
                'desc' => '',
            ],
            'subb' => [
                'repeatable' => '',
                'desc' => '',
            ],
            'subc' => [
                'repeatable' => '',
                'desc' => '',
            ],
        ];

        // A single instance suffices; it is created right before it is used.
        $lint = new MarcLint();
        $this->callMethod(
            $lint,
            'processRuleGroup',
            [
                $ruleGroup,
            ]
        );
        $rules = $this->getProperty($lint, 'rules');
        $this->assertEquals($expected, $rules['999']);
    }
}