; oai.ini -- OAI-PMH harvest settings. ; ; For every OAI-PMH source you would like to harvest, create a section like this: ; ; [section_name] ; url = http://oai.myuniversity.edu/ ; set = my_optional_set ; metadataPrefix = oai_dc ; timeout = 60 ; combineRecords = false ; combineRecordsTag = <collection> ; idSearch[] = "/oai:myuniversity.edu:/" ; idReplace[] = "myprefix-" ; injectDate = false ; injectId = false ; injectSetName = false ; injectSetSpec = false ; injectHeaderElements[] = hierarchy ; dateGranularity = auto ; harvestedIdLog = harvest.log ; verbose = false ; autosslca = true ; sslcapath = "/etc/ssl/certs" ; e.g. for Debian systems ; sslcafile = "/etc/pki/tls/cert.pem" ; e.g. for CentOS systems ; sslverifypeer = true ; sanitize = true ; sanitizeRegex[] = "/[^\x{0009}\x{000a}\x{000d}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}]+/u" ; badXMLLog = bad.log ; httpUser = myUsername ; httpPass = myPassword ; stopAfter = 100 ; ; The section_name may be passed to harvest_oai.php as a parameter to harvest only ; records from that source. This is also the directory name that records will be ; harvested into (a subdirectory of "harvest" under VUFIND_HOME). ; ; url is the base URL for the OAI-PMH source. ; ; set is the identifier of a set to harvest (normally found in the <setSpec> tag of ; an OAI-PMH ListSets response). You may harvest multiple sets by putting multiple ; "set[] = x" lines into your configuration. Omit set to harvest all records. ; ; metadataPrefix is the metadata format to harvest (oai_dc will be used by default ; if the value is omitted). ; ; timeout may be set to the number of seconds to allow for an OAI request (a value ; of 60 will be used by default). ; ; combineRecords may be set to true if you would like the harvested records to be ; combined into fewer files (this is determined by the OAI server's ; response chunk size). The default setting (false) will result in a new file being ; created for each record.
; Note that this function is primarily intended for ; harvesting MARC records; many of VuFind's example XSLT transformations for other ; types of metadata are designed to process one record at a time and will not ; work with this setting. However, it is possible to revise the XSLT to handle ; batches of records. Starting with VuFind 6.0, a demo .xsl for OJS that can handle ; combined records within a <collection> tag has been included as an example ; and can be found at $VUFIND_HOME/import/xsl/ojs-multirecord.xsl. ; ; combineRecordsTag may be used to supply a beginning and ending XML tag (if ; combineRecords is set to true) which will be used to wrap the set of ; combined records. The default value is <collection>. Note: you may also add ; attributes to this tag, e.g., <collection attr="value"> will correctly ; wrap the records in <collection attr="value">...</collection> tags. ; ; globalSearch[] and globalReplace[] may be used to manipulate the raw XML metadata ; documents with regular expressions. This should be used with caution but can be ; helpful when compensating for server-side encoding/markup errors. You may use ; multiple pairs of search and replace settings to perform multiple manipulations. ; ; idPrefix is the OAI-specific prefix attached to ID values. If you provide the ; value here, it will be automatically stripped for you when generating filenames, ; injecting IDs and tracking deleted records. If you omit the setting, full ; OAI ids will be retained. [DEPRECATED -- use idSearch and idReplace instead] ; ; idSearch[] and idReplace[] may be used to manipulate IDs with regular expressions. ; This is useful for adding or removing prefixes and swapping out problematic ; characters. You may use multiple pairs of search and replace settings to perform ; multiple manipulations. ; ; injectDate may be set to an XML tag name in order to inject the datestamp of ; the record into the harvested metadata (enclosed in the specified tag). If ; omitted or set to false, no datestamp-related changes will be made to the ; harvested metadata.
; ; injectId may be set to an XML tag name in order to inject the ID of the record ; into the harvested metadata (enclosed in the specified tag). If omitted or set ; to false, no ID-related changes will be made to the harvested metadata. ; ; injectSetName may be set to an XML tag name in order to inject the setName value ; of the record into the harvested metadata (enclosed in the specified tag). If ; omitted or set to false, no setName-related changes will be made to the harvested ; metadata. ; ; injectSetSpec may be set to an XML tag name in order to inject the setSpec value ; of the record into the harvested metadata (enclosed in the specified tag). If ; omitted or set to false, no setSpec-related changes will be made to the harvested ; metadata. ; ; injectHeaderElements may be set to an array of elements within the header of the ; OAI-PMH response which should be copied into the saved XML document. This is ; rarely necessary. ; ; dateGranularity is the granularity used by the server for representing dates. ; This may be "YYYY-MM-DDThh:mm:ssZ", "YYYY-MM-DD" or "auto" (to query the server ; for details). The default is "auto". ; ; harvestedIdLog is a filename (inside your harvest directory) for a text file ; listing all non-deleted harvested records encountered. If you omit this setting, ; no log file will be generated. Subsequent harvests will append to the file if ; it already exists. ; ; verbose may be set to true in order to display more detailed output while ; harvesting; this may be useful for troubleshooting purposes, but it defaults to ; false. ; ; autosslca will attempt to autodetect your SSL certificate authority. ; ; sslcafile can be used to specify the path to an SSL certificate authority ; file (e.g. /etc/pki/tls/cert.pem on CentOS/RedHat systems). ; ; sslcapath can be used to specify the path to an SSL certificate authority ; directory (e.g. /etc/ssl/certs on Debian systems).
; ; sslverifypeer may be set to false to disable SSL certificate checking; it defaults ; to true, and changing the setting is not recommended. ; ; sanitize may be set to true to strip illegal characters from XML responses; it ; defaults to false, assuming that the OAI-PMH server you are harvesting from will ; provide you with valid data. ; ; sanitizeRegex may be set to an array of regex strings used to sanitize XML retrieved ; from an OAI-PMH source. Any text sequences matching these expressions will be ; replaced with blank spaces. ; ; badXMLLog may be set to a filename (which will be created within your harvest ; directory) to contain copies of bad XML that was fixed when the sanitize setting ; (above) is set to true. ; ; httpUser is an optional username in case the OAI repository is behind HTTP basic ; authentication. It must be set in combination with httpPass. ; ; httpPass is an optional password in case the OAI repository is behind HTTP basic ; authentication. It must be set in combination with httpUser. ; ; stopAfter may be set to a positive integer 'n' in order to stop harvesting ; after just the first 'n' records have been harvested. This option can be used for ; testing purposes. It allows the harvesting of smaller data sets. ; ; SAMPLE CONFIGURATION FOR OPEN JOURNAL SYSTEMS ;[OJS] ;url = http://ojs.myuniversity.edu/oai ;metadataPrefix = oai_dc ;idSearch[] = "/^oai:myuniversity.edu:/" ;idReplace[] = "ojs-" ;idSearch[] = "/\//" ;idReplace[] = "-" ;injectId = "identifier" ;injectDate = "datestamp" ; ; Further examples for harvesting OAI-PMH sources available to the general ; public may be found on this wiki page: ; https://vufind.org/wiki/indexing:open_data_sources