File: //opt/alt/php56/usr/share/pear/File/MARC.php
<?php
/* vim: set expandtab shiftwidth=4 tabstop=4 softtabstop=4 foldmethod=marker: */
/**
 * Parser for MARC records
 *
 * This package is based on the PHP MARC package, originally called "php-marc",
 * that is part of the Emilda Project (http://www.emilda.org). Christoffer
 * Landtman generously agreed to make the "php-marc" code available under the
 * GNU LGPL so it could be used as the basis of this PEAR package.
 * 
 * PHP version 5
 *
 * LICENSE: This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * @category  File_Formats
 * @package   File_MARC
 * @author    Christoffer Landtman <[email protected]>
 * @author    Dan Scott <[email protected]>
 * @copyright 2003-2010 Oy Realnode Ab, Dan Scott
 * @license   http://www.gnu.org/copyleft/lesser.html  LGPL License 2.1
 * @version   CVS: $Id$
 * @link      http://pear.php.net/package/File_MARC
 * @example   read.php Retrieve specific fields and subfields from a record
 * @example   subfields.php Create new subfields and add them in specific order
 * @example   marc_yaz.php Pretty print a MARC record retrieved through the PECL yaz extension
 */
require_once 'PEAR/Exception.php';
require_once 'File/MARCBASE.php';
require_once 'File/MARC/Record.php';
require_once 'File/MARC/Field.php';
require_once 'File/MARC/Control_Field.php';
require_once 'File/MARC/Data_Field.php';
require_once 'File/MARC/Subfield.php';
require_once 'File/MARC/Exception.php';
require_once 'File/MARC/List.php';
// {{{ class File_MARC
/**
 * The main File_MARC class enables you to return File_MARC_Record
 * objects from a stream or string.
 *
 * @category File_Formats
 * @package  File_MARC
 * @author   Christoffer Landtman <[email protected]>
 * @author   Dan Scott <[email protected]>
 * @license  http://www.gnu.org/copyleft/lesser.html  LGPL License 2.1
 * @link     http://pear.php.net/package/File_MARC
 */
class File_MARC extends File_MARCBASE
{
    // {{{ constants
    /**
     * MARC records retrieved from a file
     */
    const SOURCE_FILE = 1;
    /**
     * MARC records retrieved from a binary string 
     */
    const SOURCE_STRING = 2;
    /**
     * Hexadecimal value for Subfield indicator
     */
    const SUBFIELD_INDICATOR = "\x1F";
    /**
     * Hexadecimal value for End of Field
     */
    const END_OF_FIELD = "\x1E";
    /**
     * Hexadecimal value for End of Record
     */
    const END_OF_RECORD = "\x1D";
    /**
     * Length of the Directory
     */
    const DIRECTORY_ENTRY_LEN = 12;
    /**
     * Length of the Leader
     */
    const LEADER_LEN = 24;
    /**
     * Maximum record length
     */
    const MAX_RECORD_LENGTH = 99999;
    // }}}
    // {{{ properties
    /**
     * Source containing raw records
     * 
     * @var resource
     */
    protected $source;
    /**
     * Source type (SOURCE_FILE or SOURCE_STRING)
     * 
     * @var int
     */
    protected $type;
    /**
     * XMLWriter for writing collections
     * 
     * @var XMLWriter
     */
    protected $xmlwriter;
    // }}}
    // {{{ Constructor: function __construct()
    /**
     * Read in MARC records
     *
     * This function reads in MARC record files or strings that
     * contain one or more MARC records.
     *
     * <code>
     * <?php
     * // Retrieve MARC records from a file
     * $journals = new File_MARC('journals.mrc', SOURCE_FILE);
     *
     * // Retrieve MARC records from a string (e.g. Z39 query results)
     * $monographs = new File_MARC($raw_marc, SOURCE_STRING);
     * ?>
     * </code>
     *
     * @param string $source        Name of the file, or a raw MARC string
     * @param int    $type          Source of the input, either SOURCE_FILE or SOURCE_STRING
     * @param string $record_class  Record class, defaults to File_MARC_Record
     */
    function __construct($source, $type = self::SOURCE_FILE, $record_class = null)
    {
        parent::__construct($source, $type, $record_class);
        switch ($type) {
        case self::SOURCE_FILE:
            $this->type = self::SOURCE_FILE;
            $this->source = fopen($source, 'rb');
            if (!$this->source) {
                 $errorMessage = File_MARC_Exception::formatError(File_MARC_Exception::$messages[File_MARC_Exception::ERROR_INVALID_FILE], array('filename' => $source));
                 throw new File_MARC_Exception($errorMessage, File_MARC_Exception::ERROR_INVALID_FILE);
            }
            break;
        case self::SOURCE_STRING:
            $this->type = self::SOURCE_STRING;
            $this->source = explode(File_MARC::END_OF_RECORD, $source);
            break;
        default:
            throw new File_MARC_Exception(File_MARC_Exception::$messages[File_MARC_Exception::ERROR_INVALID_SOURCE], File_MARC_Exception::ERROR_INVALID_SOURCE);
        }
    }
    // }}}
    // {{{ nextRaw()
    /**
     * Return the next raw MARC record
     *
     * Returns the next raw MARC record, unless all records already have
     * been read.
     *
     * @return string Either a raw record or false
     */
    function nextRaw()
    {
        if ($this->type == self::SOURCE_FILE) {
            $record = stream_get_line($this->source, File_MARC::MAX_RECORD_LENGTH, File_MARC::END_OF_RECORD);
            // Remove illegal stuff that sometimes occurs between records
            $record = preg_replace('/^[\\x0a\\x0d\\x00]+/', "", $record);
        } elseif ($this->type == self::SOURCE_STRING) {
            $record = array_shift($this->source);
        }
        // Exit if we are at the end of the file
        if (!$record) {
            return false;
        }
        // Append the end of record we lost during stream_get_line() or explode()
        $record .= File_MARC::END_OF_RECORD;
        return $record;
    }
    // }}}
    // {{{ next()
    /**
     * Return next {@link File_MARC_Record} object
     *
     * Decodes the next raw MARC record and returns the {@link File_MARC_Record}
     * object.
     * <code>
     * <?php
     * // Retrieve a set of MARC records from a file
     * $journals = new File_MARC('journals.mrc', SOURCE_FILE);
     *
     * // Iterate through the retrieved records
     * while ($record = $journals->next()) {
     *     print $record;
     *     print "\n";
     * }
     *
     * ?>
     * </code>
     *
     * @return File_MARC_Record next record, or false if there are
     * no more records
     */
    function next()
    {
        $raw = $this->nextRaw();
        if ($raw) {
            return $this->_decode($raw);
        } else {
            return false;
        }
    }
    // }}}
    // {{{ _decode()
    /**
     * Decode a given raw MARC record
     *
     * Port of Andy Lesters MARC::File::USMARC->decode() Perl function into PHP.
     *
     * @param string $text Raw MARC record
     *
     * @return File_MARC_Record Decoded File_MARC_Record object
     */
    private function _decode($text)
    {
        $marc = new $this->record_class($this);
        // fallback on the actual byte length
        $record_length = strlen($text);
        $matches = array();
        if (preg_match("/^(\d{5})/", $text, $matches)) {
            // Store record length
            $record_length = $matches[1];
            if ($record_length != strlen($text)) {
                $marc->addWarning(File_MARC_Exception::formatError(File_MARC_Exception::$messages[File_MARC_Exception::ERROR_INCORRECT_LENGTH], array("record_length" => $record_length, "actual" => strlen($text))));
                // Real beats declared byte length
                $record_length = strlen($text);
            }
        } else {
            $marc->addWarning(File_MARC_Exception::formatError(File_MARC_Exception::$messages[File_MARC_Exception::ERROR_NONNUMERIC_LENGTH], array("record_length" => substr($text, 0, 5))));
        }
        if (substr($text, -1, 1) != File_MARC::END_OF_RECORD)
             throw new File_MARC_Exception(File_MARC_Exception::$messages[File_MARC_Exception::ERROR_INVALID_TERMINATOR], File_MARC_Exception::ERROR_INVALID_TERMINATOR);
        // Store leader
        $marc->setLeader(substr($text, 0, File_MARC::LEADER_LEN));
        // bytes 12 - 16 of leader give offset to the body of the record
        $data_start = 0 + substr($text, 12, 5);
        // immediately after the leader comes the directory (no separator)
        $dir = substr($text, File_MARC::LEADER_LEN, $data_start - File_MARC::LEADER_LEN - 1);  // -1 to allow for \x1e at end of directory
        // character after the directory must be \x1e
        if (substr($text, $data_start-1, 1) != File_MARC::END_OF_FIELD) {
            $marc->addWarning(File_MARC_Exception::$messages[File_MARC_Exception::ERROR_NO_DIRECTORY]);
        }
        // All directory entries 12 bytes long, so length % 12 must be 0
        if (strlen($dir) % File_MARC::DIRECTORY_ENTRY_LEN != 0) {
            $marc->addWarning(File_MARC_Exception::$messages[File_MARC_Exception::ERROR_INVALID_DIRECTORY_LENGTH]);
        }
        // go through all the fields
        $nfields = strlen($dir) / File_MARC::DIRECTORY_ENTRY_LEN;
        for ($n=0; $n<$nfields; $n++) {
            // As pack returns to key 1, leave place 0 in list empty
            list(, $tag) = unpack("A3", substr($dir, $n*File_MARC::DIRECTORY_ENTRY_LEN, File_MARC::DIRECTORY_ENTRY_LEN));
            list(, $len) = unpack("A3/A4", substr($dir, $n*File_MARC::DIRECTORY_ENTRY_LEN, File_MARC::DIRECTORY_ENTRY_LEN));
            list(, $offset) = unpack("A3/A4/A5", substr($dir, $n*File_MARC::DIRECTORY_ENTRY_LEN, File_MARC::DIRECTORY_ENTRY_LEN));
            // Check directory validity
            if (!preg_match("/^[0-9A-Za-z]{3}$/", $tag)) {
                $marc->addWarning(File_MARC_Exception::formatError(File_MARC_Exception::$messages[File_MARC_Exception::ERROR_INVALID_DIRECTORY_TAG], array("tag" => $tag)));
            }
            if (!preg_match("/^\d{4}$/", $len)) {
                $marc->addWarning(File_MARC_Exception::formatError(File_MARC_Exception::$messages[File_MARC_Exception::ERROR_INVALID_DIRECTORY_TAG_LENGTH], array("tag" => $tag, "len" => $len)));
            }
            if (!preg_match("/^\d{5}$/", $offset)) {
                $marc->addWarning(File_MARC_Exception::formatError(File_MARC_Exception::$messages[File_MARC_Exception::ERROR_INVALID_DIRECTORY_OFFSET], array("tag" => $tag, "offset" => $offset)));
            }
            if ($offset + $len > $record_length) {
                $marc->addWarning(File_MARC_Exception::formatError(File_MARC_Exception::$messages[File_MARC_Exception::ERROR_INVALID_DIRECTORY], array("tag" => $tag)));
            }
            $tag_data = substr($text, $data_start + $offset, $len);
            if (substr($tag_data, -1, 1) == File_MARC::END_OF_FIELD) {
                /* get rid of the end-of-tag character */
                $tag_data = substr($tag_data, 0, -1);
                $len--;
            } else {
                $marc->addWarning(File_MARC_Exception::formatError(File_MARC_Exception::$messages[File_MARC_Exception::ERROR_FIELD_EOF], array("tag" => $tag)));
            }
            if (preg_match("/^\d+$/", $tag) and ($tag < 10)) {
                $marc->appendField(new File_MARC_Control_Field($tag, $tag_data));
            } else {
                $subfields = explode(File_MARC::SUBFIELD_INDICATOR, $tag_data);
                $indicators = array_shift($subfields);
                if (strlen($indicators) != 2) {
                    $errorMessage = File_MARC_Exception::formatError(File_MARC_Exception::$messages[File_MARC_Exception::ERROR_INVALID_INDICATORS], array("tag" => $tag, "indicators" => $indicators));
                    $marc->addWarning($errorMessage);
                    // Do the best with the indicators we've got
                    if (strlen($indicators) == 1) {
                        $ind1 = $indicators;
                        $ind2 = " ";
                    } else {
                        list($ind1,$ind2) = array(" ", " ");
                    }
                } else {
                    $ind1 = substr($indicators, 0, 1);
                    $ind2 = substr($indicators, 1, 1);
                }
                // Split the subfield data into subfield name and data pairs
                $subfield_data = array();
                foreach ($subfields as $subfield) {
                    if (strlen($subfield) > 0) {
                        $subfield_data[] = new File_MARC_Subfield(substr($subfield, 0, 1), substr($subfield, 1));
                    } else {
                         $errorMessage = File_MARC_Exception::formatError(File_MARC_Exception::$messages[File_MARC_Exception::ERROR_EMPTY_SUBFIELD], array("tag" => $tag));
                         $marc->addWarning($errorMessage);
                    }
                }
                if (!isset($subfield_data)) {
                     $errorMessage = File_MARC_Exception::formatError(File_MARC_Exception::$messages[File_MARC_Exception::ERROR_EMPTY_SUBFIELD], array("tag" => $tag));
                     $marc->addWarning($errorMessage);
                }
                // If the data is invalid, let's just ignore the one field
                try {
                    $new_field = new File_MARC_Data_Field($tag, $subfield_data, $ind1, $ind2);
                    $marc->appendField($new_field);
                } catch (Exception $e) {
                    $marc->addWarning($e->getMessage());
                }
            }
        }
        return $marc;
    }
    // }}}
}
// }}}