[sync] WBXML Decode class

Anthony Mills amills at gascard.net
Mon Dec 1 21:24:08 PST 2003


If someone wants to do some voodoo and check the code in, I wouldn't mind.  I
don't think it will 'compile'.  I tried, but I have not figured out the
packages yet.  I followed horde/docs/CODING_STANDARDS, so hopefully it is a
little better.  As soon as I figure out binary bytes in PHP I am going to
start testing the implementation.  Some parts of the code are not finished; look
for 'FIXME'.  Some is still Java, some is blank, but I wrote what it should do.
And some of the code I forgot (oops, I should have written better comments).  I'm
sure when I start testing it, I will figure it out.  No hurry on the check-in; I
think work will keep me busy for the next ever.

Thanks,

Anthony
-------------- next part --------------
<?php

include_once 'XML/WBXML.php';
include_once 'XML/WBXML/DTD.php';
include_once 'XML/WBXML/DTDManager.php';

/**
 * $Horde: framework/XML_WBXML/WBXML/DTDManager.php,v 1.1 2003/11/26 04:14:28 chuck Exp $
 *
 * Copyright 2003 Anthony Mills <amills at pyramid6.com>
 *
 * See the enclosed file COPYING for license information (LGPL).  If you
 * did not receive this file, see http://www.fsf.org/copyleft/lgpl.html.
 *
 * From Binary XML Content Format Specification Version 1.3, 25 July
 * 2001 found at http://www.wapforum.org
 *
 * @package XML_WBXML
 */
class XML_WBXML_Decode {

    /**
     *  WBXML Version
     *  1,2, or 3 supported
     *
     */    
    var $_wbxmlVersion;
    
    /**
     *  Document Public Identifier type
     *  1 mb_u_int32 well-known type
     *  2 string table
     *  from spec but converted into a string
     *
     *  Document Public Identifier
     *  Used with dpiType
     *
     */
    var $_dpi;
    
    /**
     *  Charset
     */
    var $_charset;
    
    /**
     *  String table as defined in 5.7
     */    
    var $_stringTable = array();
    
    
    /**
      *  Content handler; currently just outputs raw XML
      */
    
    var $_ch;
    
    var $_tagDTD;
    
    var $_prevAttributeDTD;
    
    var $_attributeDTD;
    
    //state variables    
    var $_tagStack;
    var $_isAttribute;
    var $_isData = false;
    
    
    /**
     * Decodes a WBXML document and feeds it to the content handler.
     *
     * Reads the header fields in the order the code comments attribute
     * to the WBXML specification (version, public identifier, charset,
     * string table) and then consumes body tokens until
     * decodeInternal() reports that the input is exhausted.
     *
     * @param resource $input  Stream positioned at the first byte of the
     *                         WBXML document.
     */
    function decode($input)
    {
        // Section 5.4: version = u_int8 (currently 1, 2 or 3).
        $this->_wbxmlVersion = $this->getVersionNumber($input);

        // Section 5.5: publicid = mb_u_int32 | ( zero index ).
        // Only the raw type/number can be read here; a string table
        // reference cannot be resolved before the table itself is read.
        $dpiStruct = $this->getDocumentPublicIdentifier($input);

        // Section 5.6: charset = mb_u_int32.
        $this->_charset = $this->getCharset($input);

        // Section 5.7: string table = length followed by that many bytes.
        $this->_stringTable = $this->getStringTable($input, $this->_charset);

        // Section 5.5 again: resolve the identifier now that the string
        // table is available.  The method name is misspelled ("Imlp") in
        // its definition, so it is called by that name here.
        $this->_dpi = $this->getDocumentPublicIdentifierImlp($dpiStruct['dpiType'],
                                                             $dpiStruct['dpiNumber'],
                                                             $this->_stringTable);

        // Start every document with clean parser state.
        $this->_tagStack = array();
        $this->_isAttribute = false;
        $this->_isData = false;

        // Default content handler.
        $this->_ch = new XML_WBXML_ContentHandler;

        // NOTE(review): nothing in this method initialises _tagDTD or
        // _attributeDTD; presumably they should be looked up from the
        // public identifier before the body is decoded -- confirm.

        // Sections 5.2 and 5.8: decode the body one token at a time.
        while ($this->decodeInternal($input)) {
            // All work happens in decodeInternal().
        }
    }
    
    /**
     * Reads the WBXML version byte (Section 5.4, version = u_int8).
     *
     * @param resource $input  Stream positioned at the version byte.
     *
     * @return integer|false  The numeric value of the byte, or false if
     *                        the stream has no more data.
     */
    function getVersionNumber($input)
    {
        // fread() requires an explicit length and returns a one-byte
        // string, which has to be converted to its numeric value.
        $byte = fread($input, 1);
        if ($byte === false || $byte === '') {
            return false;
        }

        return ord($byte);
    }
    
    /**
     * Reads the raw document public identifier (Section 5.5,
     * publicid = mb_u_int32 | ( zero index )).
     *
     * @param resource $input  Stream positioned at the public identifier.
     *
     * @return array  Hash with the keys 'dpiType' (1 = numeric
     *                identifier, 2 = string table reference) and
     *                'dpiNumber' (the identifier, or the string table
     *                index).
     */
    function getDocumentPublicIdentifier($input)
    {
        $dpiStruct = array();

        $i = XML_WBXML::getMBUInt32($input);

        if ($i == 0) {
            // A zero marker is followed by the string table index of the
            // identifier; the index is itself a multi-byte integer, not
            // a single byte.
            $dpiStruct['dpiType'] = 2;
            $dpiStruct['dpiNumber'] = XML_WBXML::getMBUInt32($input);
        } else {
            $dpiStruct['dpiType'] = 1;
            $dpiStruct['dpiNumber'] = $i;
        }

        return $dpiStruct;
    }
    
    /**
     * Resolves a raw public identifier to its string form.
     *
     * The method name is misspelled ("Imlp") but is kept unchanged
     * because it is part of the class interface.
     *
     * @param integer $dpiType    1 if $dpiNumber is a numeric identifier;
     *                            any other value treats it as a string
     *                            table index.
     * @param integer $dpiNumber  The identifier or string table index.
     * @param array   $st         The string table, keyed by index.
     *
     * @return mixed  Whatever XML_WBXML::getDPI() returns for type 1;
     *                otherwise the string table entry, or null if the
     *                table has no entry at that index.
     */
    function getDocumentPublicIdentifierImlp($dpiType, $dpiNumber, $st)
    {
        if ($dpiType == 1) {
            return XML_WBXML::getDPI($dpiNumber);
        }

        // Avoid an undefined-index notice when the table lacks the entry.
        if (is_array($st) && isset($st[$dpiNumber])) {
            return $st[$dpiNumber];
        }

        return null;
    }
    
    /**
     * Reads the document character set (Section 5.6,
     * charset = mb_u_int32).
     *
     * The numeric identifier read from the stream is handed to
     * XML_WBXML::getCharset() and that call's result is returned as-is.
     * The original notes reference the IANA registry at
     * http://www.iana.org/assignments/character-sets and state that only
     * the default J2SE encodings listed at
     * http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html
     * are supported.
     *
     * @param resource $input  Stream positioned at the charset field.
     *
     * @return mixed  The result of XML_WBXML::getCharset().
     */
    function getCharset($input)
    {
        return XML_WBXML::getCharset(XML_WBXML::getMBUInt32($input));
    }
    
    /**
     * Reads the string table (Section 5.7).
     *
     * The table is a multi-byte length followed by that many bytes
     * holding NUL-terminated strings.  Each string is stored under the
     * byte offset at which it starts, which is how the rest of this
     * class indexes $_stringTable.
     *
     * FIXME: terminators are detected as a single NUL byte, so charsets
     * whose code units are wider than one byte are not split correctly.
     * No charset conversion is done; entries are the raw bytes.
     *
     * @param resource $input  Stream positioned at the table length.
     * @param mixed    $cs     Document charset; currently unused.
     *
     * @return array  Strings keyed by their starting byte offset.
     */
    function getStringTable($input, $cs)
    {
        $st = array();
        $size = XML_WBXML::getMBUInt32($input);

        if ($size <= 0) {
            return $st;
        }

        // fread() may return fewer bytes than requested, so keep reading
        // until the whole table has arrived or the stream ends.
        $bytes = '';
        while (strlen($bytes) < $size) {
            $chunk = fread($input, $size - strlen($bytes));
            if ($chunk === false || $chunk === '') {
                break;
            }
            $bytes .= $chunk;
        }

        $length = strlen($bytes);
        $start = 0;
        while ($start < $length) {
            $end = strpos($bytes, "\0", $start);
            if ($end === false) {
                // The last string has no terminator; it runs to the end.
                $end = $length;
            }

            $st[$start] = substr($bytes, $start, $end - $start);
            $start = $end + 1;
        }

        return $st;
    }
    
    /**
     * Reads one body token from the stream and dispatches it.
     *
     * Global tokens are handled by the named cases; everything else is
     * treated as a tag token from the current tag code page.
     *
     * Assumes the WBXML_GLOBAL_TOKEN_* constants are the integer token
     * values -- TODO confirm against XML/WBXML.php.
     *
     * @param resource $input  Stream positioned at the next token.
     *
     * @return boolean  True if a token was read and processed, false
     *                  once the stream has no more data.
     */
    function decodeInternal($input)
    {
        // End of input is detected from the read itself: token values
        // such as 0 are legal and must not be mistaken for "no data".
        $byte = fread($input, 1);
        if ($byte === false || $byte === '') {
            return false;
        }
        $token = ord($byte);

        switch ($token) {
        case WBXML_GLOBAL_TOKEN_STR_I: // Section 5.8.4.1
            $str = $this->termstr($input);
            $this->_ch->characters($str);
            break;

        case WBXML_GLOBAL_TOKEN_STR_T: // Section 5.8.4.1
            $str = $this->_stringTable[XML_WBXML::getMBUInt32($input)];
            $this->_ch->characters($str);
            break;

        case WBXML_GLOBAL_TOKEN_EXT_I_0: // Section 5.8.4.2
        case WBXML_GLOBAL_TOKEN_EXT_I_1: // Section 5.8.4.2
        case WBXML_GLOBAL_TOKEN_EXT_I_2: // Section 5.8.4.2
            $str = $this->termstr($input);
            $this->_ch->characters($str);
            break;

        case WBXML_GLOBAL_TOKEN_EXT_T_0: // Section 5.8.4.2
        case WBXML_GLOBAL_TOKEN_EXT_T_1: // Section 5.8.4.2
        case WBXML_GLOBAL_TOKEN_EXT_T_2: // Section 5.8.4.2
            $str = $this->_stringTable[XML_WBXML::getMBUInt32($input)];
            $this->_ch->characters($str);
            break;

        case WBXML_GLOBAL_TOKEN_EXT_0: // Section 5.8.4.2
        case WBXML_GLOBAL_TOKEN_EXT_1: // Section 5.8.4.2
        case WBXML_GLOBAL_TOKEN_EXT_2: // Section 5.8.4.2
            // NOTE(review): this consumes one extra byte as the
            // extension payload, as the original code did; confirm
            // against Section 5.8.4.2 whether these single-byte tokens
            // carry any payload at all.
            $extension = fread($input, 1);
            $this->_ch->characters($extension);
            break;

        case WBXML_GLOBAL_TOKEN_ENTITY: // Section 5.8.4.3
            // UCS-4 character encoding??
            $entity = $this->entity(XML_WBXML::getMBUInt32($input));
            $this->_ch->characters('&#' . $entity . ';');
            break;

        case WBXML_GLOBAL_TOKEN_PI: // Section 5.8.4.4
            // Processing instructions are unsupported and are skipped.
            break;

        case WBXML_GLOBAL_TOKEN_LITERAL: // Section 5.8.4.5
            $str = $this->_stringTable[XML_WBXML::getMBUInt32($input)];
            $this->parseTag($input, $str, false, false);
            break;

        case WBXML_GLOBAL_TOKEN_LITERAL_A: // Section 5.8.4.5
            $str = $this->_stringTable[XML_WBXML::getMBUInt32($input)];
            $this->parseTag($input, $str, true, false);
            break;

        case WBXML_GLOBAL_TOKEN_LITERAL_AC: // Section 5.8.4.5
            $str = $this->_stringTable[XML_WBXML::getMBUInt32($input)];
            $this->parseTag($input, $str, true, true);
            break;

        case WBXML_GLOBAL_TOKEN_LITERAL_C: // Section 5.8.4.5
            $str = $this->_stringTable[XML_WBXML::getMBUInt32($input)];
            $this->parseTag($input, $str, false, true);
            break;

        case WBXML_GLOBAL_TOKEN_OPAQUE: // Section 5.8.4.6
            // FIXME: opaque data is used by SyncML and its meaning
            // depends on context.  For now the bytes are read only so
            // that the stream stays in step, then discarded.  This may
            // require an event based decoder.
            $size = XML_WBXML::getMBUInt32($input);
            if ($size > 0) {
                $b = fread($input, $size);
            }
            break;

        case WBXML_GLOBAL_TOKEN_END: // Section 5.8.4.7.1
            $this->endTag();
            break;

        case WBXML_GLOBAL_TOKEN_SWITCH_PAGE: // Section 5.8.4.7.2
            $codePage = ord(fread($input, 1));
            $this->switchElementCodePage($codePage);
            break;

        default: // Sections 5.8.2 and 5.8.3
            // Bit 7 flags attributes, bit 6 flags content, the low six
            // bits identify the tag within the current code page.
            $hasAttributes = (($token & 0x80) != 0);
            $hasContent = (($token & 0x40) != 0);
            $realToken = $token & 0x3F;
            $str = $this->getTag($realToken);

            $this->parseTag($input, $str, $hasAttributes, $hasContent);

            if ($realToken == 0x0f) {
                // FIXME: the original author no longer remembered why
                // tag 0x0f is special; endTag() clears this flag again
                // when a 'Data' element closes.
                $this->_isData = true;
            }
            break;
        }

        return true;
    }
    
    /**
     * Emits the start of an element and, if it has no content, its end.
     *
     * @param resource $input          Stream positioned after the tag
     *                                 token.
     * @param string   $tag            Element name.
     * @param boolean  $hasAttributes  Whether an attribute list follows.
     * @param boolean  $hasContent     Whether the element has content and
     *                                 is closed later by an END token.
     */
    function parseTag($input, $tag, $hasAttributes, $hasContent)
    {
        $attrs = array();
        if ($hasAttributes) {
            $attrs = $this->getAttributes($input);
            if (!is_array($attrs)) {
                $attrs = array();
            }
        }

        $uri = $this->getCurrentURI();
        $this->_ch->startElement($uri, $tag, $attrs);

        if ($hasContent) {
            // Remember the open element so that endTag() can close it
            // when the matching END token arrives.
            if (!is_array($this->_tagStack)) {
                $this->_tagStack = array();
            }
            $this->_tagStack[] = $tag;
        } else {
            $this->_ch->endElement($uri, $tag);
        }
    }
    
    /**
     * Closes the most recently opened element.
     *
     * Pops the element name from the tag stack; if the stack is empty
     * the name 'Unknown' is used instead.
     *
     * @return string  The name of the element that was closed.
     */
    function endTag()
    {
        if (is_array($this->_tagStack) && count($this->_tagStack) > 0) {
            $tag = array_pop($this->_tagStack);
        } else {
            $tag = 'Unknown';
        }

        if ($tag == 'Data') {
            $this->_isData = false;
        }

        $this->_ch->endElement($this->getCurrentURI(), $tag);

        return $tag;
    }
    
    /**
     * Reads an attribute list up to its END token.
     *
     * Assumes the WBXML_GLOBAL_TOKEN_* constants are the integer token
     * values -- TODO confirm against XML/WBXML.php.
     *
     * @param resource $input  Stream positioned at the first attribute
     *                         token.
     *
     * @return array  List of hashes with the keys 'attribute' and
     *                'value', in document order.
     */
    function getAttributes($input)
    {
        $attrs = array();
        $attr = null;
        $value = '';

        $this->startGetAttributes();

        $hasMoreAttributes = true;
        while ($hasMoreAttributes) {
            $byte = fread($input, 1);
            if ($byte === false || $byte === '') {
                // Truncated input: stop instead of looping forever.
                break;
            }
            $token = ord($byte);

            switch ($token) {
            // Attribute name given as a string table reference.
            case WBXML_GLOBAL_TOKEN_LITERAL: // Section 5.8.4.5
                if (isset($attr)) {
                    $attrs[] = array('attribute' => $attr,
                                     'value' => $value);
                }

                $attr = $this->_stringTable[XML_WBXML::getMBUInt32($input)];
                // Each attribute starts with an empty value.
                $value = '';
                break;

            // Everything below contributes to the current value.
            case WBXML_GLOBAL_TOKEN_EXT_I_0: // Section 5.8.4.2
            case WBXML_GLOBAL_TOKEN_EXT_I_1: // Section 5.8.4.2
            case WBXML_GLOBAL_TOKEN_EXT_I_2: // Section 5.8.4.2
                $value .= $this->termstr($input);
                break;

            case WBXML_GLOBAL_TOKEN_EXT_T_0: // Section 5.8.4.2
            case WBXML_GLOBAL_TOKEN_EXT_T_1: // Section 5.8.4.2
            case WBXML_GLOBAL_TOKEN_EXT_T_2: // Section 5.8.4.2
                $value .= $this->_stringTable[XML_WBXML::getMBUInt32($input)];
                break;

            case WBXML_GLOBAL_TOKEN_EXT_0: // Section 5.8.4.2
            case WBXML_GLOBAL_TOKEN_EXT_1: // Section 5.8.4.2
            case WBXML_GLOBAL_TOKEN_EXT_2: // Section 5.8.4.2
                // NOTE(review): consumes one extra byte as payload, as
                // the original code did; confirm against Section 5.8.4.2.
                $value .= fread($input, 1);
                break;

            case WBXML_GLOBAL_TOKEN_ENTITY: // Section 5.8.4.3
                // Same character reference form decodeInternal() emits.
                $value .= '&#' . $this->entity(XML_WBXML::getMBUInt32($input)) . ';';
                break;

            case WBXML_GLOBAL_TOKEN_STR_I: // Section 5.8.4.1
                $value .= $this->termstr($input);
                break;

            case WBXML_GLOBAL_TOKEN_STR_T: // Section 5.8.4.1
                $value .= $this->_stringTable[XML_WBXML::getMBUInt32($input)];
                break;

            case WBXML_GLOBAL_TOKEN_OPAQUE: // Section 5.8.4.6
                $size = XML_WBXML::getMBUInt32($input);
                if ($size > 0) {
                    $value .= fread($input, $size);
                }
                break;

            case WBXML_GLOBAL_TOKEN_END: // Section 5.8.4.7.1
                // The pending attribute is stored after the loop.
                $hasMoreAttributes = false;
                break;

            case WBXML_GLOBAL_TOKEN_SWITCH_PAGE: // Section 5.8.4.7.2
                $codePage = ord(fread($input, 1));
                // Remember the page in force before the first switch so
                // that it can be restored once the list has been read.
                if (!$this->_prevAttributeDTD) {
                    $this->_prevAttributeDTD = $this->_attributeDTD;
                }

                $this->switchAttributeCodePage($codePage);
                break;

            default:
                if ($token < 128) {
                    // Tokens below 128 start a new attribute.
                    if (isset($attr)) {
                        $attrs[] = array('attribute' => $attr,
                                         'value' => $value);
                    }

                    $attr = $this->_attributeDTD->toAttribute($token);
                    $value = '';
                } else {
                    // Tokens of 128 and above are well-known value
                    // strings.
                    $value .= $this->_attributeDTD->toAttribute($token);
                }
                break;
            }
        }

        // Store the attribute that was still being assembled.
        if (isset($attr)) {
            $attrs[] = array('attribute' => $attr,
                             'value' => $value);
        }

        // Restore the attribute code page saved by a SWITCH_PAGE above.
        // NOTE(review): confirm against Section 5.8.1 whether a page
        // switch inside an attribute list is meant to persist instead.
        if ($this->_prevAttributeDTD) {
            $this->_attributeDTD = $this->_prevAttributeDTD;
            $this->_prevAttributeDTD = false;
        }

        $this->stopGetAttributes();

        return $attrs;
    }
    
    /**
     * Marks the decoder as being inside an attribute list by setting
     * the $_isAttribute state flag.
     */
    function startGetAttributes()
    {
        $this->_isAttribute = true;
    }
    
    /**
     * Marks the decoder as no longer being inside an attribute list by
     * clearing the $_isAttribute state flag.
     */
    function stopGetAttributes()
    {
        $this->_isAttribute = false;
    }
    
    /**
     * Returns the URI of the DTD that applies to the current state.
     *
     * While an attribute list is being read ($_isAttribute is set) the
     * attribute DTD is consulted; otherwise the tag DTD is.
     *
     * @return mixed  The result of the selected DTD's getURI().
     */
    function getCurrentURI()
    {
        // The state flag is an object property, not a local variable.
        if ($this->_isAttribute) {
            return $this->_attributeDTD->getURI();
        }

        return $this->_tagDTD->getURI();
    }
    
    /**
     * Passes character data to the content handler.
     *
     * @param string $str  The text to emit.
     */
    function writeString($str)
    {
        $this->_ch->characters($str);
    }
    
    
    /**
     * Looks up a tag token in the current tag DTD.
     *
     * @param integer $tag  Tag token to translate.
     *
     * @return mixed  The result of the tag DTD's toTag().
     */
    function getTag($tag)
    {
        // Should know which state it is in.
        $name = $this->_tagDTD->toTag($tag);

        return $name;
    }
    
    /**
     * Looks up an attribute token in the current attribute DTD.
     *
     * @param integer $attribute  Attribute token to translate.
     *
     * @return mixed  The result of the attribute DTD's toAttribute().
     */
    function getAttribute($attribute)
    {
        // Should know which state it is in.
        return $this->_attributeDTD->toAttribute($attribute);
    }
    
    /**
     * Switches the tag code page, and the attribute code page with it.
     *
     * @param integer $codePage  The code page to switch to.
     */
    function switchElementCodePage($codePage)
    {
        // The property is declared as $_tagDTD (leading underscore).
        $this->_tagDTD = DTDManager::getInstance($this->_tagDTD->toCodePage($codePage));
        $this->switchAttributeCodePage($codePage);
    }
    
    /**
     * Switches the attribute code page.
     *
     * The current attribute DTD translates the page number with
     * toCodePage() and DTDManager::getInstance() supplies the DTD that
     * replaces it.
     *
     * @param integer $codePage  The code page to switch to.
     */
    function switchAttributeCodePage($codePage)
    {
        $page = $this->_attributeDTD->toCodePage($codePage);
        $this->_attributeDTD = DTDManager::getInstance($page);
    }
    
    /**
     * Prepares a character entity code for output; currently returns it
     * unchanged.
     *
     * FIXME (original author): needs to convert base 10 to base 16 (hex).
     * NOTE(review): decodeInternal() wraps the result as
     * '&#' . $entity . ';', which is the decimal character reference
     * form; a hex value would also need the '&#x' prefix -- confirm
     * whether any conversion is wanted.
     *
     * @param integer $entity  The entity code read from the stream.
     *
     * @return integer  The same value.
     */
    function entity($entity)
    {
        return $entity;
    }
    
    /**
     * Reads a NUL-terminated string from the stream.
     *
     * The terminator is consumed but is not part of the result.  Reading
     * also stops at the end of the stream.
     *
     * FIXME: the terminator is detected as a single NUL byte, so charsets
     * whose code units are wider than one byte are not handled.
     *
     * @param resource $input  Stream positioned at the first byte of the
     *                         string.
     *
     * @return string  The bytes before the terminator.
     */
    function termstr($input)
    {
        $str = '';

        while (true) {
            $byte = fread($input, 1);
            if ($byte === false || $byte === '' || $byte === "\0") {
                break;
            }
            $str .= $byte;
        }

        return $str;
    }
}
 
/**
 * Minimal content handler that prints the decoded document as indented
 * XML text on standard output.
 *
 * Names, attribute values and character data are printed exactly as
 * received; nothing is escaped.
 */
class XML_WBXML_ContentHandler {

    /**
     * Namespace URI most recently written as an xmlns attribute.
     */
    var $_currenturi;

    /**
     * Current nesting depth; each level is printed as two spaces.
     */
    var $_indent = 0;

    /**
     * Prints an opening tag and increases the indentation.
     *
     * An xmlns attribute is written whenever the URI differs from the
     * one written last.
     *
     * @param string $uri      Namespace URI of the element.
     * @param string $element  Element name.
     * @param array  $attrs    List of hashes with the keys 'attribute'
     *                         and 'value'; anything that is not an array
     *                         is treated as "no attributes".
     */
    function startElement($uri, $element, $attrs)
    {
        $this->_padspaces();
        print('<' . $element);

        if (!isset($this->_currenturi) || $this->_currenturi != $uri) {
            print(" xmlns=\"" . $uri . "\"");
            $this->_currenturi = $uri;
        }

        if (is_array($attrs)) {
            foreach ($attrs as $attr) {
                print(' ' . $attr['attribute'] . "=\"" . $attr['value'] . "\"");
            }
        }

        print(">\n");

        $this->_indent++;
    }

    /**
     * Decreases the indentation and prints a closing tag.
     *
     * May also be called with the element name as the only argument.
     *
     * @param string $uri      Namespace URI of the element, or the
     *                         element name if $element is omitted.
     * @param string $element  Element name.
     */
    function endElement($uri, $element = null)
    {
        if ($element === null) {
            $element = $uri;
        }

        // An unmatched end tag must not push the depth below zero.
        if ($this->_indent > 0) {
            $this->_indent--;
        }

        $this->_padspaces();
        print('</' . $element . ">\n");
    }

    /**
     * Prints character data on a line of its own.
     *
     * @param string $str  The text to print.
     */
    function characters($str)
    {
        $this->_padspaces();
        print($str . "\n");
    }

    /**
     * Prints the indentation for the current depth to make the output
     * easier to read.
     */
    function _padspaces()
    {
        print(str_repeat('  ', $this->_indent));
    }
}

?>


More information about the sync mailing list