[sync] WBXML Decode class
Anthony Mills
amills at gascard.net
Mon Dec 1 21:24:08 PST 2003
If someone wants to do some voodoo and check the code in, I wouldn't mind. I
don't think it will 'compile'. I tried, but I have not figured out the
packages yet. I followed horde/docs/CODING_STANDARDS, so hopefully it is a
little better. As soon as I figure out binary bytes in PHP I am going to
start testing the implementation. Some parts of the code are not finished; look
for 'FIXME'. Some is still Java, some is blank, but I wrote what it should do.
And some of the code I forgot the purpose of, oops; I should have written
better comments. I'm sure when I start testing it, I will figure it out. No
hurry on the check-in; I think work will keep me busy for the foreseeable
future.
Thanks,
Anthony
-------------- next part --------------
<?php
include_once 'XML/WBXML.php';
include_once 'XML/WBXML/DTD.php';
include_once 'XML/WBXML/DTDManager.php';
/**
* $Horde: framework/XML_WBXML/WBXML/DTDManager.php,v 1.1 2003/11/26 04:14:28 chuck Exp $
*
* Copyright 2003 Anthony Mills <amills at pyramid6.com>
*
* See the enclosed file COPYING for license information (LGPL). If you
* did not receive this file, see http://www.fsf.org/copyleft/lgpl.html.
*
* From Binary XML Content Format Specification Version 1.3, 25 July
* 2001 found at http://www.wapforum.org
*
* @package XML_WBXML
*/
class XML_WBXML_Decode {

    // Decodes a WBXML (WAP Binary XML) document from a readable stream
    // resource and reports its elements, attributes and character data to a
    // content handler.  Section numbers in the comments refer to the Binary
    // XML Content Format Specification, version 1.3.

    /**
     * WBXML version byte as read from the stream (Section 5.4).
     * 1, 2, or 3 supported.
     */
    var $_wbxmlVersion;

    /**
     * Document Public Identifier, resolved to a string.
     *
     * In the stream it is either a well-known mb_u_int32 number (type 1) or
     * an index into the string table (type 2); see
     * getDocumentPublicIdentifier().
     */
    var $_dpi;

    /**
     * Charset, as returned by XML_WBXML::getCharset().
     */
    var $_charset;

    /**
     * String table as defined in 5.7, keyed by the byte offset of each
     * string inside the table.
     */
    var $_stringTable = array();

    /**
     * Content handler; currently just outputs raw XML.
     */
    var $_ch;

    /**
     * DTD used to translate tag tokens.
     */
    var $_tagDTD;

    /**
     * Attribute DTD that was active before a code page switch inside an
     * attribute list, or false when no switch is pending.
     */
    var $_prevAttributeDTD = false;

    /**
     * DTD used to translate attribute tokens.
     */
    var $_attributeDTD;

    // State variables.

    /**
     * Names of the elements that are currently open (have content).
     */
    var $_tagStack = array();

    /**
     * True while an attribute list is being read.
     */
    var $_isAttribute = false;

    /**
     * True between the start and the end of a 'Data' element.
     */
    var $_isData = false;

    /**
     * Decodes a complete WBXML document.
     *
     * @param resource $input  Readable stream positioned at the first byte
     *                         of the document.
     */
    function decode($input)
    {
        // Reset the per-document state so the instance can be reused.
        $this->_tagStack = array();
        $this->_isAttribute = false;
        $this->_isData = false;
        $this->_prevAttributeDTD = false;

        // Version number, Section 5.4: version = u_int8.
        $this->_wbxmlVersion = $this->getVersionNumber($input);

        // Document Public Identifier, Section 5.5:
        // publicid = mb_u_int32 | ( zero index ).
        // The actual DPI can only be resolved once the string table is read.
        $dpiStruct = $this->getDocumentPublicIdentifier($input);

        // Charset, Section 5.6: charset = mb_u_int32.
        $this->_charset = $this->getCharset($input);

        // String table, Section 5.7: strtbl = length *byte.
        $this->_stringTable = $this->getStringTable($input, $this->_charset);

        $this->_dpi = $this->getDocumentPublicIdentifierImpl(
            $dpiStruct['dpiType'],
            $dpiStruct['dpiNumber'],
            $this->_stringTable
        );

        // NOTE(review): the DTDs were never initialised, so the first tag
        // lookup would dereference null.  This assumes
        // DTDManager::getInstance() accepts a document public identifier --
        // confirm against XML/WBXML/DTDManager.php.
        $this->_tagDTD = DTDManager::getInstance($this->_dpi);
        $this->_attributeDTD = DTDManager::getInstance($this->_dpi);

        // Default content handler.
        $this->_ch = new XML_WBXML_ContentHandler;

        // Body, Sections 5.2 and 5.8: one token per iteration until EOF.
        while ($this->decodeInternal($input)) {
            // All work happens in decodeInternal().
        }
    }

    /**
     * Reads the version byte (Section 5.4).
     *
     * @param resource $input  Readable stream.
     *
     * @return integer|boolean  The byte value, or false at end of input.
     */
    function getVersionNumber($input)
    {
        return $this->_readByte($input);
    }

    /**
     * Reads the raw Document Public Identifier (Section 5.5).
     *
     * @param resource $input  Readable stream.
     *
     * @return array  'dpiType' is 1 for a well-known number and 2 for a
     *                string table index; 'dpiNumber' holds that number or
     *                index.
     */
    function getDocumentPublicIdentifier($input)
    {
        $dpiStruct = array();
        $i = XML_WBXML::getMBUInt32($input);

        if ($i == 0) {
            // A zero is followed by a string table index, which is itself a
            // mb_u_int32 (a single byte only covers indexes below 128).
            $dpiStruct['dpiType'] = 2;
            $dpiStruct['dpiNumber'] = XML_WBXML::getMBUInt32($input);
        } else {
            $dpiStruct['dpiType'] = 1;
            $dpiStruct['dpiNumber'] = $i;
        }

        return $dpiStruct;
    }

    /**
     * Resolves a raw Document Public Identifier to its string form.
     *
     * @param integer $dpiType    1 (well-known number) or 2 (string table).
     * @param integer $dpiNumber  Well-known number or string table index.
     * @param array $st           String table keyed by byte offset.
     *
     * @return string  The public identifier; for type 2 an empty string if
     *                 the index is not in the string table.
     */
    function getDocumentPublicIdentifierImpl($dpiType, $dpiNumber, $st)
    {
        if ($dpiType == 1) {
            return XML_WBXML::getDPI($dpiNumber);
        }

        return isset($st[$dpiNumber]) ? $st[$dpiNumber] : '';
    }

    /**
     * Misspelled former name of getDocumentPublicIdentifierImpl(), kept so
     * that existing callers continue to work.
     */
    function getDocumentPublicIdentifierImlp($dpiType, $dpiNumber, $st)
    {
        return $this->getDocumentPublicIdentifierImpl($dpiType, $dpiNumber, $st);
    }

    /**
     * Reads the character encoding (Section 5.6).
     *
     * The mb_u_int32 read from the stream is translated by
     * XML_WBXML::getCharset(); see
     * http://www.iana.org/assignments/character-sets for the numbers.
     *
     * @param resource $input  Readable stream.
     *
     * @return mixed  Whatever XML_WBXML::getCharset() returns.
     */
    function getCharset($input)
    {
        $cs = XML_WBXML::getMBUInt32($input);

        return XML_WBXML::getCharset($cs);
    }

    /**
     * Reads the string table (Section 5.7).
     *
     * The table is a length-prefixed block of terminated strings.  Each
     * string is stored under the byte offset at which it starts, because
     * that offset is what tokens use to refer to it.
     *
     * Strings are split on a single NUL byte and returned as raw bytes; no
     * charset conversion is done.
     * NOTE(review): charsets whose terminator is wider than one byte
     * (e.g. UTF-16) are not handled -- $cs is currently unused.
     *
     * @param resource $input  Readable stream.
     * @param mixed $cs        Document charset (unused for now).
     *
     * @return array  Strings keyed by their byte offset in the table.
     */
    function getStringTable($input, $cs)
    {
        $st = array();
        $size = XML_WBXML::getMBUInt32($input);
        if ($size <= 0) {
            return $st;
        }

        $bytes = $this->_readBytes($input, $size);
        $length = strlen($bytes);
        $start = 0;
        while ($start < $length) {
            $end = strpos($bytes, "\0", $start);
            if ($end === false) {
                // Last string is not terminated: take the rest of the table.
                $end = $length;
            }
            $st[$start] = substr($bytes, $start, $end - $start);
            $start = $end + 1;
        }

        return $st;
    }

    /**
     * Decodes one token of the document body and everything that belongs
     * to it.
     *
     * @param resource $input  Readable stream.
     *
     * @return boolean  True if a token was processed, false at end of input.
     */
    function decodeInternal($input)
    {
        // End of input must be tested explicitly: SWITCH_PAGE is token 0x00
        // and would otherwise be mistaken for "no more data".
        $token = $this->_readByte($input);
        if ($token === false) {
            return false;
        }

        switch ($token) {
        case WBXML_GLOBAL_TOKEN_STR_I:
            // Section 5.8.4.1: inline string.
            $this->_ch->characters($this->termstr($input));
            break;

        case WBXML_GLOBAL_TOKEN_STR_T:
            // Section 5.8.4.1: string table reference.
            $this->_ch->characters($this->_getString(XML_WBXML::getMBUInt32($input)));
            break;

        case WBXML_GLOBAL_TOKEN_EXT_I_0:
        case WBXML_GLOBAL_TOKEN_EXT_I_1:
        case WBXML_GLOBAL_TOKEN_EXT_I_2:
            // Section 5.8.4.2: extension followed by an inline string.
            $this->_ch->characters($this->termstr($input));
            break;

        case WBXML_GLOBAL_TOKEN_EXT_T_0:
        case WBXML_GLOBAL_TOKEN_EXT_T_1:
        case WBXML_GLOBAL_TOKEN_EXT_T_2:
            // Section 5.8.4.2: extension followed by a mb_u_int32, which is
            // treated as a string table reference here.
            $this->_ch->characters($this->_getString(XML_WBXML::getMBUInt32($input)));
            break;

        case WBXML_GLOBAL_TOKEN_EXT_0:
        case WBXML_GLOBAL_TOKEN_EXT_1:
        case WBXML_GLOBAL_TOKEN_EXT_2:
            // Section 5.8.4.2: single-byte extension tokens carry no
            // payload, so nothing further may be read from the stream.
            // Their meaning is document-type specific; nothing is emitted.
            break;

        case WBXML_GLOBAL_TOKEN_ENTITY:
            // Section 5.8.4.3: numeric character entity.
            $entity = $this->entity(XML_WBXML::getMBUInt32($input));
            $this->_ch->characters('&#' . $entity . ';');
            break;

        case WBXML_GLOBAL_TOKEN_PI:
            // Section 5.8.4.4: pi = PI attrStart *attrValue END.
            // Processing instructions are not reported, but their body has
            // to be consumed to keep the token stream in sync.
            $this->getAttributes($input);
            break;

        case WBXML_GLOBAL_TOKEN_LITERAL:
            // Section 5.8.4.5: unknown tag, no attributes, no content.
            $str = $this->_getString(XML_WBXML::getMBUInt32($input));
            $this->parseTag($input, $str, false, false);
            break;

        case WBXML_GLOBAL_TOKEN_LITERAL_A:
            // Section 5.8.4.5: unknown tag with attributes.
            $str = $this->_getString(XML_WBXML::getMBUInt32($input));
            $this->parseTag($input, $str, true, false);
            break;

        case WBXML_GLOBAL_TOKEN_LITERAL_AC:
            // Section 5.8.4.5: unknown tag with attributes and content.
            $str = $this->_getString(XML_WBXML::getMBUInt32($input));
            $this->parseTag($input, $str, true, true);
            break;

        case WBXML_GLOBAL_TOKEN_LITERAL_C:
            // Section 5.8.4.5: unknown tag with content.
            $str = $this->_getString(XML_WBXML::getMBUInt32($input));
            $this->parseTag($input, $str, false, true);
            break;

        case WBXML_GLOBAL_TOKEN_OPAQUE:
            // Section 5.8.4.6: length-prefixed opaque data.
            // FIXME Opaque is used by SyncML and its meaning depends on the
            // context; for now the raw bytes are reported as character data.
            $size = XML_WBXML::getMBUInt32($input);
            $this->_ch->characters($this->_readBytes($input, $size));
            break;

        case WBXML_GLOBAL_TOKEN_END:
            // Section 5.8.4.7.1: closes the innermost open element.
            $this->endTag();
            break;

        case WBXML_GLOBAL_TOKEN_SWITCH_PAGE:
            // Section 5.8.4.7.2: the new code page follows as one byte.
            $codePage = $this->_readByte($input);
            $this->switchElementCodePage($codePage);
            break;

        default:
            // Sections 5.8.2 and 5.8.3: tag token.  Bit 7 flags attributes,
            // bit 6 flags content, the low six bits identify the tag.
            $hasAttributes = (($token & 0x80) != 0);
            $hasContent = (($token & 0x40) != 0);
            $realToken = $token & 0x3F;

            $str = $this->getTag($realToken);
            $this->parseTag($input, $str, $hasAttributes, $hasContent);

            if ($realToken == 0x0f) {
                // NOTE(review): presumably the SyncML 'Data' tag, matching
                // the reset in endTag() -- confirm against the DTD.
                $this->_isData = true;
            }
            break;
        }

        return true;
    }

    /**
     * Reports the start of an element, and its end if it has no content.
     *
     * @param resource $input         Readable stream.
     * @param string $tag             Element name.
     * @param boolean $hasAttributes  Whether an attribute list follows.
     * @param boolean $hasContent     Whether the element has content, i.e.
     *                                is closed later by an END token.
     */
    function parseTag($input, $tag, $hasAttributes, $hasContent)
    {
        $attrs = array();
        if ($hasAttributes) {
            $attrs = $this->getAttributes($input);
        }

        $uri = $this->getCurrentURI();
        $this->_ch->startElement($uri, $tag, $attrs);

        if ($hasContent) {
            // Remember the open element so the matching END token can
            // close it by name.
            $this->_tagStack[] = $tag;
        } else {
            $this->_ch->endElement($uri, $tag);
        }
    }

    /**
     * Closes the innermost open element.
     *
     * @return string  Name of the closed element, or 'Unknown' if no
     *                 element was open.
     */
    function endTag()
    {
        if (count($this->_tagStack) > 0) {
            $tag = array_pop($this->_tagStack);
        } else {
            $tag = 'Unknown';
        }

        if ($tag == 'Data') {
            $this->_isData = false;
        }

        $this->_ch->endElement($this->getCurrentURI(), $tag);

        return $tag;
    }

    /**
     * Reads an attribute list up to and including its END token.
     *
     * @param resource $input  Readable stream.
     *
     * @return array  List of array('attribute' => name, 'value' => value).
     */
    function getAttributes($input)
    {
        $attrs = array();
        $attr = null;
        $value = '';

        $this->startGetAttributes();

        $hasMoreAttributes = true;
        while ($hasMoreAttributes) {
            $token = $this->_readByte($input);
            if ($token === false) {
                // Truncated input: stop instead of looping forever.
                break;
            }

            switch ($token) {
            case WBXML_GLOBAL_TOKEN_LITERAL:
                // Section 5.8.4.5: attribute name from the string table.
                // A new name completes the attribute collected so far.
                if ($attr !== null) {
                    $attrs[] = array('attribute' => $attr,
                                     'value' => $value);
                }
                $attr = $this->_getString(XML_WBXML::getMBUInt32($input));
                $value = '';
                break;

            case WBXML_GLOBAL_TOKEN_EXT_I_0:
            case WBXML_GLOBAL_TOKEN_EXT_I_1:
            case WBXML_GLOBAL_TOKEN_EXT_I_2:
                // Section 5.8.4.2
                $value .= $this->termstr($input);
                break;

            case WBXML_GLOBAL_TOKEN_EXT_T_0:
            case WBXML_GLOBAL_TOKEN_EXT_T_1:
            case WBXML_GLOBAL_TOKEN_EXT_T_2:
                // Section 5.8.4.2
                $value .= $this->_getString(XML_WBXML::getMBUInt32($input));
                break;

            case WBXML_GLOBAL_TOKEN_EXT_0:
            case WBXML_GLOBAL_TOKEN_EXT_1:
            case WBXML_GLOBAL_TOKEN_EXT_2:
                // Section 5.8.4.2: single-byte tokens, no payload to read.
                break;

            case WBXML_GLOBAL_TOKEN_ENTITY:
                // Section 5.8.4.3: written as a character reference, the
                // same way decodeInternal() reports entities in content.
                $value .= '&#' . $this->entity(XML_WBXML::getMBUInt32($input)) . ';';
                break;

            case WBXML_GLOBAL_TOKEN_STR_I:
                // Section 5.8.4.1
                $value .= $this->termstr($input);
                break;

            case WBXML_GLOBAL_TOKEN_STR_T:
                // Section 5.8.4.1
                $value .= $this->_getString(XML_WBXML::getMBUInt32($input));
                break;

            case WBXML_GLOBAL_TOKEN_OPAQUE:
                // Section 5.8.4.6
                $size = XML_WBXML::getMBUInt32($input);
                $value .= $this->_readBytes($input, $size);
                break;

            case WBXML_GLOBAL_TOKEN_END:
                // Section 5.8.4.7.1: end of the attribute list.
                $hasMoreAttributes = false;
                break;

            case WBXML_GLOBAL_TOKEN_SWITCH_PAGE:
                // Section 5.8.4.7.2: remember the page that was active when
                // the list started so it can be restored afterwards.
                $codePage = $this->_readByte($input);
                if (!$this->_prevAttributeDTD) {
                    $this->_prevAttributeDTD = $this->_attributeDTD;
                }
                $this->switchAttributeCodePage($codePage);
                break;

            default:
                // Section 5.8.3: tokens below 128 start a new attribute,
                // tokens of 128 and above are well-known value fragments.
                if ($token < 128) {
                    if ($attr !== null) {
                        $attrs[] = array('attribute' => $attr,
                                         'value' => $value);
                    }
                    $attr = $this->_attributeDTD->toAttribute($token);
                    $value = '';
                } else {
                    $value .= $this->_attributeDTD->toAttribute($token);
                }
                break;
            }
        }

        // Flush the attribute that was still being collected.
        if ($attr !== null) {
            $attrs[] = array('attribute' => $attr,
                             'value' => $value);
        }

        // Undo a code page switch that happened inside the list.
        if ($this->_prevAttributeDTD) {
            $this->_attributeDTD = $this->_prevAttributeDTD;
            $this->_prevAttributeDTD = false;
        }

        $this->stopGetAttributes();

        return $attrs;
    }

    /**
     * Marks the start of an attribute list.
     */
    function startGetAttributes()
    {
        $this->_isAttribute = true;
    }

    /**
     * Marks the end of an attribute list.
     */
    function stopGetAttributes()
    {
        $this->_isAttribute = false;
    }

    /**
     * Returns the URI of the DTD that applies to the current state: the
     * attribute DTD while an attribute list is being read, the tag DTD
     * otherwise.
     *
     * @return mixed  Whatever the DTD's getURI() returns.
     */
    function getCurrentURI()
    {
        if ($this->_isAttribute) {
            return $this->_attributeDTD->getURI();
        }

        return $this->_tagDTD->getURI();
    }

    /**
     * Reports a string to the content handler as character data.
     *
     * @param string $str  Text to report.
     */
    function writeString($str)
    {
        $this->_ch->characters($str);
    }

    /**
     * Translates a tag token using the current tag DTD.
     *
     * @param integer $tag  Tag token without the attribute/content bits.
     *
     * @return mixed  Whatever the DTD's toTag() returns.
     */
    function getTag($tag)
    {
        return $this->_tagDTD->toTag($tag);
    }

    /**
     * Translates an attribute token using the current attribute DTD.
     *
     * @param integer $attribute  Attribute token.
     *
     * @return mixed  Whatever the DTD's toAttribute() returns.
     */
    function getAttribute($attribute)
    {
        return $this->_attributeDTD->toAttribute($attribute);
    }

    /**
     * Switches the tag code page, and the attribute code page with it.
     *
     * @param integer $codePage  New code page number.
     */
    function switchElementCodePage($codePage)
    {
        $this->_tagDTD = DTDManager::getInstance($this->_tagDTD->toCodePage($codePage));
        $this->switchAttributeCodePage($codePage);
    }

    /**
     * Switches the attribute code page.
     *
     * @param integer $codePage  New code page number.
     */
    function switchAttributeCodePage($codePage)
    {
        $this->_attributeDTD = DTDManager::getInstance($this->_attributeDTD->toCodePage($codePage));
    }

    /**
     * Returns the number to place inside a numeric character reference.
     *
     * XML accepts decimal references ('&#NNN;'), and callers wrap the
     * result in '&#' and ';', so the value is passed through unchanged and
     * no conversion to hexadecimal is needed.
     *
     * @param integer $entity  Character code read from the stream.
     *
     * @return integer  The same character code.
     */
    function entity($entity)
    {
        return $entity;
    }

    /**
     * Reads a NUL-terminated string and consumes its terminator.
     *
     * The bytes are returned as-is; no charset conversion is done.
     * NOTE(review): only a single-byte terminator is recognised, which is
     * wrong for charsets such as UTF-16.
     *
     * @param resource $input  Readable stream.
     *
     * @return string  The string without its terminator; reading also stops
     *                 at end of input.
     */
    function termstr($input)
    {
        $str = '';
        while (true) {
            $byte = fread($input, 1);
            if ($byte === false || $byte === '' || $byte === "\0") {
                break;
            }
            $str .= $byte;
        }

        return $str;
    }

    /**
     * Looks up a string table entry.
     *
     * @param integer $index  Byte offset of the string inside the table.
     *
     * @return string  The entry, or an empty string if there is none at
     *                 that offset.
     */
    function _getString($index)
    {
        return isset($this->_stringTable[$index]) ? $this->_stringTable[$index] : '';
    }

    /**
     * Reads a single byte.
     *
     * @param resource $input  Readable stream.
     *
     * @return integer|boolean  Byte value (0-255), or false at end of input.
     */
    function _readByte($input)
    {
        $byte = fread($input, 1);
        if ($byte === false || $byte === '') {
            return false;
        }

        return ord($byte);
    }

    /**
     * Reads up to $length bytes, retrying on short reads.
     *
     * @param resource $input  Readable stream.
     * @param integer $length  Number of bytes wanted.
     *
     * @return string  The bytes read; shorter than $length only if the
     *                 input ended first.
     */
    function _readBytes($input, $length)
    {
        $data = '';
        while (strlen($data) < $length) {
            $chunk = fread($input, $length - strlen($data));
            if ($chunk === false || $chunk === '') {
                break;
            }
            $data .= $chunk;
        }

        return $data;
    }

}
/**
 * Content handler that prints the reported document as raw XML, one item
 * per line, indented by one space per nesting level.
 *
 * Names, attribute values and character data are printed exactly as they
 * are passed in; nothing is escaped.
 */
class XML_WBXML_ContentHandler {

    /**
     * Namespace URI that was printed last, so 'xmlns' is only repeated
     * when the URI changes.
     */
    var $_currenturi;

    /**
     * Current nesting depth.
     */
    var $_indent = 0;

    /**
     * Prints an opening tag.
     *
     * @param string $uri      Namespace URI of the element.
     * @param string $element  Element name.
     * @param array $attrs     List of
     *                         array('attribute' => name, 'value' => value).
     */
    function startElement($uri, $element, $attrs)
    {
        $this->_padspaces();
        print('<' . $element);

        if (!isset($this->_currenturi) || $this->_currenturi != $uri) {
            print(" xmlns=\"" . $uri . "\"");
            $this->_currenturi = $uri;
        }

        foreach ($attrs as $attr) {
            print(' ' . $attr['attribute'] . "=\"" . $attr['value'] . "\"");
        }

        print(">\n");
        $this->_indent++;
    }

    /**
     * Prints a closing tag.
     *
     * May also be called with the element name as the only argument.
     *
     * @param string $uri      Namespace URI of the element (not printed).
     * @param string $element  Element name.
     */
    function endElement($uri, $element = null)
    {
        if ($element === null) {
            // Single-argument form: the only argument is the element name.
            $element = $uri;
        }

        // Never go below zero on an unbalanced end tag.
        if ($this->_indent > 0) {
            $this->_indent--;
        }

        $this->_padspaces();
        print('</' . $element . ">\n");
    }

    /**
     * Prints character data on its own line.
     *
     * @param string $str  Text to print.
     */
    function characters($str)
    {
        $this->_padspaces();
        print($str . "\n");
    }

    /**
     * Prints the indentation for the current nesting depth, to make the
     * output easier to read.
     */
    function _padspaces()
    {
        for ($i = 0; $i < $this->_indent; $i++) {
            print(' ');
        }
    }

}
?>
More information about the sync
mailing list