[sync] WBXML Encoder patch (was: no subject)
Anthony Mills
amills at gascard.net
Thu Dec 4 08:29:09 PST 2003
Sorry it is late, forgot to put a subject.
Anthony
Anthony Mills wrote:
> With two small problems of course.
>
> One: look at line 231. It treats the token as a boolean (false or not false), so a zero token is read as false.
>
> Two: opaque data is ignored. I'm not sure how SyncML upstream will handle opaque
> data.
>
> I know it's kludgy, but it works with SyncML.
>
> Have fun.
> Anthony
>
>
> ------------------------------------------------------------------------
>
> ? patch.txt
> ? docs/examples/DecoderTest.php
> ? docs/examples/test-syncml_client_packet_1.xml.wbxml
> Index: WBXML.php
> ===================================================================
> RCS file: /repository/framework/XML_WBXML/WBXML.php,v
> retrieving revision 1.7
> diff -u -r1.7 WBXML.php
> --- WBXML.php 4 Dec 2003 04:11:47 -0000 1.7
> +++ WBXML.php 4 Dec 2003 08:08:12 -0000
> @@ -86,14 +86,15 @@
> $b = 0;
>
> do {
> - $b = fread($in, 1);
> - $bs->set($j++, ($b & 64) != 0); // 0x40
> - $bs->set($j++, ($b & 32) != 0); // 0x20
> - $bs->set($j++, ($b & 16) != 0); // 0x10
> - $bs->set($j++, ($b & 8) != 0); // 0x08
> - $bs->set($j++, ($b & 4) != 0); // 0x04
> - $bs->set($j++, ($b & 2) != 0); // 0x02
> - $bs->set($j++, ($b & 1) != 0); // 0x01
> + $b = ord(fread($in, 1));
> +
> + $bs->set($j++, ((($b & 64) != 0) ? 1 : 0)); // 0x40
> + $bs->set($j++, ((($b & 32) != 0) ? 1 : 0)); // 0x20
> + $bs->set($j++, ((($b & 16) != 0) ? 1 : 0)); // 0x10
> + $bs->set($j++, ((($b & 8) != 0) ? 1 : 0)); // 0x08
> + $bs->set($j++, ((($b & 4) != 0) ? 1 : 0)); // 0x04
> + $bs->set($j++, ((($b & 2) != 0) ? 1 : 0)); // 0x02
> + $bs->set($j++, ((($b & 1) != 0) ? 1 : 0)); // 0x01
> } while (($b & 128) != 0);
>
> $value = 0;
> Index: WBXML/DTD.php
> ===================================================================
> RCS file: /repository/framework/XML_WBXML/WBXML/DTD.php,v
> retrieving revision 1.3
> diff -u -r1.3 DTD.php
> --- WBXML/DTD.php 2 Dec 2003 19:15:58 -0000 1.3
> +++ WBXML/DTD.php 4 Dec 2003 08:08:12 -0000
> @@ -26,7 +26,7 @@
> var $XMLNS;
> var $DPI;
>
> - function XML_WBXMLDTD($v)
> + function XML_WBXML_DTD($v)
> {
> $this->version = $v;
> $this->init();
> Index: WBXML/Decoder.php
> ===================================================================
> RCS file: /repository/framework/XML_WBXML/WBXML/Decoder.php,v
> retrieving revision 1.6
> diff -u -r1.6 Decoder.php
> --- WBXML/Decoder.php 4 Dec 2003 04:35:56 -0000 1.6
> +++ WBXML/Decoder.php 4 Dec 2003 08:08:13 -0000
> @@ -75,6 +75,11 @@
> {
> $this->_dtdManager = &new XML_WBXML_DTDManager();
> }
> +
> + function getbyte($input)
> + {
> + return ord(fread($input, 1));
> + }
>
> function decode($input)
> {
> @@ -82,6 +87,8 @@
> // version = u_int8
> // currently 1, 2 or 3
> $this->_wbxmlVersion = $this->getVersionNumber($input);
> + //debug
> + //print("Version: |" . $this->_wbxmlVersion . "|\n");
>
> // Get Document Public Idetifier from Section 5.5
> // publicid = mb_u_int32 | (zero index)
> @@ -89,19 +96,26 @@
> // Containing the value zero (0)
> // The actual DPI is determined after the String Table is read.
> $dpiStruct = $this->getDocumentPublicIdentifier($input);
> +
> + //print("dpiType: " . $dpiStruct['dpiType'] . " dpiNumber: " . $dpiStruct['dpiNumber'] . "\n");
>
> // Get Charset from 5.6
> // charset = mb_u_int32
> $this->_charset = $this->getCharset($input);
> + //debug
> + //print("Charset: " . $this->_charset . "\n");
>
> // Get String Table from 5.7
> // strb1 = length *byte
> $this->_stringTable = $this->getStringTable($input, $this->_charset);
> + //debug
> + //$this->print_stringtable($this->_stringTable);
>
> // Get Document Public Idetifier from Section 5.5
> $this->_dpi = $this->getDocumentPublicIdentifierImpl($dpiStruct['dpiType'],
> $dpiStruct['dpiNumber'],
> $this->_stringTable);
> + //print("DPI: " . $this->_dpi . "\n");
>
> // Now the real fun begins.
> // from Sections 5.2 and 5.8
> @@ -109,12 +123,26 @@
> // Default content handler.
> $this->_ch = &new XML_WBXML_ContentHandler();
>
> + // Default content handler.
> + $this->_dtdManager = new XML_WBXML_DTDManager();
> +
> + $this->_tagDTD = $this->_dtdManager->getInstance('-//SYNCML//DTD SyncML 1.1//EN');
> + $this->_attributeDTD= $this->_tagDTD;
> +
> + //print("dtd=|" . $this->_tagDTD->getURI() . "|\n");
> +
> + //get the starting DTD
> + $this->_tagDTD = $this->_dtdManager->getInstance($this->_dpi);
> + $this->_attributeDTD= $this->_tagDTD;
> +
> + //print("dtd=|" . $this->_tagDTD->getURI() . "|\n");
> +
> while ($this->decodeInternal($input));
> }
>
> function getVersionNumber($input)
> {
> - return fread($input, 1);
> + return $this->getbyte($input);
> }
>
> function getDocumentPublicIdentifier($input)
> @@ -126,7 +154,7 @@
>
> if ($i == 0) {
> $dpiStruct['dpiType'] = 2;
> - $dpiStruct['dpiNumber'] = fread($input, 1);
> + $dpiStruct['dpiNumber'] = $this->getbyte($input);
> } else {
> $dpiStruct['dpiType'] = 1;
> $dpiStruct['dpiNumber'] = $i;
> @@ -153,7 +181,10 @@
> function getCharset($input)
> {
> $cs = XML_WBXML::MBUInt32ToInt($input);
> + //print("cs num" . $cs . "\n");
> +
> $ret = XML_WBXML::getCharsetString($cs);
> +
> return $ret;
> }
>
> @@ -162,52 +193,59 @@
> */
> function getStringTable($input, $cs)
> {
> - $st = array();
> + $strtable = array();
> $size = XML_WBXML::MBUInt32ToInt($input);
> + //print("Size: " . $size);
> + //a hack to make it work with arrays
> + $str = "j";
> +
> + $numstr = 0;
> + $start = 0;
> + $j = 0;
> + for ($i = 0; $i < $size; $i++ ) {
> + //May need to fix the null detector for more than single byte charsets like ASCII, UTF-8, etc
> + $ch = fread($input, 1);
> + if (ord($ch) == 0) {
> + $strtable[$numstr++] = $str;
> + $str = "#";
> + $start = $i+1;
> + } else {
> + $str[$j++] = $ch;
> + //print($ch);
> + }
> + }
>
> - // Nice big ol'
> - // FIXME
> - // Will need help rewriting this one
> -// byte[] bytes = new byte[size];
> -//
> -// int start = 0;
> -// for (int i = 0; i < size; i++ ) {
> -// //May need to fix the null detector for more than ASCII, UTF-8, etc charsets
> -// if (isStringTerminator(pbis, cs)) {
> -// pbis.read();
> -// String newString = new String( bytes, start, i - start, charset );
> -// st.put(new Integer(start), newString);
> -// start = i+1;
> -// System.err.println("start: " + start + " newString: " + newString);
> -// } else {
> -// bytes[i] = (byte)pbis.read();
> -// }
> -// }
> -//
> -// if (start<size) {
> -// String newString = new String( bytes, start, size - start, charset);
> -// st.put(new Integer(start), newString);
> -// }
> -//
> + if ($start<$size) {
> + $strtable[$numstr++] = $str;
> + }
>
> - return $st;
> + return $strtable;
> }
>
> function decodeInternal($input)
> {
> - $token = fread($input, 1);
> -
> + $token = $this->getbyte($input);
> +
> + //print("\$token=" . $token);
> +
> + //zero is read as false it messes up
> + //this way will never end
> + //if ($token == 0 || $token) {
> + //zero is read as false it messes up
> + //this way will end too soon
> if ($token) {
> $str = '';
>
> switch ($token) {
> case XML_WBXML_GLOBAL_TOKEN_STR_I:
> + //print("XML_WBXML_GLOBAL_TOKEN_STR_I " . $token);
> // Section 5.8.4.1
> $str = $this->termstr($input);
> $this->_ch->characters($str);
> break;
>
> case XML_WBXML_GLOBAL_TOKEN_STR_T:
> + //print("XML_WBXML_GLOBAL_TOKEN_STR_T " . $token);
> // Section 5.8.4.1
> $str = $this->_stringTable[XML_WBXML::MBUInt32ToInt($intput)];
> $this->_ch->characters($str);
> @@ -216,6 +254,7 @@
> case XML_WBXML_GLOBAL_TOKEN_EXT_I_0:
> case XML_WBXML_GLOBAL_TOKEN_EXT_I_1:
> case XML_WBXML_GLOBAL_TOKEN_EXT_I_2:
> + //print("XML_WBXML_GLOBAL_TOKEN_EXT_I_? " . $token);
> // Section 5.8.4.2
> $str = $this->termstr($input);
> $this->_ch->characters($str);
> @@ -224,6 +263,7 @@
> case XML_WBXML_GLOBAL_TOKEN_EXT_T_0:
> case XML_WBXML_GLOBAL_TOKEN_EXT_T_1:
> case XML_WBXML_GLOBAL_TOKEN_EXT_T_2:
> + //print("XML_WBXML_GLOBAL_TOKEN_EXT_T_? " . $token);
> // Section 5.8.4.2
> $str = $this->_stringTable[XML_WBXML::MBUInt32ToInt($intput)];
> $this->_ch->characters($str);
> @@ -232,12 +272,14 @@
> case XML_WBXML_GLOBAL_TOKEN_EXT_0:
> case XML_WBXML_GLOBAL_TOKEN_EXT_1:
> case XML_WBXML_GLOBAL_TOKEN_EXT_2:
> + //print("XML_WBXML_GLOBAL_TOKEN_EXT_? " . $token);
> // Section 5.8.4.2
> - $extension = fread($input, 1);
> + $extension = getbyte($input, 1);
> $this->_ch->characters($extension);
> break;
>
> case XML_WBXML_GLOBAL_TOKEN_ENTITY:
> + //print("XML_WBXML_GLOBAL_TOKEN_ENTITY " . $token);
> // Section 5.8.4.3
> // UCS-4 chracter encoding?
> $entity = $this->entity(XML_WBXML::MBUInt32ToInt($input));
> @@ -246,35 +288,41 @@
> break;
>
> case XML_WBXML_GLOBAL_TOKEN_PI:
> + //print("XML_WBXML_GLOBAL_TOKEN_PI " . $token);
> // Section 5.8.4.4
> // throw new IOException("WBXML global token processing instruction(PI, " + token + ") is unsupported!");
> break;
>
> case XML_WBXML_GLOBAL_TOKEN_LITERAL:
> + //print("XML_WBXML_GLOBAL_TOKEN_LITERAL " . $token);
> // Section 5.8.4.5
> $str = $this->_stringTable[XML_WBXML::MBUInt32ToInt($input)];
> $this->parseTag($input, $str, false, false);
> break;
>
> case XML_WBXML_GLOBAL_TOKEN_LITERAL_A:
> + //print("XML_WBXML_GLOBAL_TOKEN_LITERAL_A " . $token);
> // Section 5.8.4.5
> $str = $this->_stringTable[XML_WBXML::MBUInt32ToInt($input)];
> $this->parseTag($input, $str, true, false);
> break;
>
> case XML_WBXML_GLOBAL_TOKEN_LITERAL_AC:
> + //print("XML_WBXML_GLOBAL_TOKEN_LITERAL_AC " . $token);
> // Section 5.8.4.5
> $str = $this->_stringTable[XML_WBXML::MBUInt32ToInt($input)];
> $this->parseTag($input, $string, true, true);
> break;
>
> case XML_WBXML_GLOBAL_TOKEN_LITERAL_C:
> + //print("XML_WBXML_GLOBAL_TOKEN_LITERAL_C " . $token);
> // Section 5.8.4.5
> $str = $this->_stringTable[XML_WBXML::MBUInt32ToInt($input)];
> $this->parseTag($input, $str, false, true);
> break;
>
> case XML_WBXML_GLOBAL_TOKEN_OPAQUE:
> + //print("XML_WBXML_GLOBAL_TOKEN_OPAQUE " . $token);
> // Section 5.8.4.6
> $size = XML_WBXML::MBUInt32ToInt($input);
> $b = fread($input, $size);
> @@ -294,23 +342,27 @@
> break;
>
> case XML_WBXML_GLOBAL_TOKEN_END:
> + //print("XML_WBXML_GLOBAL_TOKEN_END " . $token);
> // Section 5.8.4.7.1
> $str = $this->endTag();
> break;
>
> case XML_WBXML_GLOBAL_TOKEN_SWITCH_PAGE:
> + //print('XML_WBXML_GLOBAL_TOKEN_SWITCH_PAGE ' . $token);
> // Section 5.8.4.7.2
> - $codePage = fread($input, 1);
> + $codePage = $this->getbyte($input, 1);
> $this->switchElementCodePage($codePage);
> break;
>
> default:
> + //print("default " . $token);
> // Section 5.8.2
> // Section 5.8.3
> $hasAttributes = (($token & 0x80) != 0);
> $hasContent = (($token & 0x40) != 0);
> $realToken = $token & 0x3F;
> $str = $this->getTag($realToken);
> + //print("\$str=" . $str);
>
> $this->parseTag($input, $str, $hasAttributes, $hasContent);
>
> @@ -373,7 +425,7 @@
> $token = null;
>
> while ($hasMoreAttributes) {
> - $token = fread($input, 1);
> + $token = getbyte($input, 1);
>
> switch ($token) {
> // Attribute specified.
> @@ -443,7 +495,7 @@
>
> case XML_WBXML_GLOBAL_TOKEN_SWITCH_PAGE:
> // Section 5.8.4.7.2
> - $codePage = fread($input, 1);
> + $codePage = getbyte($input, 1);
> if (!$this->_prevAttributeDTD) {
> $this->_prevAttributeDTD = $this->_attributeDTD;
> }
> @@ -500,25 +552,31 @@
>
> function getTag($tag)
> {
> + //print("\$tag=" . $tag);
> // Should know which state it is in.
> - return $this->_tagDTD->toTag($tag);
> + return $this->_tagDTD->toTagStr($tag);
> }
>
> function getAttribute($attribute)
> {
> // Should know which state it is in.
> - $this->_attributeDTD->toAttribute($attribute);
> + $this->_attributeDTD->toAttributeInt($attribute);
> }
>
> function switchElementCodePage($codePage)
> {
> - $this->_tagDTD = &$this->_dtdManager->getInstance($codePage);
> + print("\$this->_tagDTD->getURI()=" . $codePage);
> +
> + $this->_tagDTD = &$this->_dtdManager->getInstance($this->_tagDTD->toCodePageStr($codePage));
> +
> + print("\$this->_tagDTD->getURI()=" . $this->_tagDTD->getURI());
> +
> $this->switchAttributeCodePage($codePage);
> }
>
> function switchAttributeCodePage($codePage)
> {
> - $this->_attributeDTD = &$this->_dtdManager->getInstance($codePage);
> + $this->_attributeDTD = &$this->_dtdManager->getInstance($this->_attributeDTD->toCodePageStr($codePage));
> }
>
> /**
> @@ -526,7 +584,7 @@
> */
> function entity($entity)
> {
> - return $entity;
> + return dechex($entity);
> }
>
> /**
> @@ -534,7 +592,29 @@
> */
> function termstr($input)
> {
> - return '';
> + $str = '#';
> + $i = 0;
> + $ch = fread($input, 1);
> + while (ord($ch) != 0) {
> + $str[$i++] = $ch;
> + $ch = fread($input, 1);
> + }
> +
> + return $str;
> + }
> +
> + /**
> + * For debugging
> + */
> +
> + function print_stringtable($st)
> + {
> + print("String Table: ");
> + foreach ($st as $s) {
> + print($s . ", ");
> + }
> +
> + print("\n");
> }
>
> }
> Index: docs/examples/decode.php
> ===================================================================
> RCS file: /repository/framework/XML_WBXML/docs/examples/decode.php,v
> retrieving revision 1.1
> diff -u -r1.1 decode.php
> --- docs/examples/decode.php 4 Dec 2003 04:35:56 -0000 1.1
> +++ docs/examples/decode.php 4 Dec 2003 08:08:13 -0000
> @@ -10,3 +10,8 @@
> $input = fopen('syncml_client_packet_1.wbxml', 'rb');
>
> $decoder->decode($input);
> +
> +fclose($input);
> +
> +
> +
>
>
> ------------------------------------------------------------------------
>
>
More information about the sync
mailing list