[dev] wrapping Chinese text

Wenzhuo Zhang wenzhuo@zhmail.com
Mon, 8 Jul 2002 00:04:48 +0800


Hi,

Chinese text usually does not use spaces to separate words or sentences, so
wordwrap() does not work as expected on it. The following patch is an
attempt to wrap Chinese (gb2312) text correctly when replying to messages.


--- horde/lib/Text.php.dist	Thu Apr 18 02:23:10 2002
+++ horde/lib/Text.php	Sun Jul  7 23:10:30 2002
@@ -42,6 +42,40 @@
     }
 
     /**
+     * Fixes incorrect wrappings which split double-byte gb2312 characters
+     *
+     * @param string $str	  String containing wrapped gb2312 characters
+     * @param string $break_char	  Character used to break lines.
+     *
+     * @return string		  String containing fixed text.
+     */
+    function trim_gb2312($str, $break_char = "\n")
+    {
+        $lines = explode($break_char, $str);
+
+        for ($i = 0; $i < count($lines) - 1; $i++) {
+                $line = $lines[$i];
+                $len = strlen($line);
+
+                /* parse double-byte gb2312 characters */
+                for ($c = 0; $c < $len - 1; $c++) {
+                        if (ord($line{$c}) & 128) {
+                                if (ord($line{$c + 1}) & 128) $c++;
+                        }
+                }
+
+                /* If the last byte of the current line is the lead byte
+                   of a double-byte character, move it to the start of the
+                   next line. */
+                if (($c == $len - 1) && (ord($line[$c]) & 128)) {
+                        $lines[$i] = substr($line, 0, -1);
+                        $lines[$i + 1] = $line[$c] . $lines[$i + 1];
+                }
+        }
+        return implode($break_char, $lines);
+    }
+
+    /**
      * Wraps the text of a message.
      *
      * @param string $text         String containing the text to wrap.
@@ -50,12 +84,24 @@
      *
      * @return  string      String containing the wrapped text.
      */
-    function wrap($text, $length = 80, $break_char = "\n")
+    function wrap($text, $length = 80, $break_char = "\n", $charset = "iso-8859-1")
     {
         $paragraphs = explode("\n", $text);
-        for ($i = 0; $i < count($paragraphs); $i++) {
-            $paragraphs[$i] = wordwrap($paragraphs[$i], $length, $break_char);
-        }
+
+        $charset = strtolower($charset);
+        switch ($charset) {
+            case "gb2312":
+                for ($i = 0; $i < count($paragraphs); $i++) {
+                    $paragraphs[$i] = wordwrap($paragraphs[$i], $length, $break_char, 1);
+                    $paragraphs[$i] = Text::trim_gb2312($paragraphs[$i], $break_char);
+                }
+                break;
+            default:
+		for ($i = 0; $i < count($paragraphs); $i++) {
+		    $paragraphs[$i] = wordwrap($paragraphs[$i], $length, $break_char);
+		}
+		break;
+	}
         return implode($break_char, $paragraphs);
     }
 
--- horde/imp/compose.php.dist	Thu Jun  6 06:49:02 2002
+++ horde/imp/compose.php	Sun Jul  7 22:48:35 2002
@@ -580,7 +580,7 @@
              if ($wrap_width < 20) {
                  $wrap_width = 20;
              }
-             $msg = Text::wrap($msg, $wrap_width, $quote_str);
+             $msg = Text::wrap($msg, $wrap_width, $quote_str, Lang::getCharset());
              $msg  = _("Quoting") . ' ' . $qfrom . ":\n$quote_str$msg";
              $msg .= "\n";
          }

-- 
Wenzhuo
  GnuPG Key ID 0xBA586A68
  Key fingerprint = 89C7 C6DE D956 F978 3F12  A8AF 5847 F840 BA58 6A68