PHP 笔记 | php 中文unicode 互转

标签: none

php 中文unicode 互转

/**
 * Encode a string as a sequence of decimal Unicode code points.
 *
 * The input is converted from $encoding to big-endian UTF-32 and every code
 * point is emitted as $prefix . <decimal code point> . $postfix, for example
 * "&#21704;" with the default prefix and postfix.
 *
 * @param string $str      Source string.
 * @param string $encoding Character encoding of $str (an iconv charset name), default GBK.
 * @param string $prefix   Text placed before each code point, default "&#".
 * @param string $postfix  Text placed after each code point, default ";".
 * @return string The encoded string; '' when $str is empty or iconv cannot
 *                convert it from $encoding.
 */
function unicode_encode($str, $encoding = 'GBK', $prefix = '&#', $postfix = ';') {
    // Nothing to encode; also avoids emitting a bogus "&#0;" for empty input.
    if ((string) $str === '') {
        return '';
    }

    // The byte order is spelled out on purpose: a bare 'UCS-2' leaves the
    // endianness to the iconv implementation, which makes the numeric values
    // platform dependent. UTF-32 additionally covers characters outside the BMP.
    $utf32 = iconv($encoding, 'UTF-32BE', $str);
    if ($utf32 === false || $utf32 === '') {
        return '';
    }

    $unistr = '';
    // 'N*' reads consecutive unsigned 32-bit big-endian integers: one per code point.
    foreach (unpack('N*', $utf32) as $codepoint) {
        $unistr .= $prefix . $codepoint . $postfix;
    }
    return $unistr;
}
  
/**
 * Decode a string produced by unicode_encode() back into text.
 *
 * The input is split on $prefix; the decimal number at the start of every
 * following segment is taken as a Unicode code point. Text before the first
 * prefix, and any text after a number other than the postfix, is not copied
 * to the result.
 *
 * @param string $unistr   Encoded string, e.g. "&#21704;&#21704;".
 * @param string $encoding Character encoding of the returned string (an iconv charset name), default GBK.
 * @param string $prefix   Text that precedes each code point, default "&#".
 * @param string $postfix  Text that follows each code point, default ";".
 * @return string|false The decoded string, or false when iconv cannot perform the conversion.
 */
function unicode_decode($unistr, $encoding = 'GBK', $prefix = '&#', $postfix = ';') {
    $segments = explode($prefix, $unistr);
    $postfixLength = strlen($postfix);
    $utf32 = '';

    // Index 0 holds whatever precedes the first prefix, so decoding starts at 1.
    for ($i = 1, $len = count($segments); $i < $len; $i++) {
        $segment = $segments[$i];
        // Strip the postfix only when it is really there; chopping blindly
        // would eat digits from a segment whose postfix is missing.
        if ($postfixLength > 0 && substr($segment, -$postfixLength) === $postfix) {
            $segment = substr($segment, 0, -$postfixLength);
        }
        // pack('N') writes the code point as 4 big-endian bytes using integer
        // arithmetic only (no float division, no 16-bit wrap-around).
        $utf32 .= pack('N', intval($segment));
    }

    // Explicit byte order: the buffer above is big-endian by construction.
    return iconv('UTF-32BE', $encoding, $utf32);
}
 
// GBK string test.
// NOTE(review): the default $encoding of both helpers is GBK, so this part
// presumably expects the source file itself to be saved as GBK - confirm.
$str = '哈哈';
echo $str . "\n";

$unistr = unicode_encode($str);
echo $unistr . "\n"; // &#21704;&#21704;

$str2 = unicode_decode($unistr);
echo $str2 . "\n"; // 哈哈

// UTF-8 string test.
$utf8_str = iconv('GBK', 'UTF-8', $str);
echo $utf8_str . "\n"; // 鍝堝搱 (UTF-8 bytes shown as mojibake when viewed as GBK; switch the viewer's encoding to check)

$utf8_unistr = unicode_encode($utf8_str, 'UTF-8');
echo $utf8_unistr . "\n"; // &#21704;&#21704;

$utf8_str2 = unicode_decode($utf8_unistr, 'UTF-8');
echo $utf8_str2 . "\n"; // 鍝堝搱

// Custom prefix / postfix test.
$prefix_unistr = unicode_encode($str, 'GBK', "\\u", '');
echo $prefix_unistr . "\n"; // \u21704\u21704

$profix_unistr2 = unicode_decode($prefix_unistr, 'GBK', "\\u", '');
echo $profix_unistr2 . "\n"; // 哈哈

示例:将 &#21457;&#36992;&#21306;&#29256;&#35268; 转换为 UTF-8 中文“发邀区版规”:

/**
 * Replace decimal HTML numeric character references ("&#21457;") in a string
 * with the UTF-8 characters they denote; all other text is kept unchanged.
 *
 * References that do not name a valid Unicode scalar value (surrogates,
 * values above 0x10FFFF) or that iconv cannot convert are left as written.
 *
 * @param string $unistr Text that may contain "&#<digits>;" references.
 * @return string The text with the references decoded to UTF-8.
 */
private static function unicode_decode($unistr) {
    $decoded = preg_replace_callback(
        // Only complete references with at least one digit are touched, so
        // literal text that merely contains "&#" (e.g. "&#x54C8;") survives.
        '/&#([0-9]+);/',
        function ($match) {
            $code = intval($match[1]);
            $isSurrogate = ($code >= 0xD800 && $code <= 0xDFFF);
            if ($code > 0x10FFFF || $isSurrogate) {
                return $match[0];
            }
            // pack('N') yields the code point as 4 big-endian bytes, which
            // matches the explicit UTF-32BE source charset given to iconv.
            $char = iconv('UTF-32BE', 'UTF-8', pack('N', $code));
            return ($char === false) ? $match[0] : $char;
        },
        $unistr
    );

    // preg_replace_callback() returns null on a PCRE error; fall back to the input.
    return ($decoded === null) ? $unistr : $decoded;
}

php 中文unicode 互转


扫描二维码,在手机上阅读!

添加新评论