php中gb2312跟unicode码的转换
[code]<?php
/**
 * Decode one UTF-8 encoded character into its Unicode code point.
 *
 * The sequence length is taken from strlen($c); the bytes themselves are not
 * validated, so $c is expected to hold exactly one well-formed UTF-8 character.
 *
 * @param string $c A single UTF-8 character (1 to 4 bytes).
 * @return int|null The code point, or null when $c is empty or longer than 4 bytes.
 */
function utf8_unicode($c) {
    switch (strlen($c)) {
        case 1:
            // 0xxxxxxx: plain ASCII, the byte value is the code point.
            return ord($c);
        case 2:
            // 110xxxxx 10xxxxxx: the lead byte carries 5 payload bits.
            return ((ord($c[0]) & 0x1f) << 6)
                | (ord($c[1]) & 0x3f);
        case 3:
            // 1110xxxx 10xxxxxx 10xxxxxx: the lead byte carries 4 payload bits.
            return ((ord($c[0]) & 0x0f) << 12)
                | ((ord($c[1]) & 0x3f) << 6)
                | (ord($c[2]) & 0x3f);
        case 4:
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: the lead byte carries 3 payload bits.
            return ((ord($c[0]) & 0x07) << 18)
                | ((ord($c[1]) & 0x3f) << 12)
                | ((ord($c[2]) & 0x3f) << 6)
                | (ord($c[3]) & 0x3f);
        default:
            // Empty input or more than 4 bytes: not a single UTF-8 character.
            return null;
    }
}
/**
 * Convert a GB2312 encoded string into a run of decimal HTML numeric
 * character references ("&#NNNN;"), one per character.
 *
 * Every character is converted, ASCII included (e.g. "A" becomes "&#65;").
 *
 * @param string $g GB2312 encoded input.
 * @return string Concatenated "&#<code point>;" references.
 */
function gb2un($g)
{
    // Split into characters: an optional high (lead) byte followed by one byte.
    // The "s" flag lets "." match "\n" as well, so line breaks are converted
    // instead of being silently dropped.
    preg_match_all("/[\x80-\xff]?./s", $g, $ar);
    $str = "";
    foreach ($ar[0] as $v) {
        $utf8 = iconv("gb2312", "utf-8", $v);
        if ($utf8 === false) {
            // Not a valid GB2312 sequence: emit U+FFFD (replacement character)
            // rather than a malformed "&#;" reference.
            $str .= "&#65533;";
            continue;
        }
        $str .= "&#" . utf8_unicode($utf8) . ";";
    }
    return $str;
}
/**
 * Decode Unicode escapes embedded in a string into GB2312 bytes.
 *
 * The input is first passed through rawurldecode(), then three escape forms
 * are recognised anywhere in the text:
 *   - "%uXXXX"   (JavaScript escape() style, exactly 4 hex digits)
 *   - "&#xH...;" (hexadecimal numeric character reference)
 *   - "&#D...;"  (decimal numeric character reference)
 * All other text, including newlines, is passed through unchanged. An escape
 * whose code point is out of range or cannot be converted by iconv() is left
 * in its escaped form instead of being dropped.
 *
 * @param string $str Text containing Unicode escapes.
 * @return string The text with the escapes replaced by GB2312 encoded characters.
 */
function unescape($str) {
    $str = rawurldecode($str);
    // Tokenise into escapes and plain text. Plain runs stop at "%" and "&" so
    // that an escape following ordinary text is still seen as its own token;
    // the trailing "." (with the "s" flag) picks up a stray "%", "&" or newline.
    // No "u" flag: the pattern is pure ASCII, so byte-wise matching is enough
    // and does not fail on input that is not valid UTF-8.
    preg_match_all("/%u[0-9a-fA-F]{4}|&#x[0-9a-fA-F]{1,6};|&#\d{1,7};|[^%&]+|./s", $str, $r);
    $ar = $r[0];
    foreach ($ar as $k => $v) {
        // Plain-text tokens never start with "%u" or "&#": a run cannot begin
        // with "%" or "&", and a stray "%" or "&" is a one-character token.
        if (substr($v, 0, 2) == "%u") {
            $code = hexdec(substr($v, 2));
        } elseif (substr($v, 0, 3) == "&#x") {
            $code = hexdec(substr($v, 3, -1));
        } elseif (substr($v, 0, 2) == "&#") {
            $code = (int) substr($v, 2, -1);
        } else {
            continue;
        }
        if ($code > 0x10FFFF) {
            // Beyond the Unicode range: keep the escape text as written.
            continue;
        }
        // Pack as a 32-bit big-endian value and name the byte order explicitly,
        // so the result does not depend on the platform's default for "UCS-2".
        $gb = iconv("UCS-4BE", "gb2312", pack("N", $code));
        if ($gb !== false) {
            $ar[$k] = $gb;
        }
    }
    return join("", $ar);
}
/**
 * Convert a string of Unicode escapes to GB2312.
 *
 * Convenience alias: the work is delegated entirely to unescape().
 *
 * @param string $y Text containing Unicode escapes.
 * @return string Whatever unescape() returns for $y.
 */
function un2gb($y)
{
    $gb = unescape($y);
    return $gb;
}
?> [/code]