PHP实现gb2312、UTF-8等字符和unicode间的编码转换及PHP版unescape
[code]<?
/**
 * Convert a string into decimal numeric character references ("&#N;").
 *
 * The input is transcoded to GBK with iconv(), split into one- and two-byte
 * pieces, and every piece is passed to utf8_unicode_(); the results are
 * concatenated in order.
 *
 * @param string $input         Text to convert.
 * @param string $input_charset Charset of $input, as a name iconv() accepts.
 * @return string Concatenation of the values returned by utf8_unicode_().
 */
function str_to_unicode($input, $input_charset = 'gbk'){
    // Everything downstream works on GBK: utf8_unicode_() is called by
    // array_map() with one argument only, so it uses its own 'gbk' default.
    $input = iconv($input_charset, "gbk", $input);

    // A byte >= 0x80 is treated as the first byte of a two-byte GBK character
    // and is kept together with the byte that follows it.
    // The "s" modifier makes "." match "\n" as well; without it line breaks
    // could never match and were silently dropped from the result.
    preg_match_all("/[\x80-\xff]?./s", $input, $ar);

    return join("", array_map('utf8_unicode_', $ar[0]));
}
/**
 * Adapter around utf8_unicode(): transcodes $c from $input_charset to UTF-8
 * with iconv() and returns whatever utf8_unicode() produces for the result.
 *
 * @param string $c             Character encoded in $input_charset.
 * @param string $input_charset Charset of $c, as a name iconv() accepts.
 * @return string Return value of utf8_unicode().
 */
function utf8_unicode_($c, $input_charset = 'gbk'){
    return utf8_unicode(iconv($input_charset, 'utf-8', $c));
}
/**
 * Convert one UTF-8 encoded character into a decimal numeric character
 * reference.
 *
 * @param string $c A single character encoded as UTF-8.
 * @return string "&#N;" (N = decimal code point) for 2-, 3- and 4-byte
 *                sequences; $c unchanged for any other length (a single
 *                byte, an empty string, or more than 4 bytes).
 */
function utf8_unicode($c) {
    // iconv() returns false on failure; normalise so strlen() and the
    // default branch below behave predictably.
    $c = (string) $c;

    switch (strlen($c)) {
        case 2:
            // 110xxxxx 10xxxxxx -> 5 + 6 payload bits
            $n = (ord($c[0]) & 0x1f) << 6;
            $n += ord($c[1]) & 0x3f;
            break;
        case 3:
            // 1110xxxx 10xxxxxx 10xxxxxx -> 4 + 6 + 6 payload bits
            $n = (ord($c[0]) & 0x0f) << 12;
            $n += (ord($c[1]) & 0x3f) << 6;
            $n += ord($c[2]) & 0x3f;
            break;
        case 4:
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx -> 3 + 6 + 6 + 6 payload bits
            $n = (ord($c[0]) & 0x07) << 18;
            $n += (ord($c[1]) & 0x3f) << 12;
            $n += (ord($c[2]) & 0x3f) << 6;
            $n += ord($c[3]) & 0x3f;
            break;
        default:
            // One byte is returned verbatim. Any other length is not a single
            // UTF-8 character; hand it back untouched instead of falling
            // through to the return below with $n undefined.
            return $c;
    }

    return "&#$n;";
}
/**
 * Turn decimal numeric character references back into text.
 *
 * Every "&#N;" with 1 to 5 digits is replaced through the unicode2utf8_()
 * callback; the resulting string is then passed through iconv() from UTF-8
 * to $out_charset.
 *
 * @param string $str         Text containing "&#N;" references.
 * @param string $out_charset Target charset, as a name iconv() accepts.
 * @return string Result of the iconv() call.
 */
function str_from_unicode($str, $out_charset = 'gbk'){
    $decoded = preg_replace_callback("|&#([0-9]{1,5});|", 'unicode2utf8_', $str);

    return iconv("UTF-8", $out_charset, $decoded);
}
/**
 * Adapter around unicode2utf8(): passes element 1 of $c to it.
 *
 * @param array $c Array whose element 1 holds the value to convert (used
 *                 above as a preg_replace_callback() callback, where
 *                 element 1 is the first capture group).
 * @return string Return value of unicode2utf8().
 */
function unicode2utf8_($c){
    $captured = $c[1];

    return unicode2utf8($captured);
}
/**
 * Encode a numeric code point as a UTF-8 byte sequence.
 *
 * @param int|string $c Code point, as an integer or a decimal digit string.
 * @return string 1 to 4 bytes for values from 0 up to (but not including)
 *                0x200000; an empty string for anything outside that range.
 */
function unicode2utf8($c){
    // Callers pass the digits captured by a regex; make the arithmetic below
    // operate on a real integer.
    $c = (int) $c;

    if ($c < 0) {
        return "";
    }
    if ($c < 0x80) {
        // Emit the character itself. Appending $c here produced the decimal
        // digits ("65") instead of the character ("A").
        return chr($c);
    }
    if ($c < 0x800) {
        return chr(0xC0 | ($c >> 6))
             . chr(0x80 | ($c & 0x3F));
    }
    if ($c < 0x10000) {
        return chr(0xE0 | ($c >> 12))
             . chr(0x80 | (($c >> 6) & 0x3F))
             . chr(0x80 | ($c & 0x3F));
    }
    if ($c < 0x200000) {
        return chr(0xF0 | ($c >> 18))
             . chr(0x80 | (($c >> 12) & 0x3F))
             . chr(0x80 | (($c >> 6) & 0x3F))
             . chr(0x80 | ($c & 0x3F));
    }

    return "";
}
/**
 * Emulate JavaScript's unescape().
 *
 * The input is first run through rawurldecode(). After that, "%uXXXX",
 * "&#xXXXX;" (XXXX = four hex digits) and "&#N;" (decimal) sequences are
 * converted from UCS-2 to GB2312 with iconv(); every other byte is copied
 * through unchanged.
 *
 * @param string $str Escaped text.
 * @return string Decoded text; converted sequences are GB2312 encoded.
 */
function unescape($str) {
    $str = rawurldecode($str);

    // Tokenise into escape sequences and single bytes.
    //  - Only real hex digits are accepted after "%u" / "&#x", so malformed
    //    sequences fall through as literal text instead of reaching pack().
    //  - The "s" modifier lets "." match "\n", so line breaks are kept.
    preg_match_all('/%u[0-9a-fA-F]{4}|&#x[0-9a-fA-F]{4};|&#\d+;|./s', $str, $r);
    $ar = $r[0];

    foreach ($ar as $k => $v) {
        if (substr($v, 0, 2) == "%u") {
            $ucs2 = pack("H4", substr($v, -4));
        } elseif (substr($v, 0, 3) == "&#x") {
            $ucs2 = pack("H4", substr($v, 3, -1));
        } elseif (substr($v, 0, 2) == "&#") {
            $ucs2 = pack("n", (int) substr($v, 2, -1));
        } else {
            continue; // plain byte, keep as is
        }

        // pack("H4") and pack("n") both yield big-endian bytes, so name the
        // byte order explicitly; bare "UCS-2" is resolved differently by
        // different iconv implementations.
        $ar[$k] = iconv("UCS-2BE", "GB2312", $ucs2);
    }

    return join("", $ar);
}
// End of the forum code listing; the closing markup tag is kept inside a comment so the PHP still parses: [/code]