中文<=>unicode<=>utf-8

Run Settings
Language: JavaScript
Language Version
Run Command
// Demo script: round-trips a multilingual sample string
//   string -> "\uXXXX" escapes -> "\xXX" UTF-8 byte escapes -> "\uXXXX" escapes -> string
// first step by step, then through the UTF8.str2utf8 / UTF8.utf82str shortcuts.
const UNICODE = require("./tria.js");
const UTF8 = require("./dio.js");

// Part 1: explicit two-stage conversion in each direction.
const str = "he,βγ,你好,にほ,네이,龠龥";
console.log("str: " + str);

const unicode = UNICODE.encode(str);
console.log("unicode: " + unicode);

const utf8 = UTF8.encode(unicode);
console.log("utf8: " + utf8);

const unicode2 = UTF8.decode(utf8);
console.log("unicode: " + unicode2);

const str2 = UNICODE.decode(unicode2);
console.log("str: " + str2);

console.log(` =====================我是风骚的分隔线^_^========================== `);

// Part 2: the one-call helpers. Each log prints the value produced in this
// part (the `$`-prefixed variables), not the leftovers from part 1.
const $str = "he,βγ,你好,にほ,네이,龠龥";
console.log("str: " + $str);

const $utf8 = UTF8.str2utf8($str);
console.log("utf8: " + $utf8);

const $$str = UTF8.utf82str($utf8);
console.log("str: " + $$str);
/**
 * Conversion between "\uXXXX" escape text and "\xXX" UTF-8 byte escape text.
 *
 * Bytes of a sequence are written most-significant first. Encoding rules:
 *
 *  n | Code point range (hex)  | UTF-8 byte layout (binary)
 * ---+-------------------------+------------------------------------------------------
 *  1 | 0000 0000 - 0000 007F   | 0xxxxxxx
 *  2 | 0000 0080 - 0000 07FF   | 110xxxxx 10xxxxxx
 *  3 | 0000 0800 - 0000 FFFF   | 1110xxxx 10xxxxxx 10xxxxxx
 *  4 | 0001 0000 - 001F FFFF   | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *  5 | 0020 0000 - 03FF FFFF   | 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *  6 | 0400 0000 - 7FFF FFFF   | 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * Table 1. UTF-8 encoding rules. (Unicode itself ends at 0010 FFFF; the 5- and
 * 6-byte forms are the historical extension and are kept for completeness.)
 */

// Lead-byte marker bits, indexed by the total number of bytes in the sequence.
const LEAD_PREFIX = [0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC];

/**
 * Format one byte value as a "\xXX" escape (lower-case hex, zero-padded to
 * two digits).
 * @param {number} bin - byte value
 * @returns {string}
 */
function binary2hexstr(bin) {
    const hex = bin.toString(16);
    return '\\x' + (hex.length < 2 ? '0' + hex : hex);
}

/**
 * Format a number as a "\uXXXX" escape (lower-case hex, zero-padded to at
 * least four digits; larger values keep all their digits).
 * @param {number} bin - value to format
 * @returns {string}
 */
function binary2unicodestr(bin) {
    return '\\u' + UNICODE.addZero(bin.toString(16));
}

/**
 * Split one code point into its UTF-8 bytes according to Table 1.
 * Values that are NaN, negative or above 0x7FFFFFFF yield no bytes.
 * @param {number} codePoint
 * @returns {number[]}
 */
function codePointToBytes(codePoint) {
    if (!(codePoint >= 0 && codePoint <= 0x7FFFFFFF)) {
        return [];
    }
    if (codePoint <= 0x7F) {
        return [codePoint];
    }

    let count;
    if (codePoint <= 0x7FF) {
        count = 2;
    } else if (codePoint <= 0xFFFF) {
        count = 3;
    } else if (codePoint <= 0x1FFFFF) {
        count = 4;
    } else if (codePoint <= 0x3FFFFFF) {
        count = 5;
    } else {
        count = 6;
    }

    // Fill continuation bytes from the least significant 6 bits upwards;
    // whatever remains afterwards belongs in the lead byte.
    const bytes = new Array(count);
    let rest = codePoint;
    for (let i = count - 1; i > 0; i--) {
        bytes[i] = 0x80 | (rest & 0x3F);
        rest >>>= 6;
    }
    bytes[0] = LEAD_PREFIX[count] | rest;
    return bytes;
}

/**
 * unicode \uXXXX => utf8 \xXX
 *
 * The input is split on "\u"; any text before the first escape is ignored and
 * each piece is parsed as a hexadecimal number. A high surrogate immediately
 * followed by a low surrogate is combined into a single code point so that it
 * becomes one 4-byte sequence. Pieces that do not parse are skipped.
 *
 * @param {string} unicode - text made of "\uXXXX" escapes
 * @returns {string} concatenated "\xXX" escapes, or "" when there is no "\u"
 */
function encode(unicode) {
    if (typeof unicode !== 'string' || !unicode.includes('\\u')) {
        return '';
    }

    const units = unicode.split('\\u').slice(1).map((hex) => parseInt(hex, 16));
    const out = [];
    for (let i = 0; i < units.length; i++) {
        let codePoint = units[i];
        const next = units[i + 1];
        const isHigh = codePoint >= 0xD800 && codePoint <= 0xDBFF;
        const isLow = next >= 0xDC00 && next <= 0xDFFF;
        if (isHigh && isLow) {
            codePoint = 0x10000 + ((codePoint - 0xD800) << 10) + (next - 0xDC00);
            i++; // the low surrogate has been consumed as well
        }
        for (const byte of codePointToBytes(codePoint)) {
            out.push(binary2hexstr(byte));
        }
    }
    return out.join('');
}

/**
 * utf8 \xXX => unicode \uXXXX
 *
 * The input is split on "\x"; any text before the first escape is ignored and
 * each piece is parsed as a hexadecimal byte. Decoding stops (returning what
 * was decoded so far) at a piece that does not parse, at a continuation byte
 * found where a lead byte is expected, or at a lead byte announcing more than
 * six bytes. Continuation bytes missing at the end of the input count as 0.
 *
 * Code points up to 0xFFFF become one "\uXXXX" escape; code points up to
 * 0x10FFFF become a surrogate pair of two escapes; anything larger is written
 * as a single escape with more than four hex digits.
 *
 * @param {string} utf8 - text made of "\xXX" escapes
 * @returns {string} concatenated "\uXXXX" escapes
 */
function decode(utf8) {
    if (typeof utf8 !== 'string') {
        return '';
    }

    const bytes = utf8.split('\\x').slice(1).map((hex) => parseInt(hex, 16));
    const out = [];
    let pos = 0;
    while (pos < bytes.length) {
        const lead = bytes[pos];
        if (Number.isNaN(lead) || (lead >> 6) === 2) {
            break;
        }
        const count = UTF8.getByteNumber(lead);
        if (count > 6) {
            break;
        }

        let codePoint;
        if (count === 1) {
            codePoint = lead;
        } else {
            // The lead byte carries (7 - count) payload bits.
            codePoint = lead & (0x7F >> count);
            for (let k = 1; k < count; k++) {
                // Multiply instead of shifting so 31-bit values stay positive.
                codePoint = codePoint * 64 + (bytes[pos + k] & 0x3F);
            }
        }

        if (codePoint > 0xFFFF && codePoint <= 0x10FFFF) {
            const offset = codePoint - 0x10000;
            out.push(binary2unicodestr(0xD800 + (offset >> 10)));
            out.push(binary2unicodestr(0xDC00 + (offset & 0x3FF)));
        } else {
            out.push(binary2unicodestr(codePoint));
        }
        pos += count;
    }
    return out.join('');
}

/**
 * Helpers for converting between JavaScript strings and "\uXXXX" escape text
 * (one escape per UTF-16 code unit).
 */
class UNICODE {
    /**
     * Left-pad a hexadecimal string with zeros to at least four characters.
     * @param {string} hex
     * @returns {string}
     */
    static addZero(hex) {
        const missing = 4 - hex.length;
        return missing > 0 ? '0'.repeat(missing) + hex : hex;
    }

    /**
     * convert str to unicode string
     * @param {string} str
     * @returns {string} one "\uXXXX" escape per UTF-16 code unit of str
     */
    static encode(str) {
        if (str == null) {
            return '';
        }
        const out = [];
        for (let i = 0; i < str.length; i++) {
            out.push('\\u' + UNICODE.addZero(str.charCodeAt(i).toString(16)));
        }
        return out.join('');
    }

    /**
     * convert unicode string to str
     *
     * Only "\u" followed by 2-4 hex digits is replaced; all other text,
     * including "%" signs and other backslashes, is left as it is.
     * @param {string} unicode
     * @returns {string}
     */
    static decode(unicode) {
        if (!unicode) {
            return '';
        }
        return unicode.replace(/\\u([\da-fA-F]{2,4})/g,
            (match, hex) => String.fromCharCode(parseInt(hex, 16)));
    }
}

/**
 * Public entry points of this module.
 */
class UTF8 {
    /**
     * Count the leading 1 bits among the low 8 bits of a byte; for a lead
     * byte this is the length of its sequence. A byte with no leading 1 bit
     * (single-byte form) yields 1.
     * @param {number} byte
     * @returns {number}
     */
    static getByteNumber(byte) {
        let ones = 0;
        for (let bit = 7; bit >= 0 && ((byte >> bit) & 1); bit--) {
            ones++;
        }
        return ones || 1;
    }

    /** "\uXXXX" escapes -> "\xXX" escapes; see encode(). */
    static encode(unicode) {
        return encode(unicode);
    }

    /** "\xXX" escapes -> "\uXXXX" escapes; see decode(). */
    static decode(utf8) {
        return decode(utf8);
    }

    /** Plain string -> "\xXX" escapes of its UTF-8 bytes. */
    static str2utf8(str) {
        return UTF8.encode(UNICODE.encode(str));
    }

    /** "\xXX" escapes of UTF-8 bytes -> plain string. */
    static utf82str(utf8) {
        return UNICODE.decode(UTF8.decode(utf8));
    }
}

module.exports = UTF8;
/**
 * Helpers for converting between JavaScript strings and "\uXXXX" escape text
 * (one escape per UTF-16 code unit).
 */
class UNICODE {
    /**
     * Left-pad a hexadecimal string with zeros to at least four characters.
     * @param {string} hex
     * @returns {string}
     */
    static addZero(hex) {
        const missing = 4 - hex.length;
        return missing > 0 ? "0".repeat(missing) + hex : hex;
    }

    /**
     * convert str to unicode string
     * @param {string} str
     * @returns {string} one "\uXXXX" escape per UTF-16 code unit of str;
     *   "" for null/undefined
     */
    static encode(str) {
        if (str == null) {
            return "";
        }
        const out = [];
        // Index loop rather than for...in: only the code units are wanted.
        for (let i = 0; i < str.length; i++) {
            out.push("\\u" + UNICODE.addZero(str.charCodeAt(i).toString(16)));
        }
        return out.join("");
    }

    /**
     * convert unicode string to str
     *
     * Only "\u" followed by 2-4 hex digits is replaced. Everything else,
     * including "%" signs and other backslashes, is returned unchanged, so
     * decode(encode(s)) === s for every string s.
     * @param {string} unicode
     * @returns {string} decoded text; "" for empty/missing input
     */
    static decode(unicode) {
        if (!unicode) {
            return "";
        }
        return unicode.replace(/\\u([\da-fA-F]{2,4})/g,
            (match, hex) => String.fromCharCode(parseInt(hex, 16)));
    }
}

module.exports = UNICODE;
Editor Settings
Theme
Key bindings
Full width
Lines