// Print every Unicode block returned by load_code_points(), each followed by
// the characters whose code points fall inside that block's range.
mb_regex_encoding('UTF-8');
mb_internal_encoding("UTF-8");

$cps = load_code_points();
if (!is_array($cps)) {
    // Nothing could be loaded; continue with an empty list rather than
    // handing a non-array to ksort().
    $cps = [];
}
ksort($cps); // order the blocks by name (the array key)

foreach ($cps as $key => $block) {
    print "
Character Set: $key
Range " . $block['hexstart'] . "(" . $block['start'] .
        ") to " . $block['hexend'] . "(" . $block['end'] . ")
";
    for ($i = $block['start']; $i <= $block['end']; $i++) {
        // Numeric character reference for code point $i (ampersand, hash,
        // decimal number, semicolon), which the conversion below decodes.
        $html_char = '&#' . $i . ';';
        // NOTE: the 'HTML-ENTITIES' mbstring encoding is deprecated as of PHP 8.2.
        $char = mb_convert_encoding($html_char, 'UTF-8', 'HTML-ENTITIES');
        // Drop any byte sequences that are not valid UTF-8.
        $char = iconv("UTF-8", "UTF-8//IGNORE", $char);
        // Compare explicitly: a plain truthiness test would also skip "0".
        if ($char !== false && $char !== '') {
            print "$char ";
        }
    }
}
/**
 * Parse the file "Blocks.txt" in the current working directory into a map of
 * block name => code point range.
 *
 * Data lines are expected to look like "0000..007F; Basic Latin". Lines that
 * contain a '#', have no ';' separator, have an empty name, or whose range is
 * not of the form START..END are skipped.
 *
 * @return array Map keyed by block name. Each value is an array with the keys
 *               'hexstart' and 'hexend' (the bounds as written in the file)
 *               and 'start' and 'end' (the same bounds converted with
 *               hexdec()). Empty when the file cannot be read or contains no
 *               usable lines.
 */
function load_code_points()
{
    $cp = [];

    $contents = file_get_contents("Blocks.txt");
    if ($contents === false) {
        // file_get_contents() has already raised a warning; report "no blocks".
        return $cp;
    }

    foreach (explode("\n", $contents) as $curline) {
        // Any line carrying a '#' is treated as a comment line.
        if (strpos($curline, "#") !== false) {
            continue;
        }

        $fields = explode(";", $curline);
        if (count($fields) < 2) {
            continue; // blank line or no "range; name" separator
        }

        // trim() also removes a trailing "\r" left behind by CRLF files.
        $name = trim($fields[1]);
        if ($name === '') {
            continue;
        }

        // The range is written START..END, so split on the double dot.
        $bounds = explode("..", trim($fields[0]));
        if (count($bounds) !== 2) {
            continue;
        }
        $start_hex = trim($bounds[0]);
        $end_hex = trim($bounds[1]);

        $cp[$name] = [
            'hexstart' => $start_hex,
            'hexend'   => $end_hex,
            'start'    => hexdec($start_hex),
            'end'      => hexdec($end_hex),
        ];
    }

    return $cp;
}
?>