<?php
/**
 * SjisUtil
 * Collection of string-processing helpers for Shift_JIS (CP932) text that is
 * going to be embedded in SQL.
 *
 * Some Shift_JIS double-byte characters have 0x5C (the ASCII backslash) as
 * their second byte, for example 0x83 0x5C or 0x95 0x5C. The helpers below
 * work on the rawurlencode()d form of the string, where such a character
 * shows up as "%XX%5C", and add or remove escaping backslashes around it.
 *
 * All methods are stateless and declared static; calling them through an
 * instance ($obj->method()) keeps working.
 *
 * $Id: SjisUtil.inc,v 1.1 2015/10/08 11:18:56 wanggb Exp $
 * @author iimuro
 * @access public
 * @package jp.aimslib2.util
 */
class SjisUtil
{
    /**
     * Hex values of the first bytes that are treated as "may be followed by
     * a 0x5C second byte". The order matters: the tables derived from this
     * list are handed to str_replace(), which applies them one after another.
     */
    private static $leadBytes = array(
        '81', '83', '84', '87',
        '89', '8A', '8B', '8C',
        '8D', '8E', '8F', '90',
        '91', '92', '93', '94',
        '95', '96', '97', '98',
        '99', '9A', '9B', '9C',
        '9D', '9E', '9F', 'E0',
        'E1', 'E2', 'E3', 'E4',
        'E5', 'E6', 'E7', 'E8',
        'E9', 'EA',
    );

    /**
     * Builds one of the 40-entry pattern tables used with str_replace().
     *
     * The first 38 entries are "%<lead byte>" followed by $leadSuffix; the
     * last two are "%FA%78" and "%FB%78" followed by $extraSuffix.
     *
     * NOTE(review): rawurlencode() does not percent-encode alphanumerics, so
     * byte 0x78 ("x") is emitted as "x", not "%78". The two "%FA%78" /
     * "%FB%78" entries therefore look like they can never match encoder
     * output. They are kept as-is to preserve behaviour - confirm intent.
     *
     * @param String $leadSuffix  text appended to each "%XX" lead-byte entry
     * @param String $extraSuffix text appended to the two trailing entries
     * @return array list of patterns, in a fixed order
     */
    private static function buildPatterns($leadSuffix, $extraSuffix)
    {
        $patterns = array();
        foreach (self::$leadBytes as $hex) {
            $patterns[] = '%' . $hex . $leadSuffix;
        }
        $patterns[] = '%FA%78' . $extraSuffix;
        $patterns[] = '%FB%78' . $extraSuffix;

        return $patterns;
    }

    /**
     * Appends an extra backslash (%5C) after every byte pair that looks like
     * a double-byte character ending in 0x5C, so that the pair survives a
     * layer of backslash unescaping.
     *
     * The match is purely textual on the URL-encoded string: any listed lead
     * byte directly followed by 0x5C is affected.
     *
     * @param String $str input string
     * @return String string with the extra %5C bytes added
     */
    public static function escapePostgresLike5C($str)
    {
        $encoded = rawurlencode($str);
        $escaped = str_replace(
            self::buildPatterns('%5C', ''),
            self::buildPatterns('%5C%5C', '%5C'),
            $encoded
        );

        return rawurldecode($escaped);
    }

    /**
     * MySQL counterpart of escapePostgresLike5C().
     *
     * Intended to add %5C after characters that would otherwise be garbled,
     * but for now it deliberately does nothing: this is a dummy that returns
     * its argument unchanged.
     *
     * @param String $str input string
     * @return String the same string, unmodified
     */
    public static function escapeMysqlLike5C($str)
    {
        return $str;
    }

    /**
     * Removes the extra backslash that follows a "lead byte + 0x5C" pair,
     * i.e. the inverse of the replacement done by escapePostgresLike5C().
     *
     * When the constant MYSQL_USE_5C_ESCAPE is defined and falsy, the string
     * is returned untouched.
     *
     * @param String $str input string
     * @return String string with the extra %5C bytes removed
     */
    public static function stripMysqlSlashesLike5C($str)
    {
        if (defined("MYSQL_USE_5C_ESCAPE") && (!MYSQL_USE_5C_ESCAPE)) {
            return $str;
        }

        $encoded = rawurlencode($str);
        $stripped = str_replace(
            self::buildPatterns('%5C%5C', '%5C'),
            self::buildPatterns('%5C', ''),
            $encoded
        );

        return rawurldecode($stripped);
    }

    /**
     * For a database whose character set understands SJIS: escapes only real
     * backslashes and single quotes, and leaves a 0x5C that is the second
     * byte of a double-byte character unescaped.
     *
     * The input is trim()med first. Processing happens on the URL-encoded
     * form:
     *   1. every "%XX%5C" (XX = listed lead byte) is masked as "%XX%FF", so
     *      that the remaining "%5C" are treated as plain backslashes;
     *   2. the string is cut after each "%FF" and each piece is escaped
     *      ("%5C" -> "%5C%5C", "%27" -> "%5C%27");
     *   3. the mask is reverted, and where the masked 0x5C turns out to have
     *      been a stand-alone backslash after all, one more "%5C" is added.
     *
     * @param String $str input string
     * @return String escaped string
     */
    public static function escapeMySQL5CwithCP932($str)
    {
        $plainPatterns  = self::buildPatterns('%5C', '');
        $maskedPatterns = self::buildPatterns('%FF', '');

        $encoded = rawurlencode(trim($str));
        $encoded = str_replace($plainPatterns, $maskedPatterns, $encoded);

        // Cut the string so that every piece except the last one ends with a
        // masked character. explode() yields a single piece when there is no
        // "%FF" at all, so no separate branch is needed for that case.
        $pieces    = explode("%FF", $encoded);
        $lastIndex = count($pieces) - 1;

        $result = "";
        foreach ($pieces as $index => $piece) {
            $chunk = ($index === $lastIndex) ? $piece : $piece . "%FF";

            // Set trailing backslashes / quotes aside for the moment.
            $tail = "";
            while (strlen($chunk) >= 6) {
                $last = substr($chunk, -3);
                if ($last !== "%5C" && $last !== "%27") {
                    break;
                }
                $tail  = $last . $tail;
                $chunk = substr($chunk, 0, -3);
            }

            // Must be decided before the mask is reverted below.
            $addEscape = self::maskedByteIsLoneBackslash($chunk);

            // Escape backslashes first, then quotes; the order keeps the
            // backslash that is put in front of a quote from being doubled.
            $chunk = str_replace("%5C", "%5C%5C", $chunk);
            $tail  = str_replace("%5C", "%5C%5C", $tail);
            $chunk = str_replace("%27", "%5C%27", $chunk);
            $tail  = str_replace("%27", "%5C%27", $tail);

            // Revert the mask.
            $chunk = str_replace($maskedPatterns, $plainPatterns, $chunk);

            // The masked 0x5C was a real backslash: escape it as well.
            if ($addEscape) {
                $chunk .= "%5C";
            }

            // Put the part that was set aside back.
            $result .= $chunk . $tail;
        }

        return rawurldecode($result);
    }

    /**
     * Decides whether the "%FF" that ends $chunk masks a stand-alone
     * backslash rather than the second byte of a double-byte character.
     *
     * Counts the uninterrupted run of "%XX" tokens accepted by
     * isSJISFirstByte() immediately before the trailing "%FF". An odd run
     * means the masked byte is paired with the byte before it (part of a
     * character); an even run (including zero) means the bytes before it
     * already pair up among themselves, so the masked byte is a backslash
     * that merely happened to follow a byte in the lead-byte range.
     * This is a parity heuristic, not a full Shift_JIS decoder.
     *
     * @param String $chunk URL-encoded piece
     * @return bool true when one more "%5C" has to be appended
     */
    private static function maskedByteIsLoneBackslash($chunk)
    {
        $length = strlen($chunk);
        if ($length < 6 || substr($chunk, -3) !== "%FF") {
            // Too short to hold a double-byte character, or nothing masked.
            return false;
        }

        $run = 0;
        for ($pos = $length - 6; $pos >= 0; $pos -= 3) {
            if (!self::isSJISFirstByte(substr($chunk, $pos, 3))) {
                break;
            }
            $run++;
        }

        return $run % 2 === 0;
    }

    /**
     * Tells whether a URL-encoded byte ("%XX", upper-case hex) lies in the
     * range treated as an SJIS first byte.
     *
     * The intended ranges are 81-9F and E0-EF; the pattern actually accepts
     * any first hex digit of 8, 9, E or F, i.e. 80-9F and E0-FF.
     *
     * @param String $str three-character token such as "%83"
     * @return bool true when the token is in the accepted range
     */
    public static function isSJISFirstByte($str)
    {
        return preg_match("/^%[89EF][0-9A-F]$/", $str) === 1;
    }
}