SjisUtil.inc 10.8 KB
<?php
/**
 * SjisUtil
 * 文字列処理関数集。
 * $Id: SjisUtil.inc,v 1.1 2015/10/08 11:18:56 wanggb Exp $
 * @author iimuro
 * @access public
 * @package jp.aimslib2.util
 */
 
class SjisUtil {

	/**
	 * Lead bytes (two upper-case hex digits, as emitted by rawurlencode()) of
	 * the double-byte characters handled by this class, i.e. characters whose
	 * second byte is 0x5C (backslash): 0x815C, 0x835C, ... 0xFA5C, 0xFB5C.
	 *
	 * The last two entries were previously spelled "%FA%78" / "%FB%78" in the
	 * lookup tables. rawurlencode() never encodes "x" (0x78), so those entries
	 * could never match; they are treated here as 0xFA5C / 0xFB5C like every
	 * other entry of the table.
	 */
	private static $leadBytesBefore5C = array(
		"81", "83", "84", "87",
		"89", "8A", "8B", "8C",
		"8D", "8E", "8F", "90",
		"91", "92", "93", "94",
		"95", "96", "97", "98",
		"99", "9A", "9B", "9C",
		"9D", "9E", "9F", "E0",
		"E1", "E2", "E3", "E4",
		"E5", "E6", "E7", "E8",
		"E9", "EA", "FA", "FB"
	);

	/**
	 * URL-encoded placeholder that temporarily replaces the 0x5C trail byte of
	 * a listed character inside escapeMySQL5CwithCP932().
	 */
	private static $maskToken = "%FF";

	/**
	 * Builds one URL-encoded token per listed lead byte: "%" + lead + suffix.
	 *
	 * @param String suffix appended after the lead byte (e.g. "%5C")
	 * @return array list of tokens, in the order of $leadBytesBefore5C
	 */
	private static function buildTokens($suffix) {
		$tokens = array();
		foreach (self::$leadBytesBefore5C as $hex) {
			$tokens[] = "%" . $hex . $suffix;
		}
		return $tokens;
	}

	/**
	 * Counts how many consecutive "%XX" tokens accepted by isSJISFirstByte()
	 * sit directly in front of the trailing 3-character token of $segment.
	 *
	 * @param String URL-encoded segment that ends with a 3-character token
	 * @return int length of the run (0 when the preceding token does not qualify)
	 */
	private static function countLeadByteRun($segment) {
		$run = 0;
		// Start at the token just before the trailing one and walk backwards
		// in 3-character steps until a non-qualifying token or the start.
		for ($pos = strlen($segment) - 6; $pos >= 0; $pos -= 3) {
			if (!self::isSJISFirstByte(substr($segment, $pos, 3))) {
				break;
			}
			$run++;
		}
		return $run;
	}

	/**
	 * Appends one extra backslash (0x5C) after every listed lead byte + 0x5C
	 * pair found in the string, so that the 0x5C trail byte of such a
	 * character ends up doubled.
	 *
	 * The work is done on the rawurlencode() form of the string and the
	 * result is decoded again before it is returned.
	 *
	 * @param String string to process
	 * @return String string with the additional 0x5C bytes inserted
	 */
	public static function escapePostgresLike5C($str) {
		$encoded = rawurlencode($str);
		$escaped = str_replace(
			self::buildTokens("%5C"),
			self::buildTokens("%5C%5C"),
			$encoded
		);
		return rawurldecode($escaped);
	}

	/**
	 * MySQL counterpart of escapePostgresLike5C().
	 * Intentionally a dummy for now: the string is returned unchanged.
	 *
	 * @param String string to process
	 * @return String the same string
	 */
	public static function escapeMysqlLike5C($str) {
		return $str;
	}

	/**
	 * Reverses the doubling done for the listed characters: every listed lead
	 * byte followed by two 0x5C bytes is reduced to lead byte + one 0x5C.
	 *
	 * When the constant MYSQL_USE_5C_ESCAPE is defined and falsy the string
	 * is returned unchanged.
	 *
	 * @param String string to process
	 * @return String string with the extra 0x5C bytes removed
	 */
	public static function stripMysqlSlashesLike5C($str) {

		if (defined("MYSQL_USE_5C_ESCAPE") && (!MYSQL_USE_5C_ESCAPE)) {
			return $str;
		}

		$encoded = rawurlencode($str);
		$stripped = str_replace(
			self::buildTokens("%5C%5C"),
			self::buildTokens("%5C"),
			$encoded
		);
		return rawurldecode($stripped);
	}

	/**
	 * For databases whose character set handles SJIS: escapes only real
	 * backslashes (0x5C -> 0x5C 0x5C) and single quotes (0x27 -> 0x5C 0x27),
	 * leaving the 0x5C trail byte of the listed double-byte characters alone.
	 * The input is trim()med first.
	 *
	 * How it works (all on the rawurlencode() form):
	 *  1. Every listed "lead byte + %5C" pair has its %5C masked as %FF, so
	 *     any %5C still present is treated as a plain backslash.
	 *  2. The string is cut after every %FF. For each cut, the tokens in
	 *     front of the %FF that pass isSJISFirstByte() are counted. An odd
	 *     count means the masked byte is the trail byte of a character; an
	 *     even count means the preceding bytes pair up among themselves and
	 *     the masked byte was a stand-alone backslash, which then receives an
	 *     extra escaping backslash.
	 *  3. Backslashes and quotes are escaped, the masks are turned back into
	 *     %5C and the result is URL-decoded.
	 *
	 * NOTE(review): a raw 0xFF byte in the input encodes to the same "%FF"
	 * token as the mask and is therefore handled like a masked byte; the
	 * input is expected not to contain 0xFF.
	 *
	 * @param String string to escape
	 * @return String escaped string
	 */
	public static function escapeMySQL5CwithCP932($str) {

		$plainTokens = self::buildTokens("%5C");
		$maskedTokens = self::buildTokens(self::$maskToken);

		$encoded = str_replace($plainTokens, $maskedTokens, rawurlencode(trim($str)));

		// Every piece except the last one was followed by a mask token.
		$segments = explode(self::$maskToken, $encoded);
		$lastIndex = count($segments) - 1;

		$result = "";
		foreach ($segments as $index => $segment) {

			$needsExtraEscape = false;
			if ($index < $lastIndex) {
				$segment .= self::$maskToken;
				// At least one full token must precede the mask for the parity test.
				if (strlen($segment) >= 6) {
					$needsExtraEscape = (self::countLeadByteRun($segment) % 2 == 0);
				}
			}

			// Escape backslashes first, then quotes, so the backslash added
			// in front of a quote is not doubled again.
			$segment = str_replace("%5C", "%5C%5C", $segment);
			$segment = str_replace("%27", "%5C%27", $segment);

			// Turn the masks back into the original 0x5C bytes.
			$segment = str_replace($maskedTokens, $plainTokens, $segment);

			// The masked byte was really a stand-alone backslash: escape it.
			if ($needsExtraEscape) {
				$segment .= "%5C";
			}

			$result .= $segment;
		}

		return rawurldecode($result);
	}

	/**
	 * Tells whether the given URL-encoded token ("%XX", upper-case hex) is in
	 * the range this class treats as a possible SJIS lead byte.
	 *
	 * The pattern accepts %80-%9F and %E0-%FF.
	 * NOTE(review): that is slightly wider than the CP932 lead-byte ranges
	 * (0x81-0x9F, 0xE0-0xFC); kept as is to preserve existing results.
	 *
	 * @param String URL-encoded token to test
	 * @return bool true when the token is in the accepted range
	 */
	public static function isSJISFirstByte($str) {

		if (preg_match("/^%[89EF][0-9A-F]$/", $str)) {
			return true;
		}

		return false;
	}

}

?>