ZFOCR API

  • 2017-07-06
  • 0
  • 0

目前字典大致为4500条左右,单次识别率基本稳定在85%,按此估算,连续请求三次时至少有一次识别正确的概率约为99.7%(1-0.15^3);内网环境下单次完成时间在1.5s-2s,外网环境下完成时间可能会在2.5s-3s之间。

PIN Identify by fangzheng.php 为主文件
PIN Identify lib.php 为其函数库
downloadImg.php 用于download验证码
AddDictionary.php 用于添加字典
zidian.sql 为字典,新建一个名为 'yzm' 的数据库后导入即可

 PS:目前认为运算效率的瓶颈在 similar_text() 这个核心函数上。该函数的作用是对比两个字符串的相似程度,但其算法时间复杂度为 T(n) = O(n^3),所以准备用时间复杂度为 O(m*n) 的动态规划实现一个功能类似的函数来代替它,以提高性能,完成时间未定……

API REQUEST URL:

http://ocr.api.xsico.cn/v1/?img=

// Script setup: response header, helper library and the globals shared by the
// rest of the recognition pipeline.
header('Content-Type: text/html;charset=utf-8');
include('./function/PIN Identify lib.php');
$yz = 10; // threshold used when deciding where to cut the characters apart
$all = array(); // binarized image, filled in later as $all[row][column]
// One cache file per request. A plain time() name collides when two requests
// arrive within the same second, letting one request overwrite the captcha
// image the other is still processing; uniqid() with extra entropy avoids that.
$save_to = './cache/' . uniqid('', true) . '.png';
/**
 * Fetch a URL with cURL and return the response body.
 *
 * The request carries a desktop Chrome User-Agent, a fixed Referer, a randomly
 * generated dotted-quad in the CLIENT-IP / X_FORWARD_FOR headers, and asks for
 * a gzip-compressed response, which cURL decodes before returning it.
 *
 * @param string $url Address to request.
 * @return string|false Response body, or false when curl_exec() fails
 *                      (for example when the 15 second timeout is hit).
 */
function XCURL($url){
    // Random address placed in the client-address headers below.
    $ip = mt_rand(1,255).".".mt_rand(1,255).".".mt_rand(1,255).".".mt_rand(1,255);

    $ch = curl_init();
    // NOTE(review): certificate verification is disabled here, so HTTPS peers
    // are not authenticated. Left unchanged to avoid breaking existing targets.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    // All custom headers go into ONE list: every CURLOPT_HTTPHEADER call
    // replaces the previous list, so setting it twice silently dropped the
    // address headers. The "CLIENT-IP" entry also needs its ":" separator to
    // be a well-formed header line.
    // NOTE(review): "X_FORWARD_FOR" is kept as written; if the conventional
    // "X-Forwarded-For" header was intended, rename it.
    curl_setopt($ch, CURLOPT_HTTPHEADER, array(
        "CLIENT-IP:".$ip,
        "X_FORWARD_FOR:".$ip,
        'Accept-Encoding:gzip',
    ));
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36");
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_TIMEOUT, 15);
    curl_setopt($ch, CURLOPT_REFERER, "https://grades.xsico.cn/");
    // Enable gzip so the transfer is smaller; cURL decompresses the body.
    curl_setopt($ch, CURLOPT_ENCODING, "gzip");

    $result = curl_exec($ch);
    curl_close($ch);
    return $result;
}
// Download the captcha into the cache file and load it as a GD image.
// NOTE(review): $url is not assigned anywhere in the visible code; presumably
// it is taken from the `img` request parameter. Confirm against the caller.
$save = file_put_contents($save_to, XCURL($url));
if ($save === false || $save === 0) {
    // XCURL() returns false on a failed transfer, which is written as an empty
    // file; stop here instead of feeding an empty file to the image functions.
    if (is_file($save_to)) {
        unlink($save_to);
    }
    exit('ERROR: unable to download the captcha image');
}

$_info = getimagesize($save_to);
if ($_info === false) {
    // The response was not an image (error page, redirect body, ...).
    unlink($save_to);
    exit('ERROR: the downloaded file is not a readable image');
}

$info = array(
    'width' => $_info[0],
    'height' => $_info[1],
    'type' => image_type_to_extension($_info[2], false),
    'mime' => $_info['mime']
);
// Pick the matching 'imagecreatefrom***' loader for the detected format.
$fun = "imagecreatefrom{$info['type']}";
if (!function_exists($fun)) {
    // Formats GD has no loader for would otherwise end in a fatal error.
    unlink($save_to);
    exit('ERROR: unsupported image type');
}
$image = $fun($save_to);
if ($image === false) {
    unlink($save_to);
    exit('ERROR: the captcha image could not be decoded');
}

// Binarization: walk every pixel and store '1' in $all[row][column] when the
// pixel is dark in red and green (< 110) and has blue above 100; every other
// pixel is stored as '0'.
$y = 0;
while ($y < $info['height']) {
    $x = 0;
    while ($x < $info['width']) {
        $rgb = imagecolorat($image, $x, $y);
        $rgbArray = imagecolorsforindex($image, $rgb);
        // Inverted form of the "ink" test: any channel outside the ink range
        // makes the pixel background.
        $all[$y][$x] = ($rgbArray['red'] >= 110 || $rgbArray['green'] >= 110 || $rgbArray['blue'] <= 100)
            ? '0'
            : '1';
        ++$x;
    }
    ++$y;
}
// The pixel data now lives in $all, so the GD image can be released.
imagedestroy($image);

// Noise removal: remove_noise() returns the de-noised binarized array.
$all = remove_noise($info['height'], $info['width'], $all);

// Reference coordinates for cutting, one start/end pair per character,
// all initialised to 0. First the x axis...
$end_x4 = 0;
$start_x4 = 0;
$end_x3 = 0;
$start_x3 = 0;
$end_x2 = 0;
$start_x2 = 0;
$end_x1 = 0;
$start_x1 = 0;
// ...then the y axis.
$end_y4 = 0;
$start_y4 = 0;
$end_y3 = 0;
$start_y3 = 0;
$end_y2 = 0;
$start_y2 = 0;
$end_y1 = 0;
$start_y1 = 0;

// Cutting: obtains the x/y reference values used for slicing below.
// NOTE(review): cutting() takes only the image size and its return value is
// unused, so it presumably writes the coordinates above through globals.
// Confirm in the library.
cutting($info['width'], $info['height']);


// Debug output of the cut coordinates (disabled). The line-break markup that
// originally ended each string was lost, so only the coordinate text is kept.
//echo "x:S1:$start_x1,E1:$end_x1,S2:$start_x2,E2:$end_x2,S3:$start_x3,E3:$end_x3,S4:$start_x4,E4:$end_x4";
//echo "y:S1:$start_y1,E1:$end_y1,S2:$start_y2,E2:$end_y2,S3:$start_y3,E3:$end_y3,S4:$start_y4,E4:$end_y4";

$letter1 = $letter2 = $letter3 = $letter4 = array();

// With the cut coordinates known, copy each character's rectangle out of the
// binarized array into its own zero-based array.
for ($y = $start_y1, $_y = 0; $y < $end_y1; ++$y, ++$_y) {
    for ($x = $start_x1, $_x = 0; $x < $end_x1; ++$x, ++$_x) {
        $letter1[$_y][$_x] = $all[$y][$x];
    }
}
for ($y = $start_y2, $_y = 0; $y < $end_y2; ++$y, ++$_y) {
    for ($x = $start_x2, $_x = 0; $x < $end_x2; ++$x, ++$_x) {
        $letter2[$_y][$_x] = $all[$y][$x];
    }
}
for ($y = $start_y3, $_y = 0; $y < $end_y3; ++$y, ++$_y) {
    for ($x = $start_x3, $_x = 0; $x < $end_x3; ++$x, ++$_x) {
        $letter3[$_y][$_x] = $all[$y][$x];
    }
}
for ($y = $start_y4, $_y = 0; $y < $end_y4; ++$y, ++$_y) {
    for ($x = $start_x4, $_x = 0; $x < $end_x4; ++$x, ++$_x) {
        $letter4[$_y][$_x] = $all[$y][$x];
    }
}

// Show the cut result and the binarized array (debug helper, disabled).
//show($info['height'], $info['width'], $all, $letter1, $letter2, $letter3, $letter4);

$ocr = discern($letter1, $letter2, $letter3, $letter4); // recognition

/**
 * Recursively delete stale files below a directory.
 *
 * Sub-directories are descended into but not removed. A file is deleted when
 * its last access time is more than 5 seconds older than the start of the
 * current request, so files belonging to requests still in flight survive.
 *
 * @param string $dirName Directory to clean.
 * @return void
 */
function delFileUnderDir( $dirName ) {
    $handle = opendir( $dirName );
    if ( !$handle ) {
        return;
    }

    while ( false !== ( $item = readdir( $handle ) ) ) {
        if ( $item == "." || $item == ".." ) {
            continue;
        }

        $path = "$dirName/$item";
        if ( is_dir( $path ) ) {
            delFileUnderDir( $path );
            continue;
        }

        // Last access time of the file.
        $fileatime = fileatime( $path );
        // Keep recent cache files so requests already running are unaffected.
        // NOTE(review): the original comment promised 300 s of retention while
        // the code has always used 5 s; the code value is kept. Confirm which
        // one is intended.
        if ( ( $_SERVER["REQUEST_TIME"] - $fileatime ) > 5 ) {
            unlink( $path );
        }
    }

    closedir( $handle );
}

delFileUnderDir('cache');

评论

还没有任何评论,你来说两句吧

CC共享许可协议:BY-NC-SA 4.0 - 友情链接 - Theme by Qzhai