King James圣经(即传说中的KJV,King James Version,是17世纪经英王詹姆斯一世许可的圣经钦定译本)的txt文件大小约为5.5MB。在这样的大文本中查找单词时,如果一次性读入整个文件再查找,会占用较多内存;改为分行/分块读取、逐段查找,则可以减少内存占用。
<?php
/**
 * Search a large text file for a keyword, reading it one line at a time so
 * that the whole file never has to be held in memory at once.
 *
 * Usage: php searchword.php [keyword] [caseSensitive]
 *   keyword        text to look for (defaults to "God")
 *   caseSensitive  1 for case-sensitive matching, anything else (or omitted)
 *                  for case-insensitive matching
 */

/**
 * Find the byte offsets of a keyword inside a single line.
 *
 * The line is trimmed first, so offsets are relative to the trimmed text
 * (the same text the script prints as "demoContent"). Matches are reported
 * left to right and do not overlap, which keeps the result consistent with
 * a count obtained from preg_match_all().
 *
 * @param string $line          line text
 * @param string $keyword       keyword to locate (treated literally)
 * @param int    $num           maximum number of occurrences to report
 * @param bool   $caseSensitive true to use strpos(), false to use stripos()
 * @return array list of integer offsets, e.g. array(1, 4, 5); empty when the
 *               keyword is empty or does not occur
 */
function findInLine($line, $keyword, $num, $caseSensitive = false)
{
    $positions = array();
    $keyword = (string)$keyword;
    // An empty needle can never be meaningfully located.
    if ($keyword === '' || $num <= 0) {
        return $positions;
    }

    $line = trim($line);
    $find = $caseSensitive ? 'strpos' : 'stripos';
    $step = strlen($keyword);
    $offset = 0;

    while (count($positions) < $num) {
        $pos = $find($line, $keyword, $offset);
        // Stop as soon as nothing more is found; without this the loop would
        // spin forever whenever fewer than $num occurrences exist.
        if ($pos === false) {
            break;
        }
        $positions[] = $pos;
        // Skip past the whole match so the next hit cannot overlap this one.
        $offset = $pos + $step;
    }

    return $positions;
}

/**
 * Scan kjv.txt line by line, record where the keyword occurs on each
 * matching line, and print a short report including the memory used.
 *
 * @param array $args command-line arguments in $argv layout
 * @return int process exit status: 0 on success, 1 if the file cannot be opened
 */
function runSearch($args)
{
    // Memory in use before any work is done.
    $memBegin = memory_get_usage();

    // Keyword; only a missing or empty argument falls back to the default,
    // so a keyword such as "0" is honoured.
    $keyword = (isset($args[1]) && $args[1] !== '') ? $args[1] : 'God';
    // Case sensitivity flag.
    $caseSensitive = isset($args[2]) && $args[2] == 1;
    // Pattern used to count matches; the keyword is quoted so that regex
    // metacharacters in it are matched literally.
    $regx = '/' . preg_quote($keyword, '/') . '/' . ($caseSensitive ? '' : 'i');

    // http://www.ccel.org/ccel/bible/kjv.txt
    $filename = 'kjv.txt';
    $fp = fopen($filename, 'rb');
    if ($fp === false) {
        file_put_contents('php://stderr', "cannot open {$filename}\n");
        return 1;
    }

    $ret = array();
    $lineCount = 1;
    $demoLine = 285;
    $demoContent = '';

    // fgets() returns false at end of file, which ends the loop without
    // processing a bogus trailing "line".
    while (($buffer = fgets($fp)) !== false) {
        // Count the matches on this line (false/0 means nothing to record).
        $num = preg_match_all($regx, $buffer, $matches);
        if ($num) {
            // Locate each occurrence within the line.
            $ret[$lineCount] = implode(',', findInLine($buffer, $keyword, $num, $caseSensitive));
            if ($lineCount == $demoLine) {
                $demoContent = trim($buffer);
            }
        }
        $lineCount++;
    }

    // Memory in use once every line has been processed.
    $memEnd = memory_get_usage();
    fclose($fp);

    // The demo line may not contain the keyword at all.
    $demoResult = isset($ret[$demoLine]) ? $ret[$demoLine] : '';

    // Print the report.
    $out = "keyword is {$keyword},case :" . intval($caseSensitive) . "\n";
    $out .= "demoLine:{$demoLine}\n";
    $out .= "demoContent:{$demoContent}\n";
    $out .= "search result:" . $demoResult . "\n";
    $out .= "memory usage:" . (($memEnd - $memBegin) / (1024 * 1024)) . "Mb\n";
    echo $out;

    return 0;
}

// Run the search only when this file is executed directly, so the functions
// above can be loaded from other code without touching kjv.txt.
if (isset($_SERVER['SCRIPT_FILENAME']) && realpath($_SERVER['SCRIPT_FILENAME']) === __FILE__) {
    exit(runSearch(isset($argv) ? $argv : array()));
}
运行:php searchword.php god 0
运行结果:
keyword is god,case :0
demoLine:285
demoContent:without the word of God? What word of God (whereof we may be sure)
search result:20,38
memory usage:1.223258972168Mb
附:最好的圣经app YouVersion下载