PHP采集代码
采集http://www.01job.cn/asp/itjob.asp该页面中职位列表头三条的记录
看了很多的PHP采集教程了 还是不会写 所以这里请教了....
以下是我的代码,运行时出错了:
// Caller script: loads the helpers from function.php, sets the page URL,
// calls pick() with the field rules in $ft and the replacement table in $th,
// then echoes the extracted "title" and "content" fields.
// NOTE(review): in this copy the string assigned to $ft["title"]["begin"] is
// unterminated (its marker text appears to have been lost), and no "end"
// marker, no "content" rule and no $th are defined in the visible lines.
include("function.php");
$url="http://www.01job.cn/asp/itjob.asp";
$ft["title"]["begin"]="
$rs=pick($url,$ft,$th);
echo $rs["title"];
echo "
内容:".$rs["content"];
?>
function.php
<?php
/**
 * Fetch the raw contents of a page.
 *
 * @param string $url URL (or any path/stream accepted by file_get_contents()).
 * @return string|false The contents as returned by file_get_contents().
 */
function fetch_urlpage_contents($url)
{
    return file_get_contents($url);
}
/**
 * Return the text found between two literal markers.
 *
 * The markers are treated as plain text (fully escaped with preg_quote()), the
 * match is case-insensitive, and the captured text may span several lines
 * ("s" modifier) -- HTML fragments between two tags usually contain newlines,
 * which the pattern could not cross without it.
 *
 * @param string $begin Literal text that precedes the wanted content.
 * @param string $end   Literal text that follows the wanted content.
 * @param string $c     Document to search.
 * @return string The shortest text between $begin and $end, or "" when not found.
 */
function fetch_match_contents($begin, $end, $c)
{
    $pattern = '/' . preg_quote((string) $begin, '/')
        . '(.*?)'
        . preg_quote((string) $end, '/') . '/is';

    // preg_match() returns 1 on a match, 0 on no match and false on error;
    // only a real match yields a capture group.
    if (preg_match($pattern, (string) $c, $rs) === 1) {
        return $rs[1];
    }

    return "";
}
/**
 * Escape a literal string so it can be embedded in a "/"-delimited PCRE pattern.
 *
 * Every regex metacharacter and the "/" delimiter are escaped via preg_quote().
 * The previous hand-rolled table replaced "$" with "\$" written in double
 * quotes, which PHP reads as a plain "$", so "$" was never actually escaped.
 *
 * @param string $str Literal text.
 * @return string The text with all regex-special characters backslash-escaped.
 */
function change_match_string($str)
{
    return preg_quote((string) $str, '/');
}
/**
 * Scrape a page and extract named fields from it.
 *
 * @param string $url URL handed to fetch_urlpage_contents().
 * @param array  $ft  Field rules: $ft[name] = array("begin" => ..., "end" => ...).
 *                    A missing "begin" or "end" entry is passed on as "".
 * @param array  $th  Optional replacement tables: $th[name] = array(search => replacement, ...),
 *                    applied in order to the value extracted for that field.
 * @return array Extracted values keyed by field name; an empty array when $ft has no rules.
 */
function pick($url, $ft, $th = array())
{
    // Initialise up front so an empty rule set returns array() instead of an undefined variable.
    $rs = array();
    $c = fetch_urlpage_contents($url);

    foreach ($ft as $key => $value) {
        $begin = isset($value["begin"]) ? $value["begin"] : "";
        $end = isset($value["end"]) ? $value["end"] : "";
        $rs[$key] = fetch_match_contents($begin, $end, $c);

        // Replacements are optional per field; isset() avoids a warning when
        // $th is missing the key (or is not an array at all).
        if (isset($th[$key]) && is_array($th[$key])) {
            foreach ($th[$key] as $old => $new) {
                $rs[$key] = str_replace($old, $new, $rs[$key]);
            }
        }
    }

    return $rs;
}
?>
------解决方案--------------------
/**
 * Fetch the raw contents of a page, retrying when the response is empty.
 *
 * The loop header in the original snippet was truncated ("for($i=0;$i {"),
 * so the original retry bound is unknown; it is exposed here as an optional
 * parameter with an assumed default of 3 attempts.
 *
 * @param string $url      URL (or any path/stream accepted by file_get_contents()).
 * @param int    $maxTries Maximum number of fetch attempts.
 * @return string|false The first non-blank response, otherwise the result of
 *                      the last attempt (false when every attempt failed).
 */
function fetch_urlpage_contents($url, $maxTries = 3)
{
    $c = false;
    for ($i = 0; $i < $maxTries; $i++) {
        // Warnings are suppressed on purpose: a failed attempt is simply retried.
        $c = @file_get_contents($url);
        if ($c !== false && trim($c) != "") {
            break;
        }
    }
    return $c;
}
/**
 * Return the text found between two literal markers (case-sensitive).
 *
 * The end marker is searched for only after the begin marker, so an earlier
 * occurrence of $end in the document does not hide a valid match, and a begin
 * marker located at offset 0 is accepted.
 *
 * @param string $begin Literal text that precedes the wanted content.
 * @param string $end   Literal text that follows the wanted content.
 * @param string $c     Document to search.
 * @return string The text between the first $begin and the next $end, or ""
 *                when either marker is empty or not found.
 */
function fetch_match_contents($begin, $end, $c)
{
    $c = (string) $c;
    $begin = (string) $begin;
    $end = (string) $end;

    // An empty needle has no meaningful position (and strpos() treats it
    // differently across PHP versions), so treat it as "no match".
    if ($begin === "" || $end === "") {
        return "";
    }

    $beginPos = strpos($c, $begin);
    if ($beginPos === false) {
        return "";
    }

    $contentPos = $beginPos + strlen($begin);
    $endPos = strpos($c, $end, $contentPos);
    if ($endPos === false) {
        return "";
    }

    return substr($c, $contentPos, $endPos - $contentPos);
}
/**
 * Scrape a page and extract named fields from it.
 *
 * @param string $url URL handed to fetch_urlpage_contents().
 * @param array  $ft  Field rules: $ft[name] = array("begin" => ..., "end" => ...).
 *                    A missing "begin" or "end" entry is passed on as "".
 * @param array  $th  Optional replacement tables: $th[name] = array(search => replacement, ...),
 *                    applied in order to the value extracted for that field.
 * @return array Extracted values keyed by field name; an empty array when $ft has no rules.
 */
function pick($url, $ft, $th = array())
{
    // Initialise up front so an empty rule set returns array() instead of an undefined variable.
    $rs = array();
    $c = fetch_urlpage_contents($url);

    foreach ($ft as $key => $value) {
        $begin = isset($value["begin"]) ? $value["begin"] : "";
        $end = isset($value["end"]) ? $value["end"] : "";
        $rs[$key] = fetch_match_contents($begin, $end, $c);

        // Replacements are optional per field; isset() avoids a warning when
        // $th is missing the key (or is not an array at all).
        if (isset($th[$key]) && is_array($th[$key])) {
            foreach ($th[$key] as $old => $new) {
                $rs[$key] = str_replace($old, $new, $rs[$key]);
            }
        }
    }

    return $rs;
}
?>
// Caller script: sets the page URL, calls pick() with the "title" field rule
// in $ft and the replacement table in $th, then dumps the result with print_r().
// NOTE(review): in this copy both marker strings ("begin" and "end") are
// unterminated -- their text appears to have been lost -- and $th is not
// defined in the visible lines.
$url="http://www.01job.cn/asp/itjob.asp";
$ft["title"]["begin"]="
$ft["title"]["end"]="
$rs=pick($url,$ft,$th);
print_r($rs);
?>
------解决方案--------------------