01|北京,02|上海,03|天津,04|重庆,05|黑龙江,06|吉林,07|辽宁,08|内蒙古,09|河北,10|山西,11|陕西,12|山东,13|新疆,14|西藏,15|青海,16|甘肃,17|宁夏…(以下省略) |
020101|上海,020102|闵行,020103|宝山,020104|嘉定,020105|南汇,020106|金山,020107|青浦,020108|松江,020109|奉贤,020110|崇明,020111|徐家汇,020112|浦东 |
来自: http://hi.baidu.com/gushu/blog/item/6655d60770e052c67b8947c0.html
附抓取id的方法:
<?php
// Bootstrap for the weather.com.cn city-ID scraper: removes the execution
// time limit, opens the session that carries intermediate results from one
// step to the next, and reads the request parameters.
set_time_limit(0);
session_start();

// Base URL of the remote site; every fetched path is appended to it.
$WeatherHost = "http://www.weather.com.cn";
// File that receives the generated $CityData array.
$WeatherFile = "CityData.txt";
// File that receives the list of URLs that could not be fetched.
$errorFile = "errorLog.txt";

// Request parameters. Each one falls back to an empty string when it is
// missing or is not a plain string (e.g. ?id[]=x), so later string
// operations such as trim() and comparisons always receive a string.
$action = (isset($_GET['action']) && is_string($_GET['action'])) ? $_GET['action'] : '';
$type = (isset($_GET['type']) && is_string($_GET['type'])) ? $_GET['type'] : '';
$id = (isset($_GET['id']) && is_string($_GET['id'])) ? $_GET['id'] : '';
?>
<html>
<head>
<title>:::获取weather.com.cn城市 ID 数据:::</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<!-- Grid layout for the lists rendered inside the #showdata container. -->
<style type="text/css">
ul {list-style-type:none; }
#showdata { width:500px; text-align:center;}
#showdata ul{ width:498px; float:left;}
#showdata li { width:82px; height:30px; border:1px double #ccc; float:left; padding:5px; }
</style>
</head>
<body>
<div id="showdata">
<?php
// Request dispatcher. The crawl is a chain of steps, each selected by
// ?action= and each redirecting to the next one through showmsg():
//   province -> region -> city -> getid -> write
// "list" and "getdata" browse the file produced by the "write" step.

// Snapshot of the records collected so far. It is empty when a step is
// opened directly without a running crawl, which keeps the loops below from
// touching an undefined session key. The loops iterate over this snapshot
// while getCity()/getid() write to $_SESSION['data'].
$sessionData = (isset($_SESSION['data']) && is_array($_SESSION['data'])) ? $_SESSION['data'] : array();
$self = $_SERVER['PHP_SELF'];

if($action == "province"){
    // Start a fresh crawl. Record ids handed out by getCity() begin at 2 and
    // provinces are stored with parent id 1, so id 1 is the root record.
    $_SESSION['WeatherId'] = 2;
    $_SESSION['ErrorLog'] = array();
    $_SESSION['errorLevel'] = 0;
    // The "list" view starts from a record of type "country" with parent 0;
    // without this seed no such record exists and the root list stays empty.
    // NOTE(review): the display label is the reviewer's choice - adjust if needed.
    $_SESSION['data'] = array(1 => array(1,0,"","中国","country"));
    $msg = getCity("",1,"province");
    if($msg == true) showmsg("获取省份数据成功",$self."?action=region");
    else showmsg("获取省份数据失败",$self);
}elseif($action == "region"){
    // Individual failures are recorded in the error log by getCity(); the
    // step counts as successful when at least one province could be expanded.
    $msg = false;
    foreach($sessionData as $d){
        if($d[4] == "province" && getCity($d[2],$d[0],"region")) $msg = true;
    }
    if($msg == true) showmsg("获取地区数据成功",$self."?action=city");
    else showmsg("获取地区数据失败",$self);
}elseif($action == "city"){
    $msg = false;
    foreach($sessionData as $d){
        if($d[4] == "region" && getCity($d[2],$d[0],"file")) $msg = true;
    }
    if($msg == true) showmsg("获取城市数据成功",$self."?action=getid");
    else showmsg("获取城市数据失败",$self);
}elseif($action == "getid"){
    // Resolve every "file" record into a "city" record.
    $msg = false;
    foreach($sessionData as $d){
        if($d[4] == "file" && getid($d,"city")) $msg = true;
    }
    if($msg == true) showmsg("获取城市ID成功",$self."?action=write");
    else showmsg("获取城市ID失败",$self);
}elseif($action == "write"){
    if(isset($_SESSION['ErrorLog']) && is_array($_SESSION['ErrorLog'])) writeError($_SESSION['ErrorLog']);
    // Never overwrite an existing data file with an empty result set.
    if(count($sessionData) > 0 && writeData($sessionData)) showmsg("写入数据成功",$self);
    else showmsg("写入数据失败",$self);
}elseif($action == "list"){
    // The data file is PHP source that defines $CityData; it only exists
    // after a completed crawl.
    $CityData = array();
    if(is_file($WeatherFile)) require_once($WeatherFile);
    if(!is_array($CityData)) $CityData = array();
    echo "<ul>\n";
    foreach($CityData as $d){
        // Names originate from the remote site, so escape them for HTML.
        $name = htmlspecialchars($d[3],ENT_QUOTES,'UTF-8');
        if($type == "region"){
            if($d[4] == "region" && $d[1] == $id) echo "<li><a href='?action=list&type=city&id=".rawurlencode($d[0])."'>".$name."</a></li>\n";
        }elseif($type == "city"){
            if($d[4] == "city" && $d[1] == $id) echo "<li><a href='?action=getdata&id=".rawurlencode($d[2])."'>".$name."</a></li>\n";
        }elseif($type == "province"){
            if($d[4] == "province" && $d[1] == $id) echo "<li><a href='?action=list&type=region&id=".rawurlencode($d[0])."'>".$name."</a></li>\n";
        }else{
            if($d[4] == "country" && $d[1] == "0") echo "<li><a href='?action=list&type=province&id=".rawurlencode($d[0])."'>".$name."</a></li>\n";
        }
    }
    echo "</ul>\n";
}elseif($action == "getdata"){
    // $id comes straight from the query string; encoding it keeps it a single
    // path segment of the remote URL.
    echo getData("/html/weather/".rawurlencode(trim($id)).".shtml");
}
?>
</div>
<!-- Entry links: start a new crawl, or browse the previously written data file. -->
<p align="center"><a href="<?php echo htmlspecialchars($_SERVER['PHP_SELF'],ENT_QUOTES,'UTF-8')."?action=province"; ?>">准备开始获取weather.com.cn城市数据</a></p>
<p align="center"><a href="<?php echo htmlspecialchars($_SERVER['PHP_SELF'],ENT_QUOTES,'UTF-8')."?action=list"; ?>">准备开始读取weather.com.cn城市数据</a></p>
</body>
</html>
<?php
///////////////// function ////////////////
/**
 * Fetch one level of the remote city list and append it to the session.
 *
 * Downloads {$WeatherHost}/data/list3/city{$id}.xml, splits the response on
 * "," into entries and each entry on "|" into a code and a name, and stores
 * every entry in $_SESSION['data'] as array(recordId, $pid, code, name, $type).
 * Record ids are taken from the $_SESSION['WeatherId'] counter, which is
 * advanced once per stored record. A progress line is echoed for each record.
 *
 * A failed or empty download is retried up to three more times; after that
 * the URL is appended to $_SESSION['ErrorLog'].
 *
 * @param string $id   Code appended to the list URL ("" for the top-level list).
 * @param int    $pid  Record id of the parent entry, stored with every child.
 * @param string $type Tag stored with every record created by this call.
 * @return bool True when at least one record was stored, false otherwise.
 */
function getCity($id,$pid,$type = "dir"){
    global $WeatherHost;
    // The retry budget is counted per call so that a URL which exhausted its
    // retries does not leave the next URL without any.
    $maxRetries = 3;
    $url = $WeatherHost."/data/list3/city".$id.".xml";

    $fp = "";
    for($attempt = 0; $attempt <= $maxRetries; $attempt++){
        $fp = file_get_contents($url);
        if($fp !== false && $fp !== "") break;
    }
    if($fp === false || $fp === ""){
        $_SESSION['ErrorLog'][] = $id." – ".$pid." – :".$url."\n";
        echo "获取[".$pid."-".$id.":".$url."]失败<br />\n";
        return false;
    }

    $msg = false;
    foreach(explode(",",$fp) as $d){
        $arr = explode("|",trim($d));
        // Skip empty or malformed entries instead of storing a record
        // without a name.
        if(count($arr) < 2 || $arr[0] === "") continue;
        $recordId = $_SESSION['WeatherId'];
        $_SESSION['data'][$recordId] = array($recordId,$pid,$arr[0],$arr[1],$type);
        echo "获取[".$recordId."-".$pid."-".$arr[0]."-".$arr[1]."]成功<br />\n";
        $_SESSION['WeatherId'] ++;
        $msg = true;
    }
    if(!$msg){
        // A response arrived but contained no usable "code|name" entry.
        $_SESSION['ErrorLog'][] = $id." – ".$pid." – :".$url."\n";
        echo "获取[".$pid."-".$id.":".$url."]失败<br />\n";
    }
    return $msg;
}
/**
 * Resolve one collected record into its final id.
 *
 * Downloads {$WeatherHost}/data/list3/city{$arr[2]}.xml, splits the response
 * on "|" and takes the second field as the id. The record stored under
 * $_SESSION['data'][$arr[0]] is replaced by
 * array($arr[0], $arr[1], id, $arr[3], $type). A progress line is echoed.
 *
 * A failed or empty download is retried up to three more times. When no id
 * can be obtained the URL is appended to $_SESSION['ErrorLog'] and the
 * stored record is left unchanged.
 *
 * @param array  $arr  Record as stored in the session: array(id, parentId, code, name, type).
 * @param string $type Tag written into the replaced record.
 * @return bool True when the record was updated, false otherwise.
 */
function getid($arr,$type = "id"){
    global $WeatherHost;
    // The retry budget is counted per call so that a URL which exhausted its
    // retries does not leave the next URL without any.
    $maxRetries = 3;
    $url = $WeatherHost."/data/list3/city".$arr[2].".xml";

    $content = array();
    for($attempt = 0; $attempt <= $maxRetries; $attempt++){
        $fp = file_get_contents($url);
        if($fp !== false && $fp !== ""){
            $content = explode("|",trim($fp));
            break;
        }
    }

    // Either nothing was downloaded or the response has no second field.
    if(count($content) < 2 || $content[1] === ""){
        $_SESSION['ErrorLog'][] = $arr[0]." – ".$arr[1]." – :".$url."\n";
        echo "获取[".$arr[0]."-".$arr[1]."-".$arr[3].":".$url."]失败<br />\n";
        return false;
    }

    $_SESSION['data'][$arr[0]] = array($arr[0],$arr[1],$content[1],$arr[3],$type);
    echo "获取[".$arr[0]."-".$arr[1]."-".$arr[3].":".$url."]成功<br />\n";
    return true;
}
/**
 * Write the collected records to $WeatherFile as PHP source.
 *
 * The generated file assigns $CityData = array(array(id, parentId, code,
 * name, type), ...), one inner array per record.
 *
 * @param array $arr Records, each an array(id, parentId, code, name, type).
 * @return bool True when the file was written, false when $arr is not an
 *              array or the file could not be opened or written.
 */
function writeData($arr){
    global $WeatherFile;
    if(!is_array($arr)) return false;

    $rows = array();
    foreach($arr as $d){
        // The output is executable PHP that gets require'd later, and the
        // string fields come from a remote site: emit ids as integers and let
        // var_export() quote and escape the strings so a quote or backslash
        // in the data cannot break out of the literal.
        $rows[] = "array(".(int)$d[0].",".(int)$d[1].","
            .var_export((string)$d[2],true).","
            .var_export((string)$d[3],true).","
            .var_export((string)$d[4],true).")";
    }
    $data = "<"."?php\n \$CityData = array(\n".implode(",\n",$rows)."\n);\n ?>";

    $fp = fopen($WeatherFile,"w");
    if($fp === false) return false;
    $written = fwrite($fp,$data);
    // Close the handle whether or not the write succeeded.
    fclose($fp);
    return $written !== false && $written > 0;
}
/**
 * Download a page from $WeatherHost and extract its forecast markup.
 *
 * The page must contain two <div class="weatherYubaoBox"> elements. The
 * content of the first one is returned, or of both concatenated when $day
 * is 7. Table attributes in the extracted markup are rewritten and
 * root-relative src attributes are prefixed with $WeatherHost.
 *
 * @param string $url Path appended to $WeatherHost.
 * @param int    $day 7 returns both boxes; any other value only the first.
 * @return string|false Extracted markup, or false when the page could not be
 *                      downloaded or does not contain the expected boxes.
 */
function getData($url,$day = 3){
    global $WeatherHost;
    $fp = file_get_contents($WeatherHost.$url);
    if($fp === false || $fp === "") return false;

    $tag = "|.*<div class=\"weatherYubaoBox\">(.*)<\/div>.*<div.*weatherYubaoBox\">(.*)<\/div>.*|isU";
    // Without this check a page lacking the boxes would lead to reads of
    // undefined match offsets.
    if(preg_match($tag,$fp,$boxes) !== 1) return false;

    $html = ($day == 7) ? $boxes[1].$boxes[2] : $boxes[1];
    $search = array("width=\"100%\"","border=\"0\"","cellspacing=\"0\"","cellpadding=\"0\"","src=\"/");
    $replace = array("width=\"480\"","border=\"1\"","cellspacing=\"1\"","cellpadding=\"1\"","src=\"".$WeatherHost."/");
    return str_replace($search,$replace,$html);
}
/**
 * Write the collected error entries to $errorFile, replacing its content.
 *
 * Entries are joined with a newline. Nothing is written when $arr is not an
 * array or the file cannot be opened.
 *
 * @param array $arr Error log entries (strings).
 * @return void
 */
function writeError($arr){
    global $errorFile;
    if(!is_array($arr)) return;

    $fp = fopen($errorFile,"w");
    // fopen() returns false on failure; writing to that would raise an error.
    if($fp === false) return;
    fwrite($fp,implode("\n",$arr));
    fclose($fp);
}
/**
 * Print a status page that shows $msg and forwards the browser to $url.
 *
 * The forward is attempted twice: a meta refresh after 2 seconds and a
 * script-driven redirect after about 3 seconds. A manual link is shown too.
 *
 * @param string $msg Text shown to the user.
 * @param string $url Target of the link and of the automatic redirect.
 * @return void
 */
function showmsg($msg,$url){
    // No site name is defined in the visible part of this file; use a global
    // $mysitename when one exists, otherwise leave the title part empty.
    $siteName = isset($GLOBALS['mysitename']) ? (string)$GLOBALS['mysitename'] : '';
    // Callers build $url from PHP_SELF, which is request-controlled, so it is
    // escaped separately for the HTML and the JavaScript contexts.
    $htmlUrl = htmlspecialchars($url,ENT_QUOTES,'UTF-8');
    $jsUrl = json_encode($url,JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT);
?>
<html>
<head>
<TITLE>:::<?php echo htmlspecialchars($siteName,ENT_QUOTES,'UTF-8'); ?>:::系统信息提示:::</TITLE>
<META http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta HTTP-EQUIV="REFRESH" CONTENT="2; URL=<?php echo $htmlUrl; ?>">
</head>
<body>
<p align="center" style="height:200px;"><a href="<?php echo $htmlUrl; ?>">下一步</a></p>
<p align="center"><?php echo htmlspecialchars($msg,ENT_QUOTES,'UTF-8'); ?></p>
<script type="text/javascript">
function mylocation(){
window.location = <?php echo $jsUrl; ?>;
}
setTimeout('mylocation()',3005);
</script>
</body>
</html>
<?php
}
?>