<?php
/**
 * Dianping restaurant-listing scraper.
 *
 * The script can be executed directly with the PHP CLI or requested through a
 * web server; running it as a background job on Linux is recommended. One
 * listing page is fetched every 30 seconds -- a tighter interval gets the
 * client banned.
 *
 * NOTE(review): the database layer uses the legacy ext/mysql functions
 * (mysql_connect, mysql_query, ...), which were removed in PHP 7. They are
 * kept so that database::query() keeps returning a mysql result resource for
 * any caller outside this file; migrating to mysqli/PDO is recommended.
 */

set_time_limit(0);

/**
 * Scrapes restaurant listings per cuisine category and stores them in the
 * `venue` table.
 */
class snap_dianping
{
    /**
     * Categories to scrape, keyed by category name.
     * The "(*)" placeholder in each URL stands for the page number.
     */
    public static $url = array(
        '川菜' => 'http://www.dianping.com/search/category/2/10/g317p(*)/g10g317',
        '北京菜' => 'http://www.dianping.com/search/category/2/10/g311p(*)/g10g311',
        '湘菜' => 'http://www.dianping.com/search/category/2/10/g318p(*)/g10g318',
        '鲁菜' => 'http://www.dianping.com/search/category/2/10/g322p(*)/g10g322',
        '湖北菜' => 'http://www.dianping.com/search/category/2/10/g319p(*)/g10g319',
        '江浙菜' => 'http://www.dianping.com/search/category/2/10/g313p(*)/g10g313',
        '粤菜' => 'http://www.dianping.com/search/category/2/10/g315p(*)/g10g315',
        '东北菜' => 'http://www.dianping.com/search/category/2/10/g321p(*)/g10g321',
        '新疆/清真' => 'http://www.dianping.com/search/category/2/10/g323p(*)/g10g323',
        '西北菜' => 'http://www.dianping.com/search/category/2/10/g324p(*)/g10g324',
        '云南菜' => 'http://www.dianping.com/search/category/2/10/g320p(*)/g10g320',
        '贵州菜' => 'http://www.dianping.com/search/category/2/10/g325p(*)/g10g325',
        '素菜' => 'http://www.dianping.com/search/category/2/10/g1859p(*)/g10g1859',
        '火锅' => 'http://www.dianping.com/search/category/2/10/g328p(*)/g10g328',
        '海鲜' => 'http://www.dianping.com/search/category/2/10/g1887p(*)/g10g1887',
        '小吃快餐' => 'http://www.dianping.com/search/category/2/10/g332p(*)/g10g332',
        '日本' => 'http://www.dianping.com/search/category/2/10/g1941p(*)/g10g1941',
        '韩国' => 'http://www.dianping.com/search/category/2/10/g1943p(*)/g10g1943',
    );

    /** Result accumulator returned by snap_page(); nothing in this class fills it. */
    public static $result = array();

    /**
     * Fetch one listing page, extract every shop entry and insert it into the
     * `venue` table.
     *
     * @param string $url      Fully expanded listing URL (page number already substituted).
     * @param string $typename Category name stored in the `type` column.
     * @return array List of shops; each entry has the keys name, address,
     *               phone, city, tags and -- when an address was found --
     *               lat and lon. Empty when the page could not be fetched or
     *               contained no recognisable entries.
     */
    public static function snap_list($url, $typename = '')
    {
        $html = self::fetch_html($url);

        // Collapse the markup so the patterns below can ignore line breaks and
        // indentation. A failed download (false) becomes an empty string here.
        $compact = preg_replace('/(\t+|\r|\n|\s{2,})/i', '', (string) $html);

        // One match per shop entry in the result list.
        preg_match_all('/<dd><ul(\s+)class="remark"(.{1300,2700})<\/p><\/dd>/', $compact, $blocks);
        $blocks = $blocks[0];

        $data = array();
        $db = database::getInstance();

        foreach ($blocks as $i => $block) {
            // Shop name; group 5 is the title attribute of the shop link.
            preg_match_all(
                '/<li(\s+)class="shopname"><a(\s+)href="\/shop\/(.{0,30})"(\s+)class="BL"title="(.{0,80})"(\s+)onclick=/',
                $block,
                $m
            );
            // Strip a trailing `" kid="..."` fragment the title group may have captured.
            $data[$i]['name'] = preg_replace('/"(\s+)kid="(.{0,8})/', '', self::first_match($m, 5));

            // Address line: group 4 = city link text, 5 = street address, 6 = phone.
            preg_match_all(
                '/<li>地址: <a(\s+)href="(.{0,30})"(\s+)class="Black-H">(.{0,180})<\/a>(.{0,180}) (.{0,10})<\/li>/',
                $block,
                $m
            );
            $address = self::first_match($m, 5);
            $data[$i]['address'] = $address;
            $data[$i]['phone'] = self::first_match($m, 6);
            $data[$i]['city'] = self::first_match($m, 4);

            // Only geocode when an address was actually extracted.
            if ($address !== '') {
                $geo = self::snap_geo($address);
                $data[$i]['lat'] = $geo['lat'];
                $data[$i]['lon'] = $geo['lon'];
            }

            // Tags: take the link texts inside the "标签" list item.
            preg_match_all('/<li>标签: (.{0,230})<\/li>/', $block, $m);
            preg_match_all('/<a(.{0,110})">(.{0,30})<\/a>/', self::first_match($m, 1), $tagMatches);
            $data[$i]['tags'] = implode(',', $tagMatches[2]);

            // Every value comes from scraped HTML, so escape each one before
            // it is embedded in the statement.
            $sql = sprintf(
                "INSERT INTO venue (name,cityName,phone,address,type,tags) VALUES ('%s','%s','%s','%s','%s','%s')",
                $db->escape($data[$i]['name']),
                $db->escape($data[$i]['city']),
                $db->escape($data[$i]['phone']),
                $db->escape($data[$i]['address']),
                $db->escape($typename),
                $db->escape($data[$i]['tags'])
            );
            $db->query($sql);
        }

        return $data;
    }

    /**
     * Walk every category in self::$url page by page until a page yields no
     * shops, pausing 30 seconds before each request.
     *
     * @return array self::$result. NOTE(review): the per-page results are not
     *               merged into it, so this is the unmodified static property
     *               (empty by default).
     */
    public static function snap_page()
    {
        foreach (self::$url as $key => $val) {
            echo $key."<br/>";
            $page = 1;

            while (true) {
                $u = str_replace('(*)', $page, $val);
                $page++;

                echo $u."<br>";
                flush();
                sleep(30); // throttle: faster crawling gets the client banned

                // An empty page marks the end of this category.
                if (!count(self::snap_list($u, $key))) {
                    break;
                }
            }
        }

        return self::$result;
    }

    /**
     * Geocode an address through the Google Maps "maps/geo" JSON endpoint.
     *
     * NOTE(review): this looks like the legacy Geocoding v2 endpoint, which is
     * presumably no longer served -- confirm and migrate if coordinates are needed.
     *
     * @param string $Address Free-form address text.
     * @return array array('lat' => ..., 'lon' => ...); both are '' when the
     *               request fails or the response holds no placemark.
     */
    public static function snap_geo($Address)
    {
        $data = array('lat' => '', 'lon' => '');
        $googleAPI = "http://maps.google.com/maps/geo?output=json&oe=utf8&q=".urlencode($Address);

        // Geocoding is best-effort: a failed request yields empty coordinates.
        $raw = @file_get_contents($googleAPI);
        if ($raw === false) {
            return $data;
        }

        $w = json_decode($raw);
        if (!isset($w->Placemark[0]->Point->coordinates[1])) {
            return $data;
        }

        // v2 responses list coordinates as [longitude, latitude, altitude].
        $coords = $w->Placemark[0]->Point->coordinates;
        $data['lon'] = $coords[0];
        $data['lat'] = $coords[1];

        return $data;
    }

    /**
     * Download a page with a plain GET request.
     *
     * @param string $url
     * @return string|false Response body, or false when the transfer failed.
     */
    private static function fetch_html($url)
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_USERAGENT, "Baiduspider+(+http://www.baidu.com/search/spider.htm)");
        curl_setopt($ch, CURLOPT_TIMEOUT, 10);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        $body = curl_exec($ch);
        curl_close($ch);

        return $body;
    }

    /**
     * First capture of a preg_match_all() group, or '' when nothing matched.
     *
     * @param array $matches Result array filled by preg_match_all().
     * @param int   $group   Capture group index.
     * @return string
     */
    private static function first_match($matches, $group)
    {
        return isset($matches[$group][0]) ? $matches[$group][0] : '';
    }
}

// Flush only when an output buffer is active; otherwise ob_flush() raises a notice.
if (ob_get_level() > 0) {
    ob_flush();
}
date_default_timezone_set('Asia/Shanghai');

/** Alternative connection settings; not referenced by the database class below. */
class sys_conf_b
{
    public static $DBHOST = "localhost";
    public static $DBUSER = "ccom";
    public static $DBPWD = "abc1234";
    public static $DBNAME = "sns";
}

/** Connection settings read by the database class. */
class sys_conf
{
    public static $DBHOST = "localhost";
    public static $DBUSER = "root";
    public static $DBPWD = "root";
    public static $DBNAME = "sns";
}

/**
 * Singleton wrapper around one ext/mysql connection.
 */
class database
{
    private $host;
    private $user;
    private $pwd;
    private $name;
    private $_connection;

    protected static $_instance = null;

    /**
     * Get a property value.
     *
     * @param string $property_name
     * @return mixed The property value, or null when it is not set.
     */
    private function _get($property_name)
    {
        return isset($this->$property_name) ? $this->$property_name : null;
    }

    /**
     * Set a property value.
     *
     * @param string $property Name of the property to assign.
     * @param mixed  $value
     */
    private function _set($property, $value)
    {
        $this->$property = $value;
    }

    /**
     * Open the connection described by sys_conf, switch it to UTF-8 and
     * select the configured schema.
     *
     * @throws RuntimeException When the server cannot be reached.
     */
    private function __construct()
    {
        $this->host = sys_conf::$DBHOST;
        $this->name = sys_conf::$DBNAME;
        $this->pwd = sys_conf::$DBPWD;
        $this->user = sys_conf::$DBUSER;

        $this->_connection = mysql_connect($this->host, $this->user, $this->pwd);
        if (!$this->_connection) {
            throw new RuntimeException('Database connection failed: ' . mysql_error());
        }

        mysql_query("SET NAMES UTF8", $this->_connection);
        mysql_select_db($this->name, $this->_connection);
    }

    /**
     * Return the shared instance, creating it on first use.
     *
     * @return database
     */
    public static function getInstance()
    {
        if (null === self::$_instance) {
            self::$_instance = new self();
        }

        return self::$_instance;
    }

    /** Close the connection if it was opened. */
    public function __destruct()
    {
        if (is_resource($this->_connection)) {
            mysql_close($this->_connection);
        }
    }

    /**
     * Escape a value for use inside a quoted SQL string literal.
     *
     * @param mixed $value
     * @return string
     */
    public function escape($value)
    {
        return mysql_real_escape_string((string) $value, $this->_connection);
    }

    /**
     * Run an insert/update/delete statement.
     *
     * @param string $sql
     * @return resource|bool Whatever mysql_query() returns.
     */
    public function execute($sql)
    {
        return mysql_query($sql, $this->_connection);
    }

    /**
     * Run a query and return the raw mysql_query() result.
     *
     * @param string $sql
     * @return resource|bool
     */
    public function query($sql)
    {
        return mysql_query($sql, $this->_connection);
    }

    /**
     * Run a query and return all rows as associative arrays.
     *
     * @param string $sql
     * @return array Empty when the query fails or returns no rows.
     */
    public function query_array($sql)
    {
        $rows = array();
        $result = mysql_query($sql, $this->_connection);
        if (!$result) {
            return $rows;
        }

        while ($row = mysql_fetch_array($result, MYSQL_ASSOC)) {
            $rows[] = $row;
        }

        return $rows;
    }
}

// Start the crawl and print the size of the returned result array.
echo count(snap_dianping::snap_page());