数据库:MySQL
平台:.NET Framework 2.0 (C#)
组件:Winista.Text.HtmlParser
演示页面:2013年3月7日南京市各区县农贸市场主副食品价格对比表 http://www.njprice.com/col71/col464/articleinfo.php?infoid=44181
2013年2月28日南京市各区县农贸市场主副食品价格对比表 http://www.njprice.com/col71/col464/articleinfo.php?infoid=44079
以及所有其他日期发布的《南京市各区县农贸市场主副食品价格对比表》
using System;
using System.Collections.Generic;
using System.Text;
using nanjing_price.WebUtility;
using Winista.Text.HtmlParser;
using Winista.Text.HtmlParser.Lex;
using Winista.Text.HtmlParser.Util;
using Winista.Text.HtmlParser.Tags;
using Winista.Text.HtmlParser.Filters;
using org.nutlab;

namespace nanjing_price.Fuction
{
    /// <summary>
    /// Downloads one price-comparison page, locates the price table inside it and
    /// stores every price cell through <c>good_price.Add()</c>.
    /// All work is triggered from the constructor.
    /// </summary>
    class NanjingMain
    {
        // Encoding name handed to every Parser.CreateParser call.
        private const string PageEncoding = "gb2312";

        // Layout of the price table, as indexed by the extraction code below.
        // NOTE(review): these positions are tied to the markup of the target page;
        // confirm against a live page before changing them.
        private const int DateRowIndex = 2;          // row whose first cell holds the publish date
        private const int DistrictRowIndex = 4;      // header row read into district_name
        private const int MarketRowIndex = 5;        // header row read into market_name
        private const int FirstDataRowIndex = 6;     // first row containing goods
        private const int FirstPriceColumnIndex = 3; // columns 0..2 are name / standard / unit

        // Number of leading characters dropped from the date cell before parsing.
        // NOTE(review): presumably a fixed label in front of the date - confirm.
        private const int DatePrefixLength = 3;

        string content;
        string urlStr;

        /// <summary>
        /// Fetches the page at <paramref name="url"/> and immediately imports its prices.
        /// </summary>
        /// <param name="url">Address of the page to scrape; also stored as get_url on every record.</param>
        public NanjingMain(string url)
        {
            this.urlStr = url;
            getContent();
            inputDB();
        }

        /// <summary>
        /// Downloads the page and keeps the result of Tools.filterScript in <c>content</c>.
        /// </summary>
        void getContent()
        {
            webUtility web = new webUtility();
            content = Tools.filterScript(web.getContent(urlStr));
        }

        /// <summary>
        /// Drills down to the price table, reads the publish date and adds one
        /// good_price record per (goods row, price column) cell.
        /// </summary>
        void inputDB()
        {
            good_price price = new good_price();
            NodeFilter tableFilter = new TagNameFilter("table");

            // The price table is nested three levels deep; each step re-parses the
            // HTML of the node selected in the previous step.
            string html = Tools.filterTableAttribute(content.Replace(System.Environment.NewLine, ""));
            Parser parser = Parser.CreateParser(html, PageEncoding);
            INode node = parser.Parse(tableFilter)[2].Children[3];

            parser = Parser.CreateParser(node.ToHtml(), PageEncoding);
            node = parser.Parse(tableFilter)[3];

            parser = Parser.CreateParser(node.ToHtml(), PageEncoding);
            node = parser.Parse(tableFilter)[2];
            Console.WriteLine(node.ToHtml());

            // The node selected above is expected to be a <table>.
            TableTag tableTag = (TableTag)node;

            // All <tr> rows of that table.
            Winista.Text.HtmlParser.Tags.TableRow[] rows = tableTag.Rows;

            TableColumn[] dateCells = rows[DateRowIndex].Columns;
            string dateText = dateCells[0].ToPlainTextString().Trim().Substring(DatePrefixLength);
            DateTime publishTime = Convert.ToDateTime(dateText);
            Console.WriteLine(publishTime);

            // Nothing to import when the table has no goods rows.
            if (rows.Length <= FirstDataRowIndex)
            {
                return;
            }

            // Header rows are the same for every goods row, so read them once.
            TableColumn[] districtCells = rows[DistrictRowIndex].Columns;
            TableColumn[] marketCells = rows[MarketRowIndex].Columns;

            for (int rowIndex = FirstDataRowIndex; rowIndex < rows.Length; rowIndex++)
            {
                TableColumn[] cells = rows[rowIndex].Columns;

                // Rows without any price column contribute no records.
                if (cells.Length <= FirstPriceColumnIndex)
                {
                    continue;
                }

                string goodsName = cells[0].ToPlainTextString().Trim();
                string goodsStandard = cells[1].ToPlainTextString().Trim();
                string goodsUnit = cells[2].ToPlainTextString().Trim();

                for (int col = FirstPriceColumnIndex; col < cells.Length; col++)
                {
                    // The same good_price instance is refilled and added for every cell.
                    price.name = goodsName;
                    price.standard = goodsStandard;
                    price.unit = goodsUnit;
                    price.district_name = districtCells[col].ToPlainTextString().Trim();
                    price.market_name = marketCells[col].ToPlainTextString().Trim();
                    price.amount = cells[col].ToPlainTextString().Trim();
                    price.publish_time = publishTime;
                    price.get_time = System.DateTime.Now;
                    price.get_url = urlStr;
                    price.Add();
                }
            }
        }
    }
}
源代码下载: 点击下载
SVN: Google Code