Python程序如下:
import re,urllib
# Fetch every page listed in "wangzhi.txt" (one URL per line), pull out the
# paragraph text with regular expressions, and write the text of the page on
# line N of that list to "N.txt".
#
# NOTE: this script targets Python 2 -- urllib.urlopen() does not exist in
# Python 3.
# NOTE(review): the indentation of this snippet was lost before review, so the
# nesting below is reconstructed; in particular the re.sub() clean-up is
# assumed to run once per page, after all paragraphs are collected -- confirm.
# NOTE(review): several search strings below are empty and look as if HTML
# tags/entities were stripped from them when the snippet was copied. They are
# kept exactly as found; restore the intended literals from the real script.
strTxt = ""
x = 1  # 1-based position in the URL list; also the output file's name
with open("wangzhi.txt", "r") as ff:
    for line in ff:
        # A blank line (e.g. at the end of the list) carries no URL to fetch.
        if line.strip():
            print(line)
            # NOTE(review): the text before "(.*?)" was a line break in the
            # snippet, written here as \n; it was presumably an opening <p>
            # tag -- confirm.
            n = re.findall(r"\n(.*?)<\/p>", urllib.urlopen(line).read(), re.M)
            parts = []
            for i in n:
                if len(i) != 0:
                    # NOTE(review): possibly "&nbsp;" originally -- confirm.
                    i = i.replace(" ", "")
                    # As written, the next two calls change nothing.
                    i = i.replace("", "")
                    i = i.replace("", "")
                    parts.append(i)
            # Join once instead of growing the string paragraph by paragraph.
            strTxt = "".join(parts)
            # As written, the first three substitutions change nothing; only
            # the last one (closing anchor tags) removes text.
            strTxt = re.sub(r"", r"", strTxt)
            strTxt = re.sub(r"", r"", strTxt)
            strTxt = re.sub(r"(.*?)", r"", strTxt)
            strTxt = re.sub(r"<\/[Aa]>", r"", strTxt)
            # "with" guarantees the file is flushed and closed; the original
            # wrote "f.close" without parentheses, so close() was never called.
            with open(str(x) + ".txt", "w+") as f:
                f.write(strTxt)
            strTxt = ""
        x = x + 1