#coding:utf-8
"""Remove selected layout elements from every ``.jsp`` file under a folder.

For each ``.jsp`` file found below ``folderPath`` the script parses the file
with Beautiful Soup, detaches the first ``<header>`` element, the first element
with ``role="banner"`` and the first element with class ``col-xs-3``, and then
overwrites the file in place with the prettified, UTF-8 encoded result.
"""
import io
import os

from bs4 import BeautifulSoup

# Root folder that is searched recursively for JSP files.
folderPath = "E:/whm/google/src_jsp"

# Attribute filters whose first match is removed from each document,
# applied in this order after the <header> element has been removed.
_ATTRS_TO_REMOVE = (
    {"role": "banner"},
    {"class": "col-xs-3"},
)


def _strip_unwanted_elements(soup):
    """Detach the unwanted elements from *soup* in place.

    Only the first match of each rule is removed: ``soup.header`` and
    ``soup.find`` both return a single element (or ``None``).  The removals
    run one after another on purpose, so each lookup searches the tree as it
    is left by the previous removal.
    """
    header = soup.header
    if header is not None:
        header.extract()

    for attrs in _ATTRS_TO_REMOVE:
        element = soup.find(attrs=attrs)
        if element is not None:
            element.extract()


def _clean_jsp_file(path):
    """Parse the file at *path*, strip the unwanted elements and rewrite it."""
    # Read raw bytes so Beautiful Soup detects the document encoding itself,
    # and close the handle as soon as parsing is done.
    with io.open(path, "rb") as source:
        soup = BeautifulSoup(source, "html.parser")

    _strip_unwanted_elements(soup)

    # prettify() returns the document as a re-indented text string;
    # formatter=None writes text and attribute values without entity
    # substitution.  The file object performs the UTF-8 encoding, so text is
    # written directly (io.open behaves the same on Python 2 and Python 3).
    with io.open(path, "w", encoding="utf-8") as target:
        target.write(soup.prettify(formatter=None))


for dirPath, dirNames, fileNames in os.walk(folderPath):
    for fileName in fileNames:
        if fileName.endswith(".jsp"):
            _clean_jsp_file(os.path.join(dirPath, fileName))