DOM 解析
from xml.dom.minidom import parse
import xml.dom.minidom

# Open the XML document with the minidom parser.
DOMTree = xml.dom.minidom.parse("xxx.xml")
collection = DOMTree.documentElement

# Look elements up by tag name: take the first "fistTag" element, then every
# "secondTag" element nested inside it.
movies = collection.getElementsByTagName("fistTag")[0]
movies = movies.getElementsByTagName('secondTag')

# Names collected from the comma-separated text of each "secondTag" element.
rpmList = []
for movie in movies:
    # An empty element has no child node to read; skip it instead of
    # failing with IndexError on childNodes[0].
    if not movie.childNodes:
        continue
    # The element's text content is exposed as childNodes[0].data.
    names = movie.childNodes[0].data.split(',')
    rpmList.extend(name.strip() for name in names)
另外还有 SAX 解析,可以更方便地生成 bean 类。
class RpmData:
    """Plain record pairing a name with its ``incFileNum`` value.

    Attributes:
        name: Value passed as ``name``, stored unchanged.
        incFileNum: Value passed as ``incFileNum``, stored unchanged.
    """

    def __init__(self, name, incFileNum):
        self.name = name
        self.incFileNum = incFileNum


def get_index(char):
    """Convert a spreadsheet column name to a zero-based index.

    Accepts one or more ASCII letters, case-insensitively, so that
    ``'A'`` -> 0, ``'Q'`` -> 16, ``'Z'`` -> 25 and ``'AA'`` -> 26.

    Args:
        char: Column name such as ``'A'`` or ``'AB'``.

    Returns:
        The zero-based column index.

    Raises:
        ValueError: If ``char`` is empty or contains anything other than
            ASCII letters.
    """
    if not (char and char.isascii() and char.isalpha()):
        raise ValueError(f"Invalid column letter(s): {char!r}")
    index = 0
    # Column names are a bijective base-26 number with digits A=1 .. Z=26.
    for letter in char.upper():
        index = index * 26 + (ord(letter) - ord('A') + 1)
    return index - 1
import re

from openpyxl import load_workbook
def get_rpm_package_name(rpm_name):
    """Return the package name part of an RPM file name.

    The file name is matched as ``<name>-<version>-<release>.rpm``. Version
    and release are matched from the right and may not contain hyphens, so
    hyphens inside the package name itself are kept
    (``python3-libs-3.9.1-1.el8.x86_64.rpm`` -> ``python3-libs``).

    Args:
        rpm_name: RPM file name ending in ``.rpm``.

    Returns:
        The ``name`` group of the match.

    Raises:
        ValueError: If ``rpm_name`` does not match the expected format.
    """
    match = re.match(
        r"^(?P<name>.+)-(?P<version>[^-]+)-(?P<release>[^-]+)\.rpm$",
        rpm_name,
    )
    if match is None:
        raise ValueError("Invalid RPM package name format")
    return match.group('name')


def extract_name(rpm_package_name):
    """Strip the last two hyphen-separated fields from a package string.

    Args:
        rpm_package_name: String such as ``name-version-release``.

    Returns:
        Everything before the second-to-last hyphen; an empty string when
        the input contains fewer than two hyphens.
    """
    return '-'.join(rpm_package_name.split('-')[:-2])


# Source workbook that the following statements read from.
wb = load_workbook(r'xxx.xlsx')
sheet = wb['rpm视图']

from openpyxl import Workbook

# Create a new workbook and write the filtered rows to its active sheet.
dest_wb = Workbook()
ws = dest_wb.active

# Build the lookup set once so each row costs a single hash lookup.
wanted_names = set(rpmList)
name_col = get_index('A')
inc_col = get_index('Q')

# Rows are read starting from the third row of the source sheet.
for row in sheet.iter_rows(min_row=3):
    inc_file_num = row[inc_col].value
    # Empty or non-numeric cells cannot be compared with 0; treat them like
    # non-positive values and skip the row instead of raising TypeError.
    if not isinstance(inc_file_num, (int, float)) or inc_file_num <= 0:
        continue
    rpm_file_name = row[name_col].value
    # A row without a textual name in column A cannot match any wanted name.
    if not isinstance(rpm_file_name, str):
        continue
    if extract_name(rpm_file_name) in wanted_names:
        # Copy cell values only; append writes them to the next free row
        # starting at the first column.
        ws.append([cell.value for cell in row])

dest_wb.save("sample.xlsx")
当原 Excel 中的单元格带有引用时,实际上只要设置 cell.parent = ws 就可以完成复制,但这样保存出来的文件格式会有一些问题。
QA
运行 python 文件的时候竟然报 SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 2-3: truncated \UXXXXXXXX escape 这个错误,其实引起这个错误的原因就是路径字符串中反斜杠的转义问题,在字符串前加 r 使用原始字符串即可:
# The r'' prefix makes the path a raw string, so backslashes in it are not
# interpreted as escape sequences.
df = pd.read_excel(r'xxx.xlsx')
xlrd.biffh.XLRDError: Excel xlsx file; not supported
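xlrd 2.0 及以上版本不再支持 .xlsx 文件(只支持 .xls);可以安装 openpyxl 并使用 pd.read_excel(path, engine='openpyxl'),或者将 xlrd 降级到 1.2.0。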