1.提取省市全称
import re

# Extract the FULL province name and the FULL city name from a string.
#
# Group 1: shortest prefix that ends with a province-level suffix
#          (省 / 自治区 / ...); the suffix is kept inside the group.
# Group 2: shortest following run that ends with 市, 州 or 盟; the suffix is
#          kept inside the group.
#
# `州(?!市)` refuses to stop on a 州 that is immediately followed by 市, so
# names such as 广州市 / 杭州市 are captured whole instead of being cut off
# at "广州" / "杭州" by the lazy quantifier.
# `维吾尔族?自治区` accepts the spelling both with and without 族.
pattern = (
    r'^(.*?(?:省|自治区|维吾尔族?自治区|壮族自治区|回族自治区))'
    r'(.*?(?:市|州(?!市)|盟))'
)

file_names = [
    "浙江省舟山市",
    "云南省西双版纳州",
    "新疆维吾尔族自治区伊犁州",
    "内蒙古自治区阿拉善盟",
    "哈哈哈",
]

for file_name in file_names:
    # The pattern is anchored with ^, so a single match() is sufficient.
    match = re.match(pattern, file_name)
    if match:
        province, city = match.groups()
        print(province, city)
    else:
        # Strings lacking a province suffix or a city suffix end up here.
        print("未匹配到省份和市")
2.提取省市简称
import re

# Extract the SHORT province name and the SHORT city name from a string,
# i.e. both names with their administrative suffix stripped.
#
# Group 1: text before the province-level suffix (省 / 自治区 / ...).
# Group 2: text between that suffix and the city-level suffix (市 / 州 / 盟).
# The suffixes themselves sit in non-capturing groups and are discarded.
#
# `维吾尔族?自治区` matches the spelling both with and without 族, so
# "新疆维吾尔自治区" yields "新疆" rather than "新疆维吾尔".
# `州(?!市)` refuses to stop on a 州 that is immediately followed by 市, so
# "广州市" yields "广州" rather than "广".
pattern = (
    r'^(.*?)(?:省|自治区|维吾尔族?自治区|壮族自治区|回族自治区)'
    r'(.*?)(?:市|州(?!市)|盟)'
)

file_names = [
    "浙江省舟山市",
    "云南省西双版纳州",
    "新疆维吾尔族自治区伊犁州",
    "内蒙古自治区阿拉善盟",
    "哈哈哈",
]

for file_name in file_names:
    # The pattern is anchored with ^, so a single match() is sufficient.
    match = re.match(pattern, file_name)
    if match:
        province, city = match.groups()
        print(province, city)
    else:
        # Strings lacking a province suffix or a city suffix end up here.
        print("未匹配到省份和市")
3.提取省简称,市全称
import re

# Extract the SHORT province name (suffix stripped) together with the FULL
# city name (suffix kept) from a string.
#
# Group 1: text before the province-level suffix (省 / 自治区 / ...); the
#          suffix sits in a non-capturing group and is discarded.
# Group 2: shortest following run that ends with 市, 州 or 盟; the suffix is
#          kept inside the group.
#
# `维吾尔族?自治区` matches the spelling both with and without 族, so
# "新疆维吾尔自治区" yields "新疆" rather than "新疆维吾尔".
# `州(?!市)` refuses to stop on a 州 that is immediately followed by 市, so
# "广州市" is captured whole instead of being cut off at "广州".
pattern = (
    r'^(.*?)(?:省|自治区|维吾尔族?自治区|壮族自治区|回族自治区)'
    r'(.*?(?:市|州(?!市)|盟))'
)

file_names = [
    "浙江省舟山市",
    "云南省西双版纳州",
    "新疆维吾尔族自治区伊犁州",
    "内蒙古自治区阿拉善盟",
    "哈哈哈",
]

for file_name in file_names:
    # The pattern is anchored with ^, so a single match() is sufficient.
    match = re.match(pattern, file_name)
    if match:
        province, city = match.groups()
        print(province, city)
    else:
        # Strings lacking a province suffix or a city suffix end up here.
        print("未匹配到省份和市")