最近遇到一个需求:解析某个文件夹下所有的 JSON 文件。数据目录如下所示,每个子文件夹下各存放了 2017 年到 2021 年的空气质量数据。
原始数据是如下所示的 JSON 格式,这里以黄山 2020 年 10 月的数据为例:
{"title": ["\u65e5\u671f", "\u8d28\u91cf\u7b49\u7ea7", "AQI\u6307\u6570", "\u5f53\u5929AQI\u6392\u540d", "PM2.5", "PM10", "So2", "No2", "Co", "O3"], "data": {"2020-10-01": ["2020-10-01", "\u826f", "58", "275", "30", "57", "9", "26", "0.88", "99"], "2020-10-02": ["2020-10-02", "\u826f", "70", "340", "44", "62", "9", "25", "0.96", "101"], "2020-10-03": ["2020-10-03", "\u4f18", "44", "243", "30", "23", "8", "9", "0.83", "103"], "2020-10-04": ["2020-10-04", "\u4f18", "41", "252", "24", "42", "11", "18", "0.75", "58"], "2020-10-05": ["2020-10-05", "\u4f18", "42", "253", "16", "42", "11", "20", "0.58", "63"], "2020-10-06": ["2020-10-06", "\u4f18", "48", "252", "28", "50", "10", "28", "0.77", "60"], "2020-10-07": ["2020-10-07", "\u4f18", "38", "212", "19", "35", "9", "20", "0.57", "73"], "2020-10-08": ["2020-10-08", "\u4f18", "38", "145", "14", "37", "9", "22", "0.50", "78"], "2020-10-09": ["2020-10-09", "\u4f18", "40", "135", "14", "38", "10", "25", "0.45", "74"], "2020-10-10": ["2020-10-10", "\u4f18", "46", "143", "17", "43", "10", "32", "0.45", "77"], "2020-10-11": ["2020-10-11", "\u826f", "54", "198", "21", "51", "10", "33", "0.56", "91"], "2020-10-12": ["2020-10-12", "\u826f", "53", "213", "27", "56", "9", "24", "0.63", "84"], "2020-10-13": ["2020-10-13", "\u826f", "58", "245", "30", "66", "8", "19", "0.63", "87"], "2020-10-14": ["2020-10-14", "\u4f18", "41", "222", "16", "42", "8", "22", "0.45", "73"], "2020-10-15": ["2020-10-15", "\u4f18", "27", "83", "14", "27", "8", "24", "0.49", "69"], "2020-10-16": ["2020-10-16", "\u4f18", "23", "86", "10", "14", "7", "16", "0.52", "66"], "2020-10-17": ["2020-10-17", "\u4f18", "49", "227", "34", "49", "8", "23", "0.68", "72"], "2020-10-18": ["2020-10-18", "\u826f", "62", "258", "44", "70", "9", "47", "0.83", "70"], "2020-10-19": ["2020-10-19", "\u826f", "62", "244", "41", "64", "8", "44", "0.73", "85"], "2020-10-20": ["2020-10-20", "\u826f", "59", "195", "38", "66", "11", "51", "0.77", "75"], "2020-10-21": ["2020-10-21", 
"\u826f", "55", "183", "38", "58", "9", "21", "0.70", "81"], "2020-10-22": ["2020-10-22", "\u826f", "99", "315", "49", "148", "12", "35", "0.75", "65"], "2020-10-23": ["2020-10-23", "\u826f", "65", "211", "25", "82", "8", "42", "0.59", "50"], "2020-10-24": ["2020-10-24", "\u826f", "59", "146", "33", "70", "8", "62", "0.79", "48"], "2020-10-25": ["2020-10-25", "\u826f", "74", "246", "47", "99", "7", "83", "0.94", "45"], "2020-10-26": ["2020-10-26", "\u826f", "88", "260", "61", "122", "11", "86", "1.05", "46"], "2020-10-27": ["2020-10-27", "\u826f", "75", "261", "50", "98", "11", "42", "0.87", "59"], "2020-10-28": ["2020-10-28", "\u826f", "53", "197", "33", "60", "8", "29", "0.71", "45"], "2020-10-29": ["2020-10-29", "\u826f", "58", "192", "28", "67", "7", "32", "0.74", "52"], "2020-10-30": ["2020-10-30", "\u826f", "61", "201", "32", "72", "7", "48", "0.71", "56"], "2020-10-31": ["2020-10-31", "\u826f", "59", "174", "28", "68", "7", "61", "0.85", "46"]}}
这是把原始数据放入 JSON 在线解析器后显示的结果。原始数据中的汉字是以 Unicode 转义序列的形式保存的(例如 "\u65e5\u671f" 即"日期"),在线解析器可以直接把这种转义序列还原成汉字。
code:
import json
import os


def AnalysisJson(input_dir="F:\\air\\input\\", output_path="F:\\air\\output\\result.txt"):
    """Flatten every JSON file under *input_dir* into one comma-separated text file.

    Each input file is expected to be a JSON object whose ``"data"`` key maps
    to an object of rows, every row being a list of strings. Each row is
    written to *output_path* as one comma-joined line.

    Args:
        input_dir: Root directory to scan. Sub-directories are visited
            recursively.
        output_path: Text file that receives the rows. It is opened in append
            mode, so rows from earlier runs are kept.

    Raises:
        json.JSONDecodeError: If a visited file is not valid JSON.
        KeyError: If a parsed file has no ``"data"`` key.
        TypeError: If a row contains a non-string item.
    """
    # Open the output once for the whole run instead of once per row.
    with open(output_path, "a", encoding="utf8") as out_file:
        for root, dirs, files in os.walk(input_dir):
            # Sorting in place makes the traversal (and thus the output
            # order) reproducible across platforms.
            dirs.sort()
            for file_name in sorted(files):
                # `root` is the directory currently being visited; joining
                # against it (not against input_dir) is what makes files
                # inside sub-directories resolvable.
                json_path = os.path.join(root, file_name)
                # Explicit utf-8: otherwise the platform default codec is
                # used (e.g. GBK on Chinese-locale Windows), which fails on
                # files containing Chinese text.
                with open(json_path, encoding="utf-8") as in_file:
                    payload = json.load(in_file)
                # payload["data"] maps each date to that day's list of
                # air-quality values; only the value lists are written.
                for row in payload["data"].values():
                    out_file.write(",".join(row) + "\n")


if __name__ == '__main__':
    AnalysisJson()
结果: