#要求:给定一个大数据集群的日志文件,每行日志记录一个事件,请统计每种事件类型(event_type)出现的次数。日志格式如下:
timestamp event_type event_details
比如:
3123123123 START JobA started
3123123123 END JobA over
3123123123 ERROR JobA started
3123123123 END JobA started
# Read a log file and report how many times each event type occurs.
from collections import Counter
from typing import Iterable, Optional


def parse_event_type(line: str) -> Optional[str]:
    """Return the event_type field of one log line, or None if it is missing.

    A line is expected to look like ``timestamp event_type event_details``.
    Splitting on arbitrary whitespace (rather than a single space) tolerates
    tabs and repeated spaces, and keeps the trailing newline out of the
    field when a line carries no details.
    """
    fields = line.split(maxsplit=2)
    if len(fields) < 2:
        # Blank line, or a line holding only a timestamp: nothing to count.
        return None
    return fields[1]


def count_events(lines: Iterable[str]) -> Counter:
    """Count occurrences of each event type across *lines*.

    Lines without an event_type field are skipped. The returned Counter
    keeps event types in first-seen order.
    """
    event_types = (parse_event_type(line) for line in lines)
    return Counter(event for event in event_types if event is not None)


def main(path: str = 'test.log') -> None:
    """Echo the log at *path*, then print one ``event:count`` line per type."""
    with open(path, mode='r', encoding='utf-8') as f:
        data = f.readlines()

    # Echo every raw line, as the original script did.
    for line in data:
        print(line)

    # The counter is built once over the whole file, not reset per line.
    log_count = count_events(data)

    # Print the totals.
    for event, count in log_count.items():
        print(f'{event}:{count}')


if __name__ == '__main__':
    main()