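# This code is meant to be used together with code 11.
# The positive examples (queries whose label is 1) need to be reduced by half.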
# Input files: line i of each file belongs to the same example.
A_PATH = '/home/xbwang/croschangetrain/data/sick/train/a.toks'
B_PATH = '/home/xbwang/croschangetrain/data/sick/train/b.toks'
LABEL_PATH = '/home/xbwang/croschangetrain/data/sick/train/sim.txt'

# Output files; they are opened in append mode, so existing content is kept.
OUT_A_PATH = '/home/xbwang/Desktop/a'
OUT_B_PATH = '/home/xbwang/Desktop/b'
OUT_LABEL_PATH = '/home/xbwang/Desktop/c'

# Raw label line (including its newline) that marks a positive example.
POSITIVE_LABEL = '1\n'
# Upper bound on how many positive examples are dropped.
MAX_REMOVED = 2001
# Only examples at positions below this index are candidates for removal.
SCAN_LIMIT = 4500


def drop_positive_records(records, max_removed=MAX_REMOVED,
                          scan_limit=SCAN_LIMIT):
    """Return a copy of *records* with leading positive examples removed.

    Each record is a ``(a_line, b_line, label_line)`` tuple. Records are
    visited in order; a record is dropped when its position is below
    *scan_limit*, its label line equals ``POSITIVE_LABEL`` and fewer than
    *max_removed* records have been dropped so far. All other records are
    kept in their original order. The input sequence is not modified.

    A new list is built instead of popping from the list being indexed:
    popping shifts the following record into the current slot, so that
    record is never examined, and the shrinking list can be indexed past
    its end.
    """
    kept = []
    removed = 0
    for index, record in enumerate(records):
        droppable = (
            index < scan_limit
            and removed < max_removed
            and record[2] == POSITIVE_LABEL
        )
        if droppable:
            removed += 1
        else:
            kept.append(record)
    return kept


def read_lines(path):
    """Return all lines of the text file at *path*, newlines included."""
    with open(path, 'r') as handle:
        return handle.readlines()


def main():
    """Read the three parallel input files, drop positives, append the rest."""
    a_lines = read_lines(A_PATH)
    b_lines = read_lines(B_PATH)
    label_lines = read_lines(LABEL_PATH)

    # The number of examples is defined by the first file; the other two
    # must provide at least that many lines (extra lines are ignored).
    expected = len(a_lines)
    if len(b_lines) < expected or len(label_lines) < expected:
        raise ValueError(
            'input files are not parallel: %d / %d / %d lines'
            % (expected, len(b_lines), len(label_lines))
        )

    # Keep the three columns as a tuple rather than joining them with a
    # separator string, so sentence text can never be mistaken for one.
    records = list(zip(a_lines, b_lines, label_lines))
    kept = drop_positive_records(records)

    with open(OUT_A_PATH, 'a') as out_a, \
            open(OUT_B_PATH, 'a') as out_b, \
            open(OUT_LABEL_PATH, 'a') as out_label:
        for a_line, b_line, label_line in kept:
            out_a.write(a_line)
            out_b.write(b_line)
            out_label.write(label_line)


if __name__ == '__main__':
    main()