1.输入文件
2.代码
#title:lin_count_ATCG_of_fasta.py
#usage:python in_count_ATCG_of_fasta.py <fasta_file> >output
def read_fasta(filename):
    """Read a FASTA file and return a dictionary of sequences.

    Args:
        filename: Path of the FASTA file to read.

    Returns:
        A dict mapping each sequence ID (the first whitespace-delimited word
        after the leading '>' of a header line) to its sequence, upper-cased
        and with line breaks removed.

    Notes:
        * Records whose sequence is empty are not included in the result.
        * Sequence lines that appear before any header are stored under the
          empty-string ID.
        * If an ID occurs more than once, the last record replaces earlier
          ones.
    """
    sequences = {}
    seq_id = ''
    chunks = []  # pieces of the current record; joined once per record

    with open(filename, 'r') as file:
        for line in file:
            if line.startswith('>'):
                # A new header closes the previous record (if it had data).
                if chunks:
                    sequences[seq_id] = ''.join(chunks)
                    chunks = []
                # Drop the '>' first so that "> id desc" and ">id desc" both
                # yield "id"; a bare ">" yields the empty ID.
                header_words = line[1:].split(None, 1)
                seq_id = header_words[0] if header_words else ''
            else:
                text = line.strip()
                if text:
                    chunks.append(text.upper())

    # Flush the final record, which has no following header to close it.
    if chunks:
        sequences[seq_id] = ''.join(chunks)
    return sequences


def _base_percentages(counts, denominator):
    """Return a dict of A/T/C/G percentages of *denominator* (0 if it is 0)."""
    return {
        base: (counts[base] / denominator) * 100 if denominator > 0 else 0
        for base in 'ATCG'
    }


def calculate_atcg_percentages(sequences):
    """Calculate the A/T/C/G percentages of each sequence and of all of them.

    Args:
        sequences: Dict mapping sequence ID to sequence string. Only the
            upper-case characters 'A', 'T', 'C' and 'G' are counted.

    Returns:
        A tuple ``(atcg_stats, overall_percentages)``:

        * ``atcg_stats`` maps each sequence ID to a dict with keys
          'A', 'T', 'C', 'G' holding that base's percentage of the full
          sequence length (so any other characters lower the values).
        * ``overall_percentages`` is a dict with the same keys holding each
          base's percentage of the total A+T+C+G count across all sequences.

        A percentage is 0 whenever its denominator is 0.
    """
    atcg_stats = {}
    totals = {'A': 0, 'T': 0, 'C': 0, 'G': 0}

    for seq_id, sequence in sequences.items():
        counts = {base: sequence.count(base) for base in 'ATCG'}
        atcg_stats[seq_id] = _base_percentages(counts, len(sequence))
        for base, count in counts.items():
            totals[base] += count

    # Overall values are relative to the counted bases only, not to the
    # summed sequence lengths.
    overall_percentages = _base_percentages(totals, sum(totals.values()))
    return atcg_stats, overall_percentages


def main():
    """Print a tab-separated table of per-sequence A/T/C/G percentages.

    The FASTA path is taken from the first command-line argument. When it is
    missing, a usage message is written to stderr (so it does not end up in
    redirected output) and nothing else is printed.
    """
    import sys

    if len(sys.argv) < 2:
        print("Usage: python script.py <fasta_file>", file=sys.stderr)
        return

    fasta_file = sys.argv[1]
    sequences = read_fasta(fasta_file)
    # The overall percentages are computed by the helper but are not part of
    # the table this script prints.
    atcg_stats, _overall_percentages = calculate_atcg_percentages(sequences)

    print("Sequence_ID\tA(%)\tT(%)\tC(%)\tG(%)")
    for seq_id, stats in atcg_stats.items():
        print(f"{seq_id}\t{stats['A']:.2f}\t{stats['T']:.2f}\t{stats['C']:.2f}\t{stats['G']:.2f}")


if __name__ == "__main__":
    main()
3.输出文件: