在 HTSJDK 库中,`SAMSequenceDictionary` 和 `SAMSequenceRecord` 类用于处理和管理基因组数据中的序列信息(contigs)。这两个类通常一起使用,提供对基因组中所有 contig 的详细描述和访问。
SAMSequenceDictionary
类
主要功能
- 存储序列信息:`SAMSequenceDictionary` 存储了一个基因组的所有 contig 的信息,包括 contig 的名称和长度。
- 提供访问接口:提供方法以获取特定 contig 的信息,方便进行序列数据的访问和操作。
SAMSequenceDictionary
类源码:
/** The MIT License** Copyright (c) 2009 The Broad Institute** Permission is hereby granted, free of charge, to any person obtaining a copy* of this software and associated documentation files (the "Software"), to deal* in the Software without restriction, including without limitation the rights* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell* copies of the Software, and to permit persons to whom the Software is* furnished to do so, subject to the following conditions:** The above copyright notice and this permission notice shall be included in* all copies or substantial portions of the Software.** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN* THE SOFTWARE.*/
package htsjdk.samtools;import htsjdk.beta.plugin.HtsHeader;
import htsjdk.samtools.util.Log;import java.io.Serializable;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.util.*;
import java.util.stream.Collectors;import static htsjdk.samtools.SAMSequenceRecord.*;/*** Collection of SAMSequenceRecords.*/public class SAMSequenceDictionary implements HtsHeader, Serializable {public static final long serialVersionUID = 1L;private List<SAMSequenceRecord> mSequences = new ArrayList<>();private final Map<String, SAMSequenceRecord> mSequenceMap = new HashMap<>();public SAMSequenceDictionary() {}public SAMSequenceDictionary(final List<SAMSequenceRecord> list) {this();setSequences(list);}public List<SAMSequenceRecord> getSequences() {return Collections.unmodifiableList(mSequences);}private static Log log = Log.getInstance(SAMSequenceDictionary.class);public SAMSequenceRecord getSequence(final String name) {return mSequenceMap.get(name);}/*** Replaces the existing list of SAMSequenceRecords with the given list.* Reset the aliases** @param list This value is copied and validated.*/public void setSequences(final List<SAMSequenceRecord> list) {mSequences = new ArrayList<>(list.size());mSequenceMap.clear();list.forEach(this::addSequence);}public void addSequence(final SAMSequenceRecord sequenceRecord) {if (mSequenceMap.containsKey(sequenceRecord.getSequenceName())) {throw new IllegalArgumentException("Cannot add sequence that already exists in SAMSequenceDictionary: " +sequenceRecord.getSequenceName());}sequenceRecord.setSequenceIndex(mSequences.size());mSequences.add(sequenceRecord);mSequenceMap.put(sequenceRecord.getSequenceName(), sequenceRecord);sequenceRecord.getAlternativeSequenceNames().forEach(an -> addSequenceAlias(sequenceRecord.getSequenceName(), an));}/*** @return The SAMSequenceRecord with the given index, or null if index is out of range.*/public SAMSequenceRecord getSequence(final int sequenceIndex) {if (sequenceIndex < 0 || sequenceIndex >= mSequences.size()) {return null;}return mSequences.get(sequenceIndex);}/*** @return The index for the given sequence name, or {@value SAMSequenceRecord#UNAVAILABLE_SEQUENCE_INDEX} if the name is not 
found.*/public int getSequenceIndex(final String sequenceName) {final SAMSequenceRecord record = mSequenceMap.get(sequenceName);if (record == null) {return UNAVAILABLE_SEQUENCE_INDEX;}return record.getSequenceIndex();}/*** @return number of SAMSequenceRecord(s) in this dictionary*/public int size() {return mSequences.size();}/*** @return The sum of the lengths of the sequences in this dictionary*/public long getReferenceLength() {return getSequences().stream().mapToLong(SAMSequenceRecord::getSequenceLength).sum();}/*** @return true is the dictionary is empty*/public boolean isEmpty() {return mSequences.isEmpty();}private static String DICT_MISMATCH_TEMPLATE = "SAM dictionaries are not the same: %s.";/*** Non-comprehensive {@link #equals(Object)}-assertion: instead of calling {@link SAMSequenceRecord#equals(Object)} on constituent* {@link SAMSequenceRecord}s in this dictionary against its pair in the target dictionary, in order, call* {@link SAMSequenceRecord#isSameSequence(SAMSequenceRecord)}.* Aliases are ignored.** @throws AssertionError When the dictionaries are not the same, with some human-readable information as to why*/public void assertSameDictionary(final SAMSequenceDictionary that) {if (this == that) return;final Iterator<SAMSequenceRecord> thatSequences = that.mSequences.iterator();for (final SAMSequenceRecord thisSequence : mSequences) {if (!thatSequences.hasNext()) {throw new AssertionError(String.format(DICT_MISMATCH_TEMPLATE, thisSequence + " is present in only one dictionary"));} else {final SAMSequenceRecord thatSequence = thatSequences.next();if(!thatSequence.isSameSequence(thisSequence)) {throw new AssertionError(String.format(DICT_MISMATCH_TEMPLATE, thatSequence + " was found when " + thisSequence + " was expected"));}}}if (thatSequences.hasNext())throw new AssertionError(String.format(DICT_MISMATCH_TEMPLATE, thatSequences.next() + " is present in only one dictionary"));}/*** Non-comprehensive {@link #equals(Object)}-validation: instead of 
calling {@link SAMSequenceRecord#equals(Object)} on constituent* {@link SAMSequenceRecord}s in this dictionary against its pair in the target dictionary, in order, call* {@link SAMSequenceRecord#isSameSequence(SAMSequenceRecord)}.** @param that {@link SAMSequenceDictionary} to compare against* @return true if the dictionaries are the same, false otherwise**/public boolean isSameDictionary(final SAMSequenceDictionary that) {if (that == null || that.mSequences == null) return false;if (this == that) return true;final Iterator<SAMSequenceRecord> thatSequences = that.mSequences.iterator();for (final SAMSequenceRecord thisSequence : mSequences) {if (!thatSequences.hasNext()) {return false;} else {final SAMSequenceRecord thatSequence = thatSequences.next();if (!thatSequence.isSameSequence(thisSequence)) {return false;}}}return !thatSequences.hasNext();}/*** Returns {@code true} if the two dictionaries are the same.** <p>NOTE: Aliases are NOT considered, but alternative sequence names (AN tag) names ARE.*/@Overridepublic boolean equals(Object o) {if (this == o) return true;if (o == null || getClass() != o.getClass()) return false;SAMSequenceDictionary that = (SAMSequenceDictionary) o;return mSequences.equals(that.mSequences);}/*** Add an alias to a SAMSequenceRecord. This can be use to provide some* alternate names fo a given contig. 
e.g:* <code>1,chr1,chr01,01,CM000663,NC_000001.10</code> e.g:* <code>MT,chrM</code>** <p>NOTE: this method does not add the alias to the alternative sequence name tag (AN) in the SAMSequenceRecord.* If you would like to add it to the AN tag, use {@link #addAlternativeSequenceName(String, String)} instead.** @param originalName existing contig name* @param altName new contig name* @return the contig associated to the 'originalName/altName'*/public SAMSequenceRecord addSequenceAlias(final String originalName,final String altName) {if (originalName == null) throw new IllegalArgumentException("original name cannot be null");if (altName == null) throw new IllegalArgumentException("alt name cannot be null");final SAMSequenceRecord originalSeqRecord = getSequence(originalName);if (originalSeqRecord == null) throw new IllegalArgumentException("Sequence " + originalName + " doesn't exist in dictionary.");// same name, nothing to doif (originalName.equals(altName)) return originalSeqRecord;final SAMSequenceRecord altSeqRecord = getSequence(altName);if (altSeqRecord != null) {// alias was already set to the same recordif (altSeqRecord.equals(originalSeqRecord)) return originalSeqRecord;// alias was already set to another recordthrow new IllegalArgumentException("Alias " + altName + " for " + originalSeqRecord +" was already set to " + altSeqRecord.getSequenceName());}mSequenceMap.put(altName, originalSeqRecord);return originalSeqRecord;}/*** Add an alternative sequence name (AN tag) to a SAMSequenceRecord, including it into the aliases* to retrieve the contigs (as with {@link #addSequenceAlias(String, String)}.** <p>This can be use to provide some alternate names fo a given contig. 
e.g:* <code>1,chr1,chr01,01,CM000663</code> or* <code>MT,chrM</code>.** @param originalName existing contig name* @param altName new contig name* @return the contig associated to the 'originalName/altName', with the AN tag including the altName*/public SAMSequenceRecord addAlternativeSequenceName(final String originalName,final String altName) {final SAMSequenceRecord record = addSequenceAlias(originalName, altName);record.addAlternativeSequenceName(altName);return record;}/*** return a MD5 sum for ths dictionary, the checksum is re-computed each* time this method is called.** <pre>* md5( (seq1.md5_if_available) + ' '+(seq2.name+seq2.length) + ' '+...)* </pre>** @return a MD5 checksum for this dictionary or the empty string if it is* empty*/public String md5() {if (isEmpty())return "";try {final MessageDigest md5 = MessageDigest.getInstance("MD5");md5.reset();for (final SAMSequenceRecord samSequenceRecord : mSequences) {if (samSequenceRecord.getSequenceIndex() > 0)md5.update((byte) ' ');final String md5_tag = samSequenceRecord.getAttribute(SAMSequenceRecord.MD5_TAG);if (md5_tag != null) {md5.update(md5_tag.getBytes());} else {md5.update(samSequenceRecord.getSequenceName().getBytes());md5.update(String.valueOf(samSequenceRecord.getSequenceLength()).getBytes());}}String hash = new BigInteger(1, md5.digest()).toString(16);if (hash.length() != 32) {final String zeros = "00000000000000000000000000000000";hash = zeros.substring(0, 32 - hash.length()) + hash;}return hash;} catch (Exception e) {throw new RuntimeException(e);}}@Overridepublic int hashCode() {return mSequences.hashCode();}@Overridepublic String toString() {return "SAMSequenceDictionary:( sequences:"+ size()+" length:"+ getReferenceLength()+" "+" md5:"+md5()+")";}public static final List<String> DEFAULT_DICTIONARY_EQUAL_TAG = Arrays.asList(SAMSequenceRecord.MD5_TAG,SAMSequenceRecord.SEQUENCE_LENGTH_TAG);/*** Will merge dictionaryTags from two dictionaries into one focusing on merging the tags rather than the 
sequences.** Requires that dictionaries have the same SAMSequence records in the same order.* For each sequenceIndex, the union of the tags from both sequences will be added to the new sequence, mismatching* values (for tags that are in both) will generate a warning, and the value from dict1 will be used.* For tags that are in tagsToEquate an unequal value will generate an error (an IllegalArgumentException will* be thrown.) tagsToEquate must include LN and MD.** @param dict1 first dictionary* @param dict2 first dictionary* @param tagsToMatch list of tags that must be equal if present in both sequence. Must contain MD, and LN* @return dictionary consisting of the same sequences as the two inputs with the merged values of tags.*/public static SAMSequenceDictionary mergeDictionaries(final SAMSequenceDictionary dict1,final SAMSequenceDictionary dict2,final List<String> tagsToMatch) {// We require MD and LN to match.if (!tagsToMatch.contains(MD5_TAG) || !tagsToMatch.contains(SEQUENCE_LENGTH_TAG)) {throw new IllegalArgumentException("Both " + MD5_TAG + " and " + SEQUENCE_LENGTH_TAG + " must be matched " +"when merging dictionaries. Found: " + String.join(",", tagsToMatch));}if (!dict1.getSequences().stream().map(SAMSequenceRecord::getSequenceName).collect(Collectors.toList()).equals(dict2.getSequences().stream().map(SAMSequenceRecord::getSequenceName).collect(Collectors.toList()))) {throw new IllegalArgumentException(String.format("Do not use this function to merge dictionaries with " +"different sequences in them. Sequences must be in the same order as well. 
Found [%s] and [%s].",dict1.getSequences().stream().map(SAMSequenceRecord::getSequenceName).collect(Collectors.joining(", ")),dict2.getSequences().stream().map(SAMSequenceRecord::getSequenceName).collect(Collectors.joining(", "))));}final SAMSequenceDictionary finalDict = new SAMSequenceDictionary();for (int sequenceIndex = 0; sequenceIndex < dict1.getSequences().size(); sequenceIndex++) {final SAMSequenceRecord s1 = dict1.getSequence(sequenceIndex);final SAMSequenceRecord s2 = dict2.getSequence(sequenceIndex);final String sName = s1.getSequenceName();final SAMSequenceRecord sMerged = new SAMSequenceRecord(sName, UNKNOWN_SEQUENCE_LENGTH);finalDict.addSequence(sMerged);final Set<String> allTags = new HashSet<>();s1.getAttributes().forEach(a -> allTags.add(a.getKey()));s2.getAttributes().forEach(a -> allTags.add(a.getKey()));for (final String tag : allTags) {final String value1 = s1.getAttribute(tag);final String value2 = s2.getAttribute(tag);if (value1 != null && value2 != null && !value1.equals(value2)) {String baseMessage = String.format("Found sequence entry for which " +"tags differ: %s and tag %s has the two values: %s and %s.",sName, tag, value1, value2);if (tagsToMatch.contains(tag)) {log.error("Cannot merge dictionaries. ", baseMessage);throw new IllegalArgumentException("Cannot merge dictionaries. " + baseMessage);} else {log.warn(baseMessage, " Using ", value1);}}sMerged.setAttribute(tag, value1 == null ? value2 : value1);}final int length1 = s1.getSequenceLength();final int length2 = s2.getSequenceLength();if (length1 != UNKNOWN_SEQUENCE_LENGTH && length2 != UNKNOWN_SEQUENCE_LENGTH && length1 != length2) {throw new IllegalArgumentException(String.format("Cannot merge the two dictionaries. " +"Found sequence entry for which " + "lengths differ: %s has lengths %s and %s", sName, length1, length2));}sMerged.setSequenceLength(length1 == UNKNOWN_SEQUENCE_LENGTH ? length2 : length1);}return finalDict;}
}
SAMSequenceRecord
类
SAMSequenceRecord
是一个类,用于表示单个 contig 的详细信息。它包含了 contig 的基本信息,如名称和长度。
主要功能
- 描述 contig:提供关于 contig 的详细信息,如名称和长度。
- 与 `SAMSequenceDictionary` 配合使用:`SAMSequenceRecord` 对象通常通过 `SAMSequenceDictionary` 来管理和访问。
SAMSequenceRecord
类源码:
/** The MIT License** Copyright (c) 2009 The Broad Institute** Permission is hereby granted, free of charge, to any person obtaining a copy* of this software and associated documentation files (the "Software"), to deal* in the Software without restriction, including without limitation the rights* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell* copies of the Software, and to permit persons to whom the Software is* furnished to do so, subject to the following conditions:** The above copyright notice and this permission notice shall be included in* all copies or substantial portions of the Software.** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN* THE SOFTWARE.*/
package htsjdk.samtools;import htsjdk.samtools.util.Locatable;
import htsjdk.samtools.util.StringUtil;import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;/*** Header information about a reference sequence. Corresponds to @SQ header record in SAM text header.*/public class SAMSequenceRecord extends AbstractSAMHeaderRecord implements Cloneable, Locatable {public static final long serialVersionUID = 1L; // AbstractSAMHeaderRecord implements Serializablepublic static final int UNAVAILABLE_SEQUENCE_INDEX = -1;private final String mSequenceName; // Value must be interned() if it's ever set/modifiedprivate Set<String> mAlternativeSequenceName = new LinkedHashSet<>();private int mSequenceIndex = UNAVAILABLE_SEQUENCE_INDEX;private int mSequenceLength = 0;public static final String SEQUENCE_NAME_TAG = "SN";public static final String ALTERNATIVE_SEQUENCE_NAME_TAG = "AN";public static final String SEQUENCE_LENGTH_TAG = "LN";public static final String MD5_TAG = "M5";public static final String ASSEMBLY_TAG = "AS";public static final String URI_TAG = "UR";public static final String SPECIES_TAG = "SP";public static final String DESCRIPTION_TAG = "DS";/*** If one sequence has this length, and another sequence had a different length, isSameSequence will* not complain that they are different sequences.*/public static final int UNKNOWN_SEQUENCE_LENGTH = 0;/*** This is not a valid sequence name, because it is reserved in the RNEXT field of SAM text format* to mean "same reference as RNAME field."*/public static final String RESERVED_RNEXT_SEQUENCE_NAME = "=";/* use RESERVED_RNEXT_SEQUENCE_NAME instead. 
*/@Deprecatedpublic static final String RESERVED_MRNM_SEQUENCE_NAME = RESERVED_RNEXT_SEQUENCE_NAME;/*** The standard tags are stored in text header without type information, because the type of these tags is known.*/public static final Set<String> STANDARD_TAGS =new HashSet<>(Arrays.asList(SEQUENCE_NAME_TAG, SEQUENCE_LENGTH_TAG, ASSEMBLY_TAG, ALTERNATIVE_SEQUENCE_NAME_TAG, MD5_TAG, URI_TAG, SPECIES_TAG));// These are the chars matched by \\s.private static final char[] WHITESPACE_CHARS = {' ', '\t', '\n', '\013', '\f', '\r'}; // \013 is vertical tab// alternative sequence name separatorprivate static final String ALTERNATIVE_SEQUENCE_NAME_SEPARATOR = ",";private static final Pattern LEGAL_RNAME_PATTERN = Pattern.compile("[0-9A-Za-z!#$%&+./:;?@^_|~-][0-9A-Za-z!#$%&*+./:;=?@^_|~-]*");/*** @deprecated Use {@link #SAMSequenceRecord(String, int)} instead.* sequenceLength is required for the object to be considered valid.*/@Deprecatedpublic SAMSequenceRecord(final String name) {this(name, UNKNOWN_SEQUENCE_LENGTH);}public SAMSequenceRecord(final String name, final int sequenceLength) {if (name != null) {validateSequenceName(name);mSequenceName = name.intern();} else {mSequenceName = null;}mSequenceLength = sequenceLength;}public String getSequenceName() {return mSequenceName;}public int getSequenceLength() {return mSequenceLength;}public SAMSequenceRecord setSequenceLength(final int value) {mSequenceLength = value;return this;}public String getAssembly() {return (String) getAttribute(ASSEMBLY_TAG);}public SAMSequenceRecord setAssembly(final String value) {setAttribute(ASSEMBLY_TAG, value);return this;}public String getSpecies() {return (String) getAttribute(SPECIES_TAG);}public SAMSequenceRecord setSpecies(final String value) {setAttribute(SPECIES_TAG, value);return this;}public String getMd5() {return (String) getAttribute(MD5_TAG);}public SAMSequenceRecord setMd5(final String value) {setAttribute(MD5_TAG, value);return this;}public String getDescription() {return 
getAttribute(DESCRIPTION_TAG);}public SAMSequenceRecord setDescription(final String value) {setAttribute(DESCRIPTION_TAG, value);return this;}/*** @return Index of this record in the sequence dictionary it lives in.*/public int getSequenceIndex() {return mSequenceIndex;}// Private state used only by SAM implementation.public SAMSequenceRecord setSequenceIndex(final int value) {mSequenceIndex = value;return this;}/*** Returns unmodifiable set with alternative sequence names.*/public Set<String> getAlternativeSequenceNames() {final String anTag = getAttribute(ALTERNATIVE_SEQUENCE_NAME_TAG);return (anTag == null) ? Collections.emptySet(): Collections.unmodifiableSet(new LinkedHashSet<>(Arrays.asList(anTag.split(ALTERNATIVE_SEQUENCE_NAME_SEPARATOR))));}/*** Adds an alternative sequence name if it is not the same as the sequence name or it is not present already.*/public void addAlternativeSequenceName(final String name) {final Set<String> altSequences = new HashSet<>(getAlternativeSequenceNames());if (!mSequenceName.equals(name)) {altSequences.add(name);}encodeAltSequences(altSequences);}/*** Sets the alternative sequence names in the order provided by iteration, removing the previous values.*/public SAMSequenceRecord setAlternativeSequenceName(final Collection<String> alternativeSequences) {if (alternativeSequences == null) {setAttribute(ALTERNATIVE_SEQUENCE_NAME_TAG, null);} else {// encode all alt sequence namesencodeAltSequences(alternativeSequences);}return this;}private static void validateAltRegExp(final String name) {if (!LEGAL_RNAME_PATTERN.matcher(name).matches()) {throw new IllegalArgumentException(String.format("Invalid alternative sequence name '%s': do not match the pattern %s", name, LEGAL_RNAME_PATTERN));}}private void encodeAltSequences(final Collection<String> alternativeSequences) {//make sure that the order in which alternate names are joined is determinedsetAttribute(ALTERNATIVE_SEQUENCE_NAME_TAG, alternativeSequences.isEmpty() ? 
null : alternativeSequences.stream().sorted().distinct().peek(SAMSequenceRecord::validateAltRegExp).collect(Collectors.joining(ALTERNATIVE_SEQUENCE_NAME_SEPARATOR)));}/*** Returns {@code true} if there are alternative sequence names; {@code false} otherwise.*/public boolean hasAlternativeSequenceNames() {return getAttribute(ALTERNATIVE_SEQUENCE_NAME_TAG) != null;}/*** Looser comparison than equals(). We look only at sequence index, sequence length, and MD5 tag value* (or sequence names, if there is no MD5 tag in either record.*/public boolean isSameSequence(final SAMSequenceRecord that) {if (this == that) {return true;}if (that == null) {return false;}if (mSequenceIndex != that.mSequenceIndex) {return false;}// PIC-439. Allow undefined length.if (mSequenceLength != UNKNOWN_SEQUENCE_LENGTH && that.mSequenceLength != UNKNOWN_SEQUENCE_LENGTH && mSequenceLength != that.mSequenceLength) {return false;}if (this.getAttribute(SAMSequenceRecord.MD5_TAG) != null && that.getAttribute(SAMSequenceRecord.MD5_TAG) != null) {final BigInteger thisMd5 = new BigInteger((String) this.getAttribute(SAMSequenceRecord.MD5_TAG), 16);final BigInteger thatMd5 = new BigInteger((String) that.getAttribute(SAMSequenceRecord.MD5_TAG), 16);if (!thisMd5.equals(thatMd5)) {return false;}} else {// Compare using == since we intern() the Stringsif (mSequenceName != that.mSequenceName) {// if they are different, they could still be the same based on the alternative sequencesif (getAlternativeSequenceNames().contains(that.mSequenceName) ||that.getAlternativeSequenceNames().contains(mSequenceName)) {return true;}return false;}}return true;}@Overridepublic boolean equals(final Object o) {if (this == o) {return true;}if (!(o instanceof SAMSequenceRecord)) {return false;}final SAMSequenceRecord that = (SAMSequenceRecord) o;if (mSequenceIndex != that.mSequenceIndex) {return false;}if (mSequenceLength != that.mSequenceLength) {return false;}if (!attributesEqual(that)) {return false;}if (mSequenceName != 
that.mSequenceName) {return false; // Compare using == since we intern() the name}if (!getAlternativeSequenceNames().equals(that.getAlternativeSequenceNames())) {return false;}return true;}@Overridepublic int hashCode() {return mSequenceName != null ? mSequenceName.hashCode() : 0;}@OverrideSet<String> getStandardTags() {return STANDARD_TAGS;}@Overridepublic final SAMSequenceRecord clone() {final SAMSequenceRecord ret = new SAMSequenceRecord(this.mSequenceName, this.mSequenceLength);ret.mSequenceIndex = this.mSequenceIndex;for (final Map.Entry<String, String> entry : this.getAttributes()) {ret.setAttribute(entry.getKey(), entry.getValue());}return ret;}/*** Truncate sequence name at first whitespace.*/public static String truncateSequenceName(final String sequenceName) {/** Instead of using regex split, do it manually for better performance.*/int truncateAt = sequenceName.length();for (final char c : WHITESPACE_CHARS) {int index = sequenceName.indexOf(c);if (index != UNAVAILABLE_SEQUENCE_INDEX && index < truncateAt) {truncateAt = index;}}return sequenceName.substring(0, truncateAt);}/*** Throw an exception if the sequence name is not valid.*/public static void validateSequenceName(final String name) {if (!LEGAL_RNAME_PATTERN.matcher(name).useAnchoringBounds(true).matches()) {throw new SAMException(String.format("Sequence name '%s' doesn't match regex: '%s' ", name, LEGAL_RNAME_PATTERN));}}@Overridepublic String toString() {return String.format("SAMSequenceRecord(name=%s,length=%s,dict_index=%s,assembly=%s,alternate_names=%s)",getSequenceName(),getSequenceLength(),getSequenceIndex(),getAssembly(),getAlternativeSequenceNames());}@Overridepublic String getSAMString() {return new SAMTextHeaderCodec().getSQLine(this);}/*** always returns <code>getSequenceName()</code>** @see #getSequenceName()*/@Overridepublic final String getContig() {return this.getSequenceName();}/*** always returns 1*/@Overridepublic final int getStart() {return 1;}/*** always returns 
<code>getSequenceLength()</code>** @see #getSequenceLength()*/@Overridepublic final int getEnd() {return this.getSequenceLength();}
}