Nucleotide Count
1. Readme
核苷酸(Nucleotide)计数
给定一条 DNA 单链,计算每种核苷酸在该字符串中出现的次数。
地球上每一种生物的遗传语言都是 DNA。DNA 是一种大分子,由一系列称为核苷酸的基本单元组成。DNA 中存在 4 种核苷酸,它们之间仅略有不同,可用以下符号表示:'A' 表示腺嘌呤,'C' 表示胞嘧啶,'G' 表示鸟嘌呤,'T' 表示胸腺嘧啶。
下面是一个类比:
- 树枝组成鸟的巢
- 核苷酸组成 DNA 链
资源
罗瑟琳(Rosalind)上的“DNA 核苷酸计数”问题:http://rosalind.info/problems/dna/
2. 开始你的表演
use std::collections::HashMap;

/// Exercise stub: intended to report how many times `nucleotide` occurs in
/// the strand `dna`.
///
/// Not implemented yet — every call panics with a message that echoes both
/// arguments. Replace the body with your own solution.
pub fn count(nucleotide: char, dna: &str) -> Result<usize, char> {
    unimplemented!(
        "How much of nucleotide type '{}' is contained inside DNA string '{}'?",
        nucleotide,
        dna
    )
}

/// Exercise stub: intended to report the number of occurrences of every
/// nucleotide type in the strand `dna`.
///
/// Not implemented yet — every call panics with a message that echoes `dna`.
/// Replace the body with your own solution.
pub fn nucleotide_counts(dna: &str) -> Result<HashMap<char, usize>, char> {
    unimplemented!(
        "How much of every nucleotide type is contained inside DNA string '{}'?",
        dna
    )
}
3. 测试代码查看
# #![allow(unused_variables)] #fn main() { // use std::collections::HashMap; fn check_dna(s: &str, pairs: &[(char, usize)]) { // The reason for the awkward code in here is to ensure that the failure // message for assert_eq! is as informative as possible. A simpler // solution would simply check the length of the map, and then // check for the presence and value of each key in the given pairs vector. let mut m: HashMap<char, usize> = nucleotide_counts(s).unwrap(); for &(k, v) in pairs.iter() { assert_eq!((k, m.remove(&k)), (k, Some(v))); } // may fail with a message that clearly shows all extra pairs in the map assert_eq!(m.iter().collect::<Vec<(&char, &usize)>>(), vec![]); } #[test] fn count_returns_result() { assert!(count('A', "").is_ok()); } #[test] //#[ignore] fn test_count_empty() { assert_eq!(count('A', ""), Ok(0)); } #[test] //#[ignore] fn count_invalid_nucleotide() { assert_eq!(count('X', "A"), Err('X')); } #[test] //#[ignore] fn count_invalid_dna() { assert_eq!(count('A', "AX"), Err('X')); } #[test] //#[ignore] fn test_count_repetitive_cytosine() { assert_eq!(count('C', "CCCCC"), Ok(5)); } #[test] //#[ignore] fn test_count_only_thymine() { assert_eq!(count('T', "GGGGGTAACCCGG"), Ok(1)); } #[test] //#[ignore] fn counts_returns_result() { assert!(nucleotide_counts("ACGT").is_ok()); } #[test] //#[ignore] fn test_nucleotide_count_empty() { check_dna("", &[('A', 0), ('T', 0), ('C', 0), ('G', 0)]); } #[test] //#[ignore] fn test_nucleotide_count_only_guanine() { check_dna("GGGGGGGG", &[('A', 0), ('T', 0), ('C', 0), ('G', 8)]); } #[test] //#[ignore] fn test_nucleotide_count_counts_all() { check_dna( "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAA\ GAGTGTCTGATAGCAGC", &[('A', 20), ('T', 21), ('C', 12), ('G', 17)], ); } #[test] //#[ignore] fn counts_invalid_nucleotide_results_in_err() { assert_eq!(nucleotide_counts("GGXXX"), Err('X')); } #}
4. 答案
use std::collections::HashMap;

/// The nucleotide symbols accepted in a DNA strand.
static VALID_NUCLEOTIDES: &str = "ACGT";

/// Returns `Ok(c)` when `c` is one of [`VALID_NUCLEOTIDES`], otherwise `Err(c)`.
fn valid(c: char) -> Result<char, char> {
    if VALID_NUCLEOTIDES.contains(c) {
        Ok(c)
    } else {
        Err(c)
    }
}

/// Counts how many times `nucleotide` occurs in `input`.
///
/// # Errors
///
/// Returns `Err(nucleotide)` when `nucleotide` itself is not a valid symbol;
/// otherwise returns `Err(c)` for the first character `c` of `input` that is
/// not a valid symbol.
pub fn count(nucleotide: char, input: &str) -> Result<usize, char> {
    // Reject an invalid query symbol before looking at the strand at all.
    valid(nucleotide)?;

    let mut occurrences = 0;
    for symbol in input.chars() {
        // `?` aborts on the first invalid character of the strand.
        if valid(symbol)? == nucleotide {
            occurrences += 1;
        }
    }
    Ok(occurrences)
}

/// Counts the occurrences of every nucleotide in `input`.
///
/// On success the map holds an entry for each symbol in
/// [`VALID_NUCLEOTIDES`], including those that occur zero times.
///
/// # Errors
///
/// Returns `Err(c)` for the first character `c` of `input` that is not a
/// valid symbol.
pub fn nucleotide_counts(input: &str) -> Result<HashMap<char, usize>, char> {
    // Pre-seed every valid symbol with zero so that absent nucleotides are
    // still reported, and so that a failed lookup identifies an invalid symbol.
    let mut counts: HashMap<char, usize> =
        VALID_NUCLEOTIDES.chars().map(|c| (c, 0)).collect();

    for symbol in input.chars() {
        match counts.get_mut(&symbol) {
            Some(n) => *n += 1,
            None => return Err(symbol),
        }
    }
    Ok(counts)
}