Nucleotide Count

1. Readme

核苷酸(Nucleotide)计数

给定一条 DNA 单链,计算每种核苷酸在该字符串中出现的次数。

地球上每一种生物的遗传语言都是 DNA。DNA 是一种大分子,由一系列称为核苷酸的基本单元组成。DNA 中存在 4 种核苷酸,它们彼此仅略有不同,可用以下符号表示:'A' 表示腺嘌呤,'C' 表示胞嘧啶,'G' 表示鸟嘌呤,'T' 表示胸腺嘧啶。

下面是一个类比:

  • 树枝组成鸟的巢
  • 核苷酸组成 DNA 链

资源

Rosalind 上的“DNA 核苷酸计数”(Counting DNA Nucleotides)问题:http://rosalind.info/problems/dna/

2. 开始你的表演

use std::collections::HashMap;

/// Counts how often `nucleotide` occurs in the DNA strand `dna`.
///
/// This is the exercise's starter stub: the body is still `unimplemented!`
/// and panics with the message below whenever it is called.
///
/// The accompanying tests expect a finished implementation to return
/// `Ok(n)` with the number of occurrences, and `Err(c)` carrying the
/// offending character when `nucleotide` itself, or any character of `dna`,
/// is not a valid nucleotide.
pub fn count(nucleotide: char, dna: &str) -> Result<usize, char> {
   unimplemented!(
       "How much of nucleotide type '{}' is contained inside DNA string '{}'?",
       nucleotide,
       dna
   );
}

/// Counts every nucleotide type in the DNA strand `dna`.
///
/// This is the exercise's starter stub: the body is still `unimplemented!`
/// and panics with the message below whenever it is called.
///
/// The accompanying tests expect a finished implementation to return a map
/// holding an entry for each of 'A', 'C', 'G' and 'T' (zero when absent), or
/// `Err(c)` carrying an invalid character found in `dna`.
pub fn nucleotide_counts(dna: &str) -> Result<HashMap<char, usize>, char> {
   unimplemented!(
       "How much of every nucleotide type is contained inside DNA string '{}'?",
       dna
   );
}

3. 测试代码查看


# #![allow(unused_variables)]
#fn main() {
// use std::collections::HashMap;

/// Asserts that `nucleotide_counts(s)` succeeds and yields exactly the
/// `(nucleotide, count)` entries listed in `pairs`.
///
/// Panics (failing the calling test) if `nucleotide_counts(s)` is an `Err`,
/// if an expected pair is missing or carries a different count, or if the
/// map contains entries that are not listed in `pairs`.
fn check_dna(s: &str, pairs: &[(char, usize)]) {
   // The reason for the awkward code in here is to ensure that the failure
   // message for assert_eq! is as informative as possible. A simpler
   // solution would simply check the length of the map, and then
   // check for the presence and value of each key in the given pairs vector.
   let mut m: HashMap<char, usize> = nucleotide_counts(s).unwrap();
   for &(k, v) in pairs.iter() {
       // Removing each expected key means only unexpected entries remain in
       // `m` once the loop is done; pairing with `k` names the key on failure.
       assert_eq!((k, m.remove(&k)), (k, Some(v)));
   }
   // may fail with a message that clearly shows all extra pairs in the map
   assert_eq!(m.iter().collect::<Vec<(&char, &usize)>>(), vec![]);
}

/// `count` on a valid nucleotide and an empty strand must produce an `Ok`.
#[test]
fn count_returns_result() {
   assert!(count('A', "").is_ok());
}

/// An empty strand contains zero occurrences of any nucleotide.
#[test]
//#[ignore]
fn test_count_empty() {
   assert_eq!(count('A', ""), Ok(0));
}

/// Asking for a symbol that is not a nucleotide is reported as `Err` with
/// that symbol, even when the strand itself is valid.
#[test]
//#[ignore]
fn count_invalid_nucleotide() {
   assert_eq!(count('X', "A"), Err('X'));
}

/// A strand containing an invalid character is reported as `Err` with that
/// character, even when the requested nucleotide is valid.
#[test]
//#[ignore]
fn count_invalid_dna() {
   assert_eq!(count('A', "AX"), Err('X'));
}

/// A strand made only of the requested nucleotide counts every character.
#[test]
//#[ignore]
fn test_count_repetitive_cytosine() {
   assert_eq!(count('C', "CCCCC"), Ok(5));
}

/// Only the requested nucleotide is counted in a mixed strand; here 'T'
/// appears exactly once.
#[test]
//#[ignore]
fn test_count_only_thymine() {
   assert_eq!(count('T', "GGGGGTAACCCGG"), Ok(1));
}

/// `nucleotide_counts` on a fully valid strand must produce an `Ok`.
#[test]
//#[ignore]
fn counts_returns_result() {
   assert!(nucleotide_counts("ACGT").is_ok());
}

/// An empty strand must still report all four nucleotides, each with a
/// count of zero.
#[test]
//#[ignore]
fn test_nucleotide_count_empty() {
   check_dna("", &[('A', 0), ('T', 0), ('C', 0), ('G', 0)]);
}

/// A strand of eight 'G's reports 8 for 'G' and zero for the other three
/// nucleotides.
#[test]
//#[ignore]
fn test_nucleotide_count_only_guanine() {
   check_dna("GGGGGGGG", &[('A', 0), ('T', 0), ('C', 0), ('G', 8)]);
}

/// A longer mixed strand must yield the expected per-nucleotide totals.
#[test]
//#[ignore]
fn test_nucleotide_count_counts_all() {
   check_dna(
       // The trailing backslash joins both literal lines into one strand.
       "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAA\
        GAGTGTCTGATAGCAGC",
       &[('A', 20), ('T', 21), ('C', 12), ('G', 17)],
   );
}

/// A strand containing an invalid character makes `nucleotide_counts`
/// return `Err` with that character instead of a map.
#[test]
//#[ignore]
fn counts_invalid_nucleotide_results_in_err() {
   assert_eq!(nucleotide_counts("GGXXX"), Err('X'));
}

#}

4. 答案


# #![allow(unused_variables)]
#fn main() {
use std::collections::HashMap;

/// The four symbols accepted as nucleotides; every other character is
/// rejected by the functions below.
// References in `static` items are implicitly `'static`, so the lifetime is
// not spelled out (clippy: `redundant_static_lifetimes`).
static VALID_NUCLEOTIDES: &str = "ACGT";

/// Checks that `c` is one of the symbols in `VALID_NUCLEOTIDES`.
///
/// Returns `Ok(c)` for a recognised symbol and `Err(c)` otherwise, so
/// callers can propagate the offending character with `?`.
fn valid(c: char) -> Result<char, char> {
    let recognised = VALID_NUCLEOTIDES.chars().any(|known| known == c);
    if !recognised {
        return Err(c);
    }
    Ok(c)
}

/// Counts the occurrences of `nucleotide` in the strand `input`.
///
/// # Errors
///
/// Returns `Err(nucleotide)` when `nucleotide` is not a valid symbol, and
/// otherwise `Err(c)` for the first character `c` of `input` that is not a
/// valid symbol.
pub fn count(nucleotide: char, input: &str) -> Result<usize, char> {
    // Reject an invalid query before looking at the strand at all.
    valid(nucleotide)?;
    input
        .chars()
        .try_fold(0usize, |total, symbol| -> Result<usize, char> {
            // Every character is validated, matching or not, so scanning
            // stops at the first invalid one.
            let symbol = valid(symbol)?;
            Ok(if symbol == nucleotide { total + 1 } else { total })
        })
}

/// Tallies every nucleotide in the strand `input`.
///
/// The returned map always holds an entry for each symbol in
/// `VALID_NUCLEOTIDES`, with a count of zero for symbols that never occur.
///
/// # Errors
///
/// Returns `Err(c)` for the first character `c` of `input` that is not a
/// valid symbol.
pub fn nucleotide_counts(input: &str) -> Result<HashMap<char, usize>, char> {
    // Seed every valid symbol with zero so that a failed lookup below
    // identifies an invalid character.
    let mut tally: HashMap<char, usize> = HashMap::new();
    for symbol in VALID_NUCLEOTIDES.chars() {
        tally.insert(symbol, 0);
    }

    for symbol in input.chars() {
        match tally.get_mut(&symbol) {
            Some(seen) => *seen += 1,
            None => return Err(symbol),
        }
    }

    Ok(tally)
}

#}



填充/相关