nucleotide-codons

1. Readme

核苷酸密码子

编写一个函数,返回给定密码子(可能使用简写形式)所编码的氨基酸名称。

在 DNA 序列中,3 个核苷酸称为一个密码子,用于编码一种氨基酸。多个密码子常常编码同一种氨基酸。国际纯粹与应用化学联合会(IUPAC)制定了一套简写系统,用来表示编码同一种氨基酸的一组密码子。

简单地说,他们把四个字母 A、C、G 和 T 扩展成一堆代表不同可能性的字母。例如 R 代表 A 和 G, 所以 TAR 可表示为 TAA 和 TAG (把 “TAR” 当成正则式形式的 “TA[AG]”)。

编写代码:给定一个密码子(可以使用简写),返回它所编码的氨基酸名称。您将得到一个由非简写密码子与名称组成的配对列表,可作为计算的基础。

见:维基百科.

2. 开始你的表演



3. 测试代码查看


# #![allow(unused_variables)]
#fn main() {
#[test]
fn test_methionine() {
    // ATG is a plain (non-shorthand) codon that appears verbatim in the table.
    let table = parse(make_pairs());
    let found = table.name_for("ATG");
    assert_eq!(found, Ok("methionine"));
}

#[test]
//#[ignore]
fn test_cysteine_tgt() {
    // TGT is one of the two concrete codons listed for cysteine.
    let table = parse(make_pairs());
    let found = table.name_for("TGT");
    assert_eq!(found, Ok("cysteine"));
}

#[test]
//#[ignore]
fn test_cysteine_tgy() {
    // TGY is the "compressed" spelling covering both TGT and TGC, so each
    // concrete codon must resolve to the same result as the shorthand.
    let table = parse(make_pairs());
    let via_shorthand = table.name_for("TGY");
    for concrete in &["TGT", "TGC"] {
        assert_eq!(table.name_for(concrete), via_shorthand);
    }
}

#[test]
//#[ignore]
fn test_stop() {
    // Stop codons are stored in the table under the name "stop codon".
    let table = parse(make_pairs());
    let found = table.name_for("TAA");
    assert_eq!(found, Ok("stop codon"));
}

#[test]
//#[ignore]
fn test_valine() {
    // N stands for any base; all four GT* codons are listed as valine.
    let table = parse(make_pairs());
    let found = table.name_for("GTN");
    assert_eq!(found, Ok("valine"));
}

#[test]
//#[ignore]
fn test_isoleucine() {
    // ATH is shorthand covering the three isoleucine codons ATT, ATC and ATA.
    let table = parse(make_pairs());
    let found = table.name_for("ATH");
    assert_eq!(found, Ok("isoleucine"));
}

#[test]
//#[ignore]
fn test_arginine_name() {
    // CGA can be written in shorthand as either CGN or MGR; every spelling
    // has to resolve to arginine.
    let table = parse(make_pairs());
    for spelling in &["CGA", "CGN", "MGR"] {
        assert_eq!(table.name_for(spelling), Ok("arginine"));
    }
}

#[test]
//#[ignore]
fn empty_is_invalid() {
    // An empty string is not a codon and must be rejected.
    let table = parse(make_pairs());
    let outcome = table.name_for("");
    assert!(outcome.is_err());
}

#[test]
//#[ignore]
fn x_is_not_shorthand_so_is_invalid() {
    // V and W are shorthand letters, but X is not, so the codon is rejected.
    let table = parse(make_pairs());
    let outcome = table.name_for("VWX");
    assert!(outcome.is_err());
}

#[test]
//#[ignore]
fn too_short_is_invalid() {
    // Two letters cannot form a codon.
    let table = parse(make_pairs());
    let outcome = table.name_for("AT");
    assert!(outcome.is_err());
}

#[test]
//#[ignore]
fn too_long_is_invalid() {
    // Four letters cannot form a codon.
    let table = parse(make_pairs());
    let outcome = table.name_for("ATTA");
    assert!(outcome.is_err());
}

// Builds the test input: a flat list of `(codon, name)` pairs, ordered by
// amino-acid name. Codons sharing a name keep the order in which they are
// listed below, because the sort is stable.
fn make_pairs() -> Vec<(&'static str, &'static str)> {
    // Each entry is an amino-acid name with every concrete codon encoding it.
    const GROUPED: &[(&str, &[&str])] = &[
        ("isoleucine", &["ATT", "ATC", "ATA"]),
        ("leucine", &["CTT", "CTC", "CTA", "CTG", "TTA", "TTG"]),
        ("valine", &["GTT", "GTC", "GTA", "GTG"]),
        ("phenylalanine", &["TTT", "TTC"]),
        ("methionine", &["ATG"]),
        ("cysteine", &["TGT", "TGC"]),
        ("alanine", &["GCT", "GCC", "GCA", "GCG"]),
        ("glycine", &["GGT", "GGC", "GGA", "GGG"]),
        ("proline", &["CCT", "CCC", "CCA", "CCG"]),
        ("threonine", &["ACT", "ACC", "ACA", "ACG"]),
        ("serine", &["TCT", "TCC", "TCA", "TCG", "AGT", "AGC"]),
        ("tyrosine", &["TAT", "TAC"]),
        ("tryptophan", &["TGG"]),
        ("glutamine", &["CAA", "CAG"]),
        ("asparagine", &["AAT", "AAC"]),
        ("histidine", &["CAT", "CAC"]),
        ("glutamic acid", &["GAA", "GAG"]),
        ("aspartic acid", &["GAT", "GAC"]),
        ("lysine", &["AAA", "AAG"]),
        ("arginine", &["CGT", "CGC", "CGA", "CGG", "AGA", "AGG"]),
        ("stop codon", &["TAA", "TAG", "TGA"]),
    ];

    // Flatten name -> codons into individual (codon, name) pairs.
    let mut flat: Vec<(&'static str, &'static str)> = GROUPED
        .iter()
        .flat_map(|&(name, codons)| codons.iter().map(move |&codon| (codon, name)))
        .collect();

    // Stable sort on the name only.
    flat.sort_by_key(|&(_, name)| name);
    flat
}

#}

4. 答案


# #![allow(unused_variables)]
#fn main() {
use std::collections::HashMap;

/// Lookup table from concrete (non-shorthand) codons to amino-acid names.
///
/// Build one with [`parse`] and query it with [`CodonInfo::name_for`].
pub struct CodonInfo<'a> {
    /// Maps each concrete three-letter codon to the name it encodes.
    actual_codons: HashMap<&'a str, &'a str>,
}

/// Builds a [`CodonInfo`] from a list of `(codon, name)` pairs.
///
/// If the same codon appears more than once, the last pair wins (this is the
/// behaviour of collecting into a `HashMap`).
pub fn parse<'a>(pairs: Vec<(&'a str, &'a str)>) -> CodonInfo<'a> {
    CodonInfo {
        actual_codons: pairs.into_iter().collect(),
    }
}

impl<'a> CodonInfo<'a> {
    /// Returns the amino-acid name for `codon`, which may use IUPAC shorthand
    /// letters in addition to `A`, `C`, `G` and `T`.
    ///
    /// Each letter is replaced by one concrete base it can stand for, and the
    /// resulting concrete codon is looked up in the table:
    ///
    /// * `A W M R D H V N` -> `A`
    /// * `C S Y B` -> `C`
    /// * `G K` -> `G`
    /// * `T` -> `T`
    ///
    /// NOTE: only this single representative expansion is checked. Shorthand
    /// that spans several amino acids (for example `"NNN"`, which is looked up
    /// as `"AAA"`) is therefore not rejected.
    ///
    /// # Errors
    ///
    /// * `"invalid length"` if `codon` is not exactly three bytes long.
    /// * `"invalid char"` if any letter is neither a base nor a shorthand letter.
    /// * `"codon not in table"` if the expanded codon is missing from the table
    ///   this `CodonInfo` was built from.
    pub fn name_for(&self, codon: &str) -> Result<&'a str, &'static str> {
        if codon.len() != 3 {
            return Err("invalid length");
        }

        // Collecting into `Option<String>` yields `None` as soon as one letter
        // is unrecognised, so no separate validity flag is needed.
        let lookup = codon
            .chars()
            .map(|letter| match letter {
                'A' | 'W' | 'M' | 'R' | 'D' | 'H' | 'V' | 'N' => Some('A'),
                'C' | 'S' | 'Y' | 'B' => Some('C'),
                'G' | 'K' => Some('G'),
                'T' => Some('T'),
                _ => None,
            })
            .collect::<Option<String>>()
            .ok_or("invalid char")?;

        // The table is supplied by the caller and may be incomplete, so a
        // missing entry is reported as an error rather than a panic.
        self.actual_codons
            .get(lookup.as_str())
            .copied()
            .ok_or("codon not in table")
    }
}

#}



填充/相关