Protein Translation

1. Readme

蛋白质转译

将 RNA 序列转译成蛋白质.

RNA 可以拆分为若干个由三个核苷酸组成的序列(称为密码子),然后转译成多肽,如下:

RNA:"AUGUUUUCU"=>转译成

密码子:"AUG", "UUU", "UCU"=>其成为具有以下序列的多肽=>

蛋白:"Methionine", "Phenylalanine", "Serine"

共有 64 个密码子,它们对应 20 种氨基酸;不过在本练习中,并非所有密码子序列及其对应的氨基酸都需要用到。如果程序对一个密码子有效,它就应该对所有密码子都有效。您也可以随意扩展测试套件中的列表,把其余的密码子包含进来.

还有三个终止密码子(也称为"STOP"密码子);核糖体一旦遇到其中任何一个,转译即告结束,蛋白质合成终止。

之后的所有后续密码子都会被忽略,如下所示:

RNA:"AUGUUUUCUUAAAUG"=>

密码子:"AUG", "UUU", "UCU", "UAA", "AUG"=>

蛋白:"Methionine", "Phenylalanine", "Serine"

注意:终止密码子"UAA"终止了转译,因此它之后的最后一个"AUG"(蛋氨酸)不会被转译进蛋白质序列。

以下是本练习所需的密码子和产生的氨基酸。

密码子 蛋白
AUG 蛋氨酸
UUU,UUC 苯丙氨酸
UUA,UUG 亮氨酸
UCU,UCC,UCA,UCG 丝氨酸
UAU,UAC 酪氨酸
UGU,UGC 半胱氨酸
UGG 色氨酸
UAA,UAG,UGA STOP

学习更多关于蛋白质转译:维基百科

来源

Tyler Long

2. 开始你的表演

use std::marker::PhantomData;

/// Exercise scaffold for the codon lookup type.
///
/// The lifetime `'a` is the lifetime of the string slices handed out by
/// `name_for` and `of_rna`. The struct currently stores no real data; the
/// solver is expected to replace the placeholder field below.
pub struct CodonsInfo<'a> {
   // This field is here to make the template compile and not to
   // complain about unused type lifetime parameter "'a". Once you start
   // solving the exercise, delete this field and the 'std::marker::PhantomData'
   // import.
   phantom: PhantomData<&'a ()>,
}

/// Stub methods of the exercise template. Both bodies are placeholders that
/// panic via `unimplemented!`; the panic message states the intended contract.
impl<'a> CodonsInfo<'a> {
   /// Intended to return the protein name for `codon`, or `None` when the
   /// codon string is invalid. Not implemented yet: always panics.
   pub fn name_for(&self, codon: &str) -> Option<&'a str> {
       unimplemented!(
           "Return the protein name for a '{}' codon or None, if codon string is invalid",
           codon
       );
   }

   /// Intended to return the list of protein names corresponding to the
   /// `rna` string, or `None` when the RNA string is invalid. Not implemented
   /// yet: always panics.
   pub fn of_rna(&self, rna: &str) -> Option<Vec<&'a str>> {
       unimplemented!("Return a list of protein names that correspond to the '{}' RNA string or None if the RNA string is invalid", rna);
   }
}

/// Intended to construct a `CodonsInfo` from the given pairs (the test suite
/// in this document passes `(codon, name)` tuples). Not implemented yet:
/// always panics, echoing the received pairs in the panic message.
pub fn parse<'a>(pairs: Vec<(&'a str, &'a str)>) -> CodonsInfo<'a> {
   unimplemented!(
       "Construct a new CodonsInfo struct from given pairs: {:?}",
       pairs
   );
}

3. 测试代码查看


# #![allow(unused_variables)]
#fn main() {
/// A known codon resolves to its protein name.
#[test]
fn test_methionine() {
    assert_eq!(parse(make_pairs()).name_for("AUG"), Some("methionine"));
}

/// `UGU` is one of the codons listed for cysteine.
#[test]
fn test_cysteine_tgt() {
    assert_eq!(parse(make_pairs()).name_for("UGU"), Some("cysteine"));
}

/// Stop codons are looked up like any other codon and report "stop codon".
#[test]
fn test_stop() {
    assert_eq!(parse(make_pairs()).name_for("UAA"), Some("stop codon"));
}

/// `GUU` is one of the codons listed for valine.
#[test]
fn test_valine() {
    assert_eq!(parse(make_pairs()).name_for("GUU"), Some("valine"));
}

/// `AUU` is one of the codons listed for isoleucine.
#[test]
fn test_isoleucine() {
    assert_eq!(parse(make_pairs()).name_for("AUU"), Some("isoleucine"));
}

/// Several distinct codons map to the same protein name.
#[test]
fn test_arginine_name() {
    let table = parse(make_pairs());
    for codon in ["CGA", "AGA", "AGG"].iter() {
        assert_eq!(table.name_for(codon), Some("arginine"));
    }
}

/// The empty string is not a codon.
#[test]
fn empty_is_invalid() {
    assert_eq!(parse(make_pairs()).name_for(""), None);
}

/// Letters outside the codon table are rejected rather than treated as
/// wildcards.
#[test]
fn x_is_not_shorthand_so_is_invalid() {
    assert_eq!(parse(make_pairs()).name_for("VWX"), None);
}

/// A two-letter prefix of a valid codon is not itself valid.
#[test]
fn too_short_is_invalid() {
    assert_eq!(parse(make_pairs()).name_for("AU"), None);
}

/// A four-letter string is not a codon.
#[test]
fn too_long_is_invalid() {
    assert_eq!(parse(make_pairs()).name_for("ATTA"), None);
}

/// A strand without a stop codon is translated codon by codon.
#[test]
fn test_translates_rna_strand_into_correct_protein() {
    let expected = vec!["methionine", "phenylalanine", "tryptophan"];
    assert_eq!(parse(make_pairs()).of_rna("AUGUUUUGG"), Some(expected));
}

/// A trailing stop codon ends translation and is not part of the result.
#[test]
fn test_stops_translation_if_stop_codon_present() {
    let expected = vec!["methionine", "phenylalanine"];
    assert_eq!(parse(make_pairs()).of_rna("AUGUUUUAA"), Some(expected));
}

/// Codons that follow a stop codon are ignored.
#[test]
fn test_stops_translation_of_longer_strand() {
    let expected = vec!["tryptophan", "cysteine", "tyrosine"];
    assert_eq!(
        parse(make_pairs()).of_rna("UGGUGUUAUUAAUGGUUU"),
        Some(expected)
    );
}

/// A strand made of unknown codons yields `None`.
#[test]
fn test_invalid_codons() {
    assert_eq!(parse(make_pairs()).of_rna("CARROT"), None);
}

/// The input data constructor.
///
/// Returns a flat list of `(codon, name)` pairs built from a table grouped by
/// protein name. The list is ordered by name; codons that share a name keep
/// the order in which they appear in the table.
fn make_pairs() -> Vec<(&'static str, &'static str)> {
    // Static table: no heap allocation is needed to describe it.
    const GROUPED: &[(&str, &[&str])] = &[
        ("isoleucine", &["AUU", "AUC", "AUA"]),
        ("valine", &["GUU", "GUC", "GUA", "GUG"]),
        ("phenylalanine", &["UUU", "UUC"]),
        ("methionine", &["AUG"]),
        ("cysteine", &["UGU", "UGC"]),
        ("alanine", &["GCU", "GCC", "GCA", "GCG"]),
        ("glycine", &["GGU", "GGC", "GGA", "GGG"]),
        ("proline", &["CCU", "CCC", "CCA", "CCG"]),
        ("threonine", &["ACU", "ACC", "ACA", "ACG"]),
        ("serine", &["AGU", "AGC"]),
        ("tyrosine", &["UAU", "UAC"]),
        ("tryptophan", &["UGG"]),
        ("glutamine", &["CAA", "CAG"]),
        ("asparagine", &["AAU", "AAC"]),
        ("histidine", &["CAU", "CAC"]),
        ("glutamic acid", &["GAA", "GAG"]),
        ("aspartic acid", &["GAU", "GAC"]),
        ("lysine", &["AAA", "AAG"]),
        ("arginine", &["CGU", "CGC", "CGA", "CGG", "AGA", "AGG"]),
        ("stop codon", &["UAA", "UAG", "UGA"]),
    ];

    let mut pairs: Vec<(&'static str, &'static str)> = GROUPED
        .iter()
        .flat_map(|&(name, codons)| codons.iter().map(move |&codon| (codon, name)))
        .collect();

    // `sort_by_key` is a stable sort, so codons of the same protein stay in
    // table order, exactly as with the previous `sort_by` on the name.
    pairs.sort_by_key(|&(_, name)| name);
    pairs
}

#}

4. 答案


# #![allow(unused_variables)]
#fn main() {
use std::collections::HashMap;

/// Lookup table from RNA codons to protein names.
///
/// The table borrows every codon and name string for `'a`; names returned by
/// the lookup methods carry that same lifetime.
#[derive(Debug)]
pub struct CodonInfo<'a> {
    /// Maps a codon (the first element of each pair given to [`parse`]) to
    /// its protein name (the second element).
    actual_codons: HashMap<&'a str, &'a str>,
}

/// Builds a [`CodonInfo`] from `(codon, name)` pairs.
///
/// If the same codon occurs more than once, the last pair wins, as is usual
/// when collecting into a `HashMap`.
pub fn parse<'a>(pairs: Vec<(&'a str, &'a str)>) -> CodonInfo<'a> {
    CodonInfo {
        actual_codons: pairs.into_iter().collect(),
    }
}

impl<'a> CodonInfo<'a> {
    /// Protein name that marks the end of translation in [`CodonInfo::of_rna`].
    const STOP: &'static str = "stop codon";

    /// Returns the protein name registered for `codon`, or `None` when the
    /// codon is not in the table (which covers empty, too short, too long and
    /// unknown strings alike).
    pub fn name_for(&self, codon: &str) -> Option<&'a str> {
        self.actual_codons.get(codon).copied()
    }

    /// Translates `strand` into protein names, three characters at a time.
    ///
    /// Translation ends at the first codon whose name is `"stop codon"`; that
    /// codon and everything after it are left out and are not validated.
    /// Returns `None` if any codon before that point is unknown, including a
    /// trailing group of fewer than three characters. An empty strand yields
    /// `Some(vec![])`.
    pub fn of_rna(&self, strand: &str) -> Option<Vec<&'a str>> {
        // Group by `char` rather than by byte so a multi-byte character can
        // never be split in the middle.
        let nucleotides: Vec<char> = strand.chars().collect();
        nucleotides
            .chunks(3)
            .map(|codon| self.name_for(&codon.iter().collect::<String>()))
            // Unknown codons (`None`) must pass through so that the final
            // `collect` into `Option<Vec<_>>` reports them as `None`.
            .take_while(|name| *name != Some(Self::STOP))
            .collect()
    }
}

#}



填充/相关