Protein Translation
1. Readme
蛋白质转译
将 RNA 序列转译成蛋白质.
RNA 可以分解为若干由三个核苷酸组成的序列(称为密码子),然后转译成多肽,如下:
RNA:"AUGUUUUCU"
=>转译成
密码子:"AUG", "UUU", "UCU"
=>其成为具有以下序列的多肽=>
蛋白:"Methionine", "Phenylalanine", "Serine"
一共有 64 种密码子,它们对应 20 种氨基酸;不过在本练习中,并不需要覆盖所有的密码子序列及其对应的氨基酸。只要程序对一个密码子有效,它就应该对所有密码子都有效。当然,您也可以随意扩展测试套件中的列表,把它们全部包含进去。
另外还有三个终止密码子(也称为 "STOP" 密码子);核糖体一旦遇到其中任何一个,转译就会结束,蛋白质合成随之终止。
之后的所有后续密码子都会被忽略,如下所示:
RNA:"AUGUUUUCUUAAAUG"
=>
密码子:"AUG", "UUU", "UCU", "UAA", "AUG"
=>
蛋白:"Methionine", "Phenylalanine", "Serine"
注意:终止密码子 "UAA" 终止了转译,因此最后的蛋氨酸(AUG)不会被转译进蛋白质序列。
以下是本练习所需的密码子和产生的氨基酸。
密码子 | 蛋白 |
---|---|
AUG | 蛋氨酸 |
UUU,UUC | 苯丙氨酸 |
UUA,UUG | 亮氨酸 |
UCU,UCC,UCA,UCG | 丝氨酸 |
UAU,UAC | 酪氨酸 |
UGU,UGC | 半胱氨酸 |
UGG | 色氨酸 |
UAA,UAG,UGA | STOP |
学习更多关于蛋白质转译:维基百科
资源
Tyler Long
2. 开始你的表演
use std::marker::PhantomData;

/// Exercise stub for the codon lookup type.
///
/// The lifetime `'a` is the lifetime of the string slices handed to [`parse`];
/// protein names returned by the methods below borrow for that same lifetime.
pub struct CodonsInfo<'a> {
    // This field is here to make the template compile and not to
    // complain about unused type lifetime parameter "'a". Once you start
    // solving the exercise, delete this field and the 'std::marker::PhantomData'
    // import.
    phantom: PhantomData<&'a ()>,
}

impl<'a> CodonsInfo<'a> {
    /// Looks up the protein name for a single codon.
    ///
    /// Intended contract (per the stub message): `Some(name)` for a known
    /// codon, `None` if the codon string is invalid.
    ///
    /// # Panics
    ///
    /// Always panics until the exercise is implemented.
    pub fn name_for(&self, codon: &str) -> Option<&'a str> {
        unimplemented!(
            "Return the protein name for a '{}' codon or None, if codon string is invalid",
            codon
        )
    }

    /// Translates a whole RNA string into a list of protein names.
    ///
    /// Intended contract (per the stub message): `Some(names)` for a valid
    /// RNA string, `None` if the RNA string is invalid.
    ///
    /// # Panics
    ///
    /// Always panics until the exercise is implemented.
    pub fn of_rna(&self, rna: &str) -> Option<Vec<&'a str>> {
        unimplemented!("Return a list of protein names that correspond to the '{}' RNA string or None if the RNA string is invalid", rna)
    }
}

/// Builds a [`CodonsInfo`] from `(codon, protein name)` pairs.
///
/// # Panics
///
/// Always panics until the exercise is implemented.
pub fn parse<'a>(pairs: Vec<(&'a str, &'a str)>) -> CodonsInfo<'a> {
    unimplemented!(
        "Construct a new CodonsInfo struct from given pairs: {:?}",
        pairs
    )
}
3. 测试代码查看
// Test suite for the exercise. Every test builds a fresh lookup table with
// `parse(make_pairs())` and then queries it.

#[test]
fn test_methionine() {
    let info = parse(make_pairs());
    assert_eq!(info.name_for("AUG"), Some("methionine"));
}

#[test]
fn test_cysteine_tgt() {
    let info = parse(make_pairs());
    assert_eq!(info.name_for("UGU"), Some("cysteine"));
}

#[test]
fn test_stop() {
    let info = parse(make_pairs());
    assert_eq!(info.name_for("UAA"), Some("stop codon"));
}

#[test]
fn test_valine() {
    let info = parse(make_pairs());
    assert_eq!(info.name_for("GUU"), Some("valine"));
}

#[test]
fn test_isoleucine() {
    let info = parse(make_pairs());
    assert_eq!(info.name_for("AUU"), Some("isoleucine"));
}

#[test]
fn test_arginine_name() {
    let info = parse(make_pairs());
    assert_eq!(info.name_for("CGA"), Some("arginine"));
    assert_eq!(info.name_for("AGA"), Some("arginine"));
    assert_eq!(info.name_for("AGG"), Some("arginine"));
}

#[test]
fn empty_is_invalid() {
    let info = parse(make_pairs());
    assert!(info.name_for("").is_none());
}

#[test]
fn x_is_not_shorthand_so_is_invalid() {
    let info = parse(make_pairs());
    assert!(info.name_for("VWX").is_none());
}

#[test]
fn too_short_is_invalid() {
    let info = parse(make_pairs());
    assert!(info.name_for("AU").is_none());
}

#[test]
fn too_long_is_invalid() {
    let info = parse(make_pairs());
    assert!(info.name_for("ATTA").is_none());
}

#[test]
fn test_translates_rna_strand_into_correct_protein() {
    let info = parse(make_pairs());
    assert_eq!(
        info.of_rna("AUGUUUUGG"),
        Some(vec!["methionine", "phenylalanine", "tryptophan"])
    );
}

#[test]
fn test_stops_translation_if_stop_codon_present() {
    let info = parse(make_pairs());
    assert_eq!(
        info.of_rna("AUGUUUUAA"),
        Some(vec!["methionine", "phenylalanine"])
    );
}

#[test]
fn test_stops_translation_of_longer_strand() {
    let info = parse(make_pairs());
    assert_eq!(
        info.of_rna("UGGUGUUAUUAAUGGUUU"),
        Some(vec!["tryptophan", "cysteine", "tyrosine"])
    );
}

#[test]
fn test_invalid_codons() {
    let info = parse(make_pairs());
    assert!(info.of_rna("CARROT").is_none());
}

/// The input data constructor. Returns a list of `(codon, name)` pairs,
/// ordered by protein name.
fn make_pairs() -> Vec<(&'static str, &'static str)> {
    let grouped = vec![
        ("isoleucine", vec!["AUU", "AUC", "AUA"]),
        ("valine", vec!["GUU", "GUC", "GUA", "GUG"]),
        ("phenylalanine", vec!["UUU", "UUC"]),
        ("methionine", vec!["AUG"]),
        ("cysteine", vec!["UGU", "UGC"]),
        ("alanine", vec!["GCU", "GCC", "GCA", "GCG"]),
        ("glycine", vec!["GGU", "GGC", "GGA", "GGG"]),
        ("proline", vec!["CCU", "CCC", "CCA", "CCG"]),
        ("threonine", vec!["ACU", "ACC", "ACA", "ACG"]),
        ("serine", vec!["AGU", "AGC"]),
        ("tyrosine", vec!["UAU", "UAC"]),
        ("tryptophan", vec!["UGG"]),
        ("glutamine", vec!["CAA", "CAG"]),
        ("asparagine", vec!["AAU", "AAC"]),
        ("histidine", vec!["CAU", "CAC"]),
        ("glutamic acid", vec!["GAA", "GAG"]),
        ("aspartic acid", vec!["GAU", "GAC"]),
        ("lysine", vec!["AAA", "AAG"]),
        ("arginine", vec!["CGU", "CGC", "CGA", "CGG", "AGA", "AGG"]),
        ("stop codon", vec!["UAA", "UAG", "UGA"]),
    ];

    // Flatten each (name, [codons]) group into individual (codon, name) pairs.
    let mut pairs: Vec<(&'static str, &'static str)> = grouped
        .into_iter()
        .flat_map(|(name, codons)| codons.into_iter().map(move |codon| (codon, name)))
        .collect();

    // Stable sort by protein name, so codons of one protein keep their listed order.
    pairs.sort_by(|&(_, a), &(_, b)| a.cmp(b));
    pairs
}
4. 答案
use std::collections::HashMap;

/// Lookup table from codon to protein name.
///
/// Both keys and values borrow from the pairs given to [`parse`], so names
/// returned by the methods live for `'a`, not for the lifetime of the query.
/// (The exercise stub names this type `CodonsInfo`; the tests only go through
/// [`parse`], so either name satisfies them.)
#[derive(Debug, Clone)]
pub struct CodonInfo<'a> {
    actual_codons: HashMap<&'a str, &'a str>,
}

/// Builds a [`CodonInfo`] from `(codon, protein name)` pairs.
///
/// If the same codon appears more than once, the last pair wins (standard
/// `HashMap` collection behaviour).
pub fn parse<'a>(pairs: Vec<(&'a str, &'a str)>) -> CodonInfo<'a> {
    CodonInfo {
        actual_codons: pairs.into_iter().collect(),
    }
}

impl<'a> CodonInfo<'a> {
    /// Returns the protein name registered for `codon`, or `None` if the
    /// codon is not in the table (empty, too short, too long, unknown, ...).
    pub fn name_for(&self, codon: &str) -> Option<&'a str> {
        // `&'a str: Borrow<str>`, so the map can be queried with a plain `&str`
        // of any lifetime; `copied` turns `Option<&&'a str>` into `Option<&'a str>`.
        self.actual_codons.get(codon).copied()
    }

    /// Translates `strand` three characters at a time into protein names.
    ///
    /// Translation stops (successfully) at the first codon whose name is
    /// `"stop codon"`; everything after it is ignored. Returns `None` if any
    /// chunk before that point is not in the table, which includes a trailing
    /// chunk of fewer than three characters when the table only holds
    /// three-character codons.
    pub fn of_rna(&self, strand: &str) -> Option<Vec<&'a str>> {
        // Chunk by `char`, not by byte, so non-ASCII input cannot split a
        // code point in the middle.
        let nucleotides: Vec<char> = strand.chars().collect();

        nucleotides
            .chunks(3)
            .map(|chunk| self.name_for(&chunk.iter().collect::<String>()))
            // Let `None` through so that collecting into `Option<Vec<_>>`
            // short-circuits to `None` on the first unknown codon.
            .take_while(|name| *name != Some("stop codon"))
            .collect()
    }
}