Parallel Letter Frequency

1. Readme

并发字母频率

使用并行计算,文本中的字母频率。

并行性是并行的,也可以按顺序进行。一个常见的例子是计算字母的频率。创建一个函数,返回文本列表中每个字母的总频率,并使用并行性。

Rust 中的并发性字母频率

在这里了解更多关于 Rust 的并发性:

加分

这个练习还包括一个基准,以一个顺序实现为基线。您可以将您的解决方案与基准进行比较。观察不同大小的输入,对每个性能的影响。可以使用并行编程技术。来超越基准吗?

在本文中,test::Bencher 是不稳定的,只能在nightlyRust 可用。用 Cargo 运行基准:

cargo bench

如果你使用 rustup.rs:

rustup run nightly cargo bench

了解 nightly Rust 的更多信息:

2. 开始你的表演

use std::collections::HashMap;

pub fn frequency(input: &[&str], worker_count: usize) -> HashMap<char, usize> {
   unimplemented!(
       "Count the frequency of letters in the given input '{:?}'. Ensure that you are using {} to process the input.",
       input,
       match worker_count {
           1 => format!("1 worker"),
           _ => format!("{} workers", worker_count),
       }
   );
}

3. 测试代码查看


# #![allow(unused_variables)]
#fn main() {
// use std::collections::HashMap;

// Poem by Friedrich Schiller. The corresponding music is the European Anthem.
const ODE_AN_DIE_FREUDE: [&'static str; 8] = [
   "Freude schöner Götterfunken",
   "Tochter aus Elysium,",
   "Wir betreten feuertrunken,",
   "Himmlische, dein Heiligtum!",
   "Deine Zauber binden wieder",
   "Was die Mode streng geteilt;",
   "Alle Menschen werden Brüder,",
   "Wo dein sanfter Flügel weilt.",
];

// Dutch national anthem
const WILHELMUS: [&'static str; 8] = [
   "Wilhelmus van Nassouwe",
   "ben ik, van Duitsen bloed,",
   "den vaderland getrouwe",
   "blijf ik tot in den dood.",
   "Een Prinse van Oranje",
   "ben ik, vrij, onverveerd,",
   "den Koning van Hispanje",
   "heb ik altijd geëerd.",
];

// American national anthem
const STAR_SPANGLED_BANNER: [&'static str; 8] = [
   "O say can you see by the dawn's early light,",
   "What so proudly we hailed at the twilight's last gleaming,",
   "Whose broad stripes and bright stars through the perilous fight,",
   "O'er the ramparts we watched, were so gallantly streaming?",
   "And the rockets' red glare, the bombs bursting in air,",
   "Gave proof through the night that our flag was still there;",
   "O say does that star-spangled banner yet wave,",
   "O'er the land of the free and the home of the brave?",
];

#[test]
fn test_no_texts() {
   assert_eq!(frequency(&[], 4), HashMap::new());
}

#[test]
//#[ignore]
fn test_one_letter() {
   let mut hm = HashMap::new();
   hm.insert('a', 1);
   assert_eq!(frequency(&["a"], 4), hm);
}

#[test]
//#[ignore]
fn test_case_insensitivity() {
   let mut hm = HashMap::new();
   hm.insert('a', 2);
   assert_eq!(frequency(&["aA"], 4), hm);
}

#[test]
//#[ignore]
fn test_many_empty_lines() {
   let mut v = Vec::with_capacity(1000);
   for _ in 0..1000 {
       v.push("");
   }
   assert_eq!(frequency(&v[..], 4), HashMap::new());
}

#[test]
//#[ignore]
fn test_many_times_same_text() {
   let mut v = Vec::with_capacity(1000);
   for _ in 0..1000 {
       v.push("abc");
   }
   let mut hm = HashMap::new();
   hm.insert('a', 1000);
   hm.insert('b', 1000);
   hm.insert('c', 1000);
   assert_eq!(frequency(&v[..], 4), hm);
}

#[test]
//#[ignore]
fn test_punctuation_doesnt_count() {
   assert!(!frequency(&WILHELMUS, 4).contains_key(&','));
}

#[test]
//#[ignore]
fn test_numbers_dont_count() {
   assert!(!frequency(&["Testing, 1, 2, 3"], 4).contains_key(&'1'));
}

#[test]
//#[ignore]
fn test_all_three_anthems_1_worker() {
   let mut v = Vec::new();
   for anthem in [ODE_AN_DIE_FREUDE, WILHELMUS, STAR_SPANGLED_BANNER].iter() {
       for line in anthem.iter() {
           v.push(*line);
       }
   }
   let freqs = frequency(&v[..], 1);
   assert_eq!(freqs.get(&'a'), Some(&49));
   assert_eq!(freqs.get(&'t'), Some(&56));
   assert_eq!(freqs.get(&'ü'), Some(&2));
}

#[test]
//#[ignore]
fn test_all_three_anthems_3_workers() {
   let mut v = Vec::new();
   for anthem in [ODE_AN_DIE_FREUDE, WILHELMUS, STAR_SPANGLED_BANNER].iter() {
       for line in anthem.iter() {
           v.push(*line);
       }
   }
   let freqs = frequency(&v[..], 3);
   assert_eq!(freqs.get(&'a'), Some(&49));
   assert_eq!(freqs.get(&'t'), Some(&56));
   assert_eq!(freqs.get(&'ü'), Some(&2));
}

#}

4. 答案


# #![allow(unused_variables)]
#fn main() {
use std::collections::HashMap;
use std::sync::mpsc::channel;
use std::thread;

/// Compute the frequency of each letter (technically of each unicode codepoint) using the given
/// number of worker threads.
pub fn frequency(texts: &[&str], num_workers: usize) -> HashMap<char, usize> {
   assert!(num_workers > 0);
   let part_size_floor = texts.len() / num_workers;
   let rem = texts.len() % num_workers;
   let part_size = if rem > 0 {
       part_size_floor + 1
   } else {
       part_size_floor
   };
   let mut parts: Vec<Vec<String>> = Vec::with_capacity(part_size);
   for _ in 0..num_workers {
       parts.push(Vec::with_capacity(part_size));
   }
   let mut i = 0;
   for line in texts.iter() {
       // We'll need to clone those strings in order to satisfy some lifetime guarantees. Basically
       // it's hard for the system to be sure that the threads spawned don't outlive the strings.
       parts[i].push(line.to_string());
       i = (i + 1) % num_workers;
   }

   let (tx, rx) = channel();

   for part in parts {
       let tx = tx.clone();
       thread::spawn(move || {
           tx.send(count(part)).unwrap();
       });
   }

   let mut results: HashMap<char, usize> = HashMap::new();
   for _ in 0..num_workers {
       let part_results = rx.recv().unwrap();
       for (c, n) in part_results.into_iter() {
           *results.entry(c).or_insert(0) += n;
       }
   }
   results
}

fn count(lines: Vec<String>) -> HashMap<char, usize> {
   let mut results: HashMap<char, usize> = HashMap::new();
   for line in lines.iter() {
       for c in line.chars() {
           if c.is_alphabetic() {
               *results.entry(c.to_lowercase().next().unwrap()).or_insert(0) += 1;
           }
       }
   }
   results
}

#}



填充/相关