Variable Length Quantity
1. Readme
可变长度数量
实现可变长度数量编码和解码.
这项工作的目标是实现VLQ的编码/解码。
简而言之,此编码的目标是以节省字节的方式,对整数值进行编码。只有每个字节的前 7 位是有效的(右对齐;有点像 ASCII 字节)。因此,如果您有 32 位值,则必须解开为一系列 7 位字节。当然,根据您的整数,您将拥有可变数量的字节。要指出哪个是系列的最后一个字节,请将 #7 位清零。而所有前面的字节中,您都要设置#7 位。
所以,如果一个整数介于0-127
,它可以表示为一个字节。虽然 VLQ 可以处理任意大小的数字,但对于本练习,我们将仅限于适合 32 位无符号整数的数字。以下是整数作为 32 位值的示例,以及它们转换为的可变长度数量:
NUMBER VARIABLE QUANTITY
00000000 00
00000040 40
0000007F 7F
00000080 81 00
00002000 C0 00
00003FFF FF 7F
00004000 81 80 00
00100000 C0 80 00
001FFFFF FF FF 7F
00200000 81 80 80 00
08000000 C0 80 80 00
0FFFFFFF FF FF FF 7F
资源
一个糟糕的 Splice 开发人员,必须实现 MIDI 编码/解码.https://splice.com
2. 开始你的表演
#[derive(Debug, PartialEq)] pub enum Error { IncompleteNumber, Overflow, } /// Convert a list of numbers to a stream of bytes encoded with variable length encoding. pub fn to_bytes(values: &[u32]) -> Vec<u8> { unimplemented!("Convert the values {:?} to a list of bytes", values) } /// Given a stream of bytes, extract all numbers which are encoded in there. pub fn from_bytes(bytes: &[u8]) -> Result<Vec<u32>, Error> { unimplemented!("Convert the list of bytes {:?} to a list of numbers", bytes) }
3. 测试代码查看
# #![allow(unused_variables)] #fn main() { #[test] fn to_single_byte() { assert_eq!(&[0x00], to_bytes(&[0x00]).as_slice()); assert_eq!(&[0x40], to_bytes(&[0x40]).as_slice()); assert_eq!(&[0x7f], to_bytes(&[0x7f]).as_slice()); } #[test] //#[ignore] fn to_double_byte() { assert_eq!(&[0x81, 0x00], to_bytes(&[0x80]).as_slice()); assert_eq!(&[0xc0, 0x00], to_bytes(&[0x2000]).as_slice()); assert_eq!(&[0xff, 0x7f], to_bytes(&[0x3fff]).as_slice()); } #[test] //#[ignore] fn to_triple_byte() { assert_eq!(&[0x81, 0x80, 0x00], to_bytes(&[0x4000]).as_slice()); assert_eq!(&[0xc0, 0x80, 0x00], to_bytes(&[0x10_0000]).as_slice()); assert_eq!(&[0xff, 0xff, 0x7f], to_bytes(&[0x1f_ffff]).as_slice()); } #[test] //#[ignore] fn to_quadruple_byte() { assert_eq!(&[0x81, 0x80, 0x80, 0x00], to_bytes(&[0x20_0000]).as_slice()); assert_eq!( &[0xc0, 0x80, 0x80, 0x00], to_bytes(&[0x0800_0000]).as_slice() ); assert_eq!( &[0xff, 0xff, 0xff, 0x7f], to_bytes(&[0x0fff_ffff]).as_slice() ); } #[test] //#[ignore] fn to_quintuple_byte() { assert_eq!( &[0x81, 0x80, 0x80, 0x80, 0x00], to_bytes(&[0x1000_0000]).as_slice() ); assert_eq!( &[0x8f, 0xf8, 0x80, 0x80, 0x00], to_bytes(&[0xff00_0000]).as_slice() ); assert_eq!( &[0x8f, 0xff, 0xff, 0xff, 0x7f], to_bytes(&[0xffff_ffff]).as_slice() ); } #[test] //#[ignore] fn from_bytes_test() { assert_eq!(Ok(vec![0x7f]), from_bytes(&[0x7f])); assert_eq!(Ok(vec![0x2000]), from_bytes(&[0xc0, 0x00])); assert_eq!(Ok(vec![0x1f_ffff]), from_bytes(&[0xff, 0xff, 0x7f])); assert_eq!(Ok(vec![0x20_0000]), from_bytes(&[0x81, 0x80, 0x80, 0x00])); assert_eq!( Ok(vec![0xffff_ffff]), from_bytes(&[0x8f, 0xff, 0xff, 0xff, 0x7f]) ); } #[test] //#[ignore] fn to_bytes_multiple_values() { assert_eq!(&[0x40, 0x7f], to_bytes(&[0x40, 0x7f]).as_slice()); assert_eq!( &[0x81, 0x80, 0x00, 0xc8, 0xe8, 0x56], to_bytes(&[0x4000, 0x12_3456]).as_slice() ); assert_eq!( &[ 0xc0, 0x00, 0xc8, 0xe8, 0x56, 0xff, 0xff, 0xff, 0x7f, 0x00, 0xff, 0x7f, 0x81, 0x80, 0x00, ], to_bytes(&[0x2000, 0x12_3456, 0x0fff_ffff, 0x00, 0x3fff, 0x4000]).as_slice() ); } #[test] //#[ignore] fn from_bytes_multiple_values() { assert_eq!( Ok(vec![0x2000, 0x12_3456, 0x0fff_ffff, 0x00, 0x3fff, 0x4000]), from_bytes(&[ 0xc0, 0x00, 0xc8, 0xe8, 0x56, 0xff, 0xff, 0xff, 0x7f, 0x00, 0xff, 0x7f, 0x81, 0x80, 0x00, ]) ); } #[test] //#[ignore] fn incomplete_byte_sequence() { assert_eq!(Err(Error::IncompleteNumber), from_bytes(&[0xff])); } #[test] //#[ignore] fn zero_incomplete_byte_sequence() { assert_eq!(Err(Error::IncompleteNumber), from_bytes(&[0x80])); } #[test] //#[ignore] fn overflow_u32() { assert_eq!( Err(Error::Overflow), from_bytes(&[0xff, 0xff, 0xff, 0xff, 0x7f]) ); } #[test] //#[ignore] fn chained_execution_is_identity() { let test = &[0xf2, 0xf6, 0x96, 0x9c, 0x3b, 0x39, 0x2e, 0x30, 0xb3, 0x24]; assert_eq!(Ok(Vec::from(&test[..])), from_bytes(&to_bytes(test))); } #}
4. 答案
# #![allow(unused_variables)] #fn main() { #[derive(Debug, PartialEq)] pub enum Error { IncompleteNumber, Overflow, } /// Convert a list of numbers to a stream of bytes encoded with variable length encoding. pub fn to_bytes(values: &[u32]) -> Vec<u8> { let mut res = vec![]; for value in values { res.append(&mut to_bytes_single(*value)); } res } fn to_bytes_single(mut value: u32) -> Vec<u8> { // over allocates, but avoids growth let mut res = Vec::with_capacity(4); // 0 must be handled specially, because we need to push one byte if value == 0 { return vec![0]; } while value > 0 { // take the lower 7 bits let mut tmp = (value & 0x7f) as u8; // remove them from the original value value >>= 7; // set continuation bit if !res.is_empty() { tmp |= 0x80; } res.push(tmp); } // order is wrong due to the way we pushed the data onto it res.reverse(); res } // Alternative solution with hardcoded borders // /// Convert a list of numbers to a stream of bytes encoded with variable length encoding. // pub fn to_bytes(values: &[u32]) -> Vec<u8> { // let mut res = vec![]; // // for &value in values { // if value <= 0x7f { // res.push(value as u8); // } else if value <= 0x3fff { // res.push(((value >> 7) & 0xff) as u8 | 0x80); // res.push((value & 0x7f) as u8); // } else if value <= 0x1f_ffff { // res.push(((value >> 14) & 0xff) as u8 | 0x80); // res.push(((value >> 7) & 0xff) as u8 | 0x80); // res.push((value & 0x7f) as u8); // } else if value <= 0x0fff_ffff { // res.push(((value >> 21) & 0xff) as u8 | 0x80); // res.push(((value >> 14) & 0xff) as u8 | 0x80); // res.push(((value >> 7) & 0xff) as u8 | 0x80); // res.push((value & 0x7f) as u8); // } else { // res.push(((value >> 28) & 0xff) as u8 | 0x80); // res.push(((value >> 21) & 0xff) as u8 | 0x80); // res.push(((value >> 14) & 0xff) as u8 | 0x80); // res.push(((value >> 7) & 0xff) as u8 | 0x80); // res.push((value & 0x7f) as u8); // } // } // res // } /// Given a stream of bytes, extract all numbers which are encoded in there. pub fn from_bytes(bytes: &[u8]) -> Result<Vec<u32>, Error> { let mut res = vec![]; let mut tmp = 0; for (i, b) in bytes.iter().enumerate() { // test if first 7 bit are set, to check for overflow if (tmp & 0xfe_00_00_00) > 0 { return Err(Error::Overflow); } // append bytes of b to tmp tmp = (tmp << 7) | (b & 0x7f) as u32; if 0x80 & b == 0 { // continuation bit not set, number if complete res.push(tmp); tmp = 0; } else { // check for incomplete bytes if i + 1 == bytes.len() { // the next index would be past the end, // i.e. there are no more bytes. return Err(Error::IncompleteNumber); } } } Ok(res) } #}