Variable Length Quantity

1. Readme

可变长度数量

实现可变长度数量编码和解码.

这项工作的目标是实现VLQ的编码/解码。

简而言之,此编码的目标是以节省字节的方式,对整数值进行编码。只有每个字节的前 7 位是有效的(右对齐;有点像 ASCII 字节)。因此,如果您有 32 位值,则必须解开为一系列 7 位字节。当然,根据您的整数,您将拥有可变数量的字节。要指出哪个是系列的最后一个字节,请将 #7 位清零。而所有前面的字节中,您都要设置#7 位。

所以,如果一个整数介于0-127,它可以表示为一个字节。虽然 VLQ 可以处理任意大小的数字,但对于本练习,我们将仅限于适合 32 位无符号整数的数字。以下是整数作为 32 位值的示例,以及它们转换为的可变长度数量:

 NUMBER        VARIABLE QUANTITY
00000000              00
00000040              40
0000007F              7F
00000080             81 00
00002000             C0 00
00003FFF             FF 7F
00004000           81 80 00
00100000           C0 80 00
001FFFFF           FF FF 7F
00200000          81 80 80 00
08000000          C0 80 80 00
0FFFFFFF          FF FF FF 7F

资源

一个糟糕的 Splice 开发人员,必须实现 MIDI 编码/解码.https://splice.com

2. 开始你的表演

#[derive(Debug, PartialEq)]
pub enum Error {
   IncompleteNumber,
   Overflow,
}

/// Convert a list of numbers to a stream of bytes encoded with variable length encoding.
pub fn to_bytes(values: &[u32]) -> Vec<u8> {
   unimplemented!("Convert the values {:?} to a list of bytes", values)
}

/// Given a stream of bytes, extract all numbers which are encoded in there.
pub fn from_bytes(bytes: &[u8]) -> Result<Vec<u32>, Error> {
   unimplemented!("Convert the list of bytes {:?} to a list of numbers", bytes)
}

3. 测试代码查看


# #![allow(unused_variables)]
#fn main() {
#[test]
fn to_single_byte() {
   assert_eq!(&[0x00], to_bytes(&[0x00]).as_slice());
   assert_eq!(&[0x40], to_bytes(&[0x40]).as_slice());
   assert_eq!(&[0x7f], to_bytes(&[0x7f]).as_slice());
}

#[test]
//#[ignore]
fn to_double_byte() {
   assert_eq!(&[0x81, 0x00], to_bytes(&[0x80]).as_slice());
   assert_eq!(&[0xc0, 0x00], to_bytes(&[0x2000]).as_slice());
   assert_eq!(&[0xff, 0x7f], to_bytes(&[0x3fff]).as_slice());
}

#[test]
//#[ignore]
fn to_triple_byte() {
   assert_eq!(&[0x81, 0x80, 0x00], to_bytes(&[0x4000]).as_slice());
   assert_eq!(&[0xc0, 0x80, 0x00], to_bytes(&[0x10_0000]).as_slice());
   assert_eq!(&[0xff, 0xff, 0x7f], to_bytes(&[0x1f_ffff]).as_slice());
}

#[test]
//#[ignore]
fn to_quadruple_byte() {
   assert_eq!(&[0x81, 0x80, 0x80, 0x00], to_bytes(&[0x20_0000]).as_slice());
   assert_eq!(
       &[0xc0, 0x80, 0x80, 0x00],
       to_bytes(&[0x0800_0000]).as_slice()
   );
   assert_eq!(
       &[0xff, 0xff, 0xff, 0x7f],
       to_bytes(&[0x0fff_ffff]).as_slice()
   );
}

#[test]
//#[ignore]
fn to_quintuple_byte() {
   assert_eq!(
       &[0x81, 0x80, 0x80, 0x80, 0x00],
       to_bytes(&[0x1000_0000]).as_slice()
   );
   assert_eq!(
       &[0x8f, 0xf8, 0x80, 0x80, 0x00],
       to_bytes(&[0xff00_0000]).as_slice()
   );
   assert_eq!(
       &[0x8f, 0xff, 0xff, 0xff, 0x7f],
       to_bytes(&[0xffff_ffff]).as_slice()
   );
}

#[test]
//#[ignore]
fn from_bytes_test() {
   assert_eq!(Ok(vec![0x7f]), from_bytes(&[0x7f]));
   assert_eq!(Ok(vec![0x2000]), from_bytes(&[0xc0, 0x00]));
   assert_eq!(Ok(vec![0x1f_ffff]), from_bytes(&[0xff, 0xff, 0x7f]));
   assert_eq!(Ok(vec![0x20_0000]), from_bytes(&[0x81, 0x80, 0x80, 0x00]));
   assert_eq!(
       Ok(vec![0xffff_ffff]),
       from_bytes(&[0x8f, 0xff, 0xff, 0xff, 0x7f])
   );
}

#[test]
//#[ignore]
fn to_bytes_multiple_values() {
   assert_eq!(&[0x40, 0x7f], to_bytes(&[0x40, 0x7f]).as_slice());
   assert_eq!(
       &[0x81, 0x80, 0x00, 0xc8, 0xe8, 0x56],
       to_bytes(&[0x4000, 0x12_3456]).as_slice()
   );
   assert_eq!(
       &[
           0xc0, 0x00, 0xc8, 0xe8, 0x56, 0xff, 0xff, 0xff, 0x7f, 0x00, 0xff, 0x7f, 0x81, 0x80,
           0x00,
       ],
       to_bytes(&[0x2000, 0x12_3456, 0x0fff_ffff, 0x00, 0x3fff, 0x4000]).as_slice()
   );
}

#[test]
//#[ignore]
fn from_bytes_multiple_values() {
   assert_eq!(
       Ok(vec![0x2000, 0x12_3456, 0x0fff_ffff, 0x00, 0x3fff, 0x4000]),
       from_bytes(&[
           0xc0, 0x00, 0xc8, 0xe8, 0x56, 0xff, 0xff, 0xff, 0x7f, 0x00, 0xff, 0x7f, 0x81, 0x80,
           0x00,
       ])
   );
}

#[test]
//#[ignore]
fn incomplete_byte_sequence() {
   assert_eq!(Err(Error::IncompleteNumber), from_bytes(&[0xff]));
}

#[test]
//#[ignore]
fn zero_incomplete_byte_sequence() {
   assert_eq!(Err(Error::IncompleteNumber), from_bytes(&[0x80]));
}

#[test]
//#[ignore]
fn overflow_u32() {
   assert_eq!(
       Err(Error::Overflow),
       from_bytes(&[0xff, 0xff, 0xff, 0xff, 0x7f])
   );
}

#[test]
//#[ignore]
fn chained_execution_is_identity() {
   let test = &[0xf2, 0xf6, 0x96, 0x9c, 0x3b, 0x39, 0x2e, 0x30, 0xb3, 0x24];
   assert_eq!(Ok(Vec::from(&test[..])), from_bytes(&to_bytes(test)));
}

#}

4. 答案


# #![allow(unused_variables)]
#fn main() {
#[derive(Debug, PartialEq)]
pub enum Error {
   IncompleteNumber,
   Overflow,
}

/// Convert a list of numbers to a stream of bytes encoded with variable length encoding.
pub fn to_bytes(values: &[u32]) -> Vec<u8> {
   let mut res = vec![];

   for value in values {
       res.append(&mut to_bytes_single(*value));
   }
   res
}

fn to_bytes_single(mut value: u32) -> Vec<u8> {
   // over allocates, but avoids growth
   let mut res = Vec::with_capacity(4);

   // 0 must be handled specially, because we need to push one byte
   if value == 0 {
       return vec![0];
   }

   while value > 0 {
       // take the lower 7 bits
       let mut tmp = (value & 0x7f) as u8;
       // remove them from the original value
       value >>= 7;

       // set continuation bit
       if !res.is_empty() {
           tmp |= 0x80;
       }

       res.push(tmp);
   }

   // order is wrong due to the way we pushed the data onto it
   res.reverse();
   res
}

// Alternative solution with hardcoded borders
// /// Convert a list of numbers to a stream of bytes encoded with variable length encoding.
// pub fn to_bytes(values: &[u32]) -> Vec<u8> {
//     let mut res = vec![];
//
//     for &value in values {
//         if value <= 0x7f {
//             res.push(value as u8);
//         } else if value <= 0x3fff {
//             res.push(((value >> 7) & 0xff) as u8 | 0x80);
//             res.push((value & 0x7f) as u8);
//         } else if value <= 0x1f_ffff {
//             res.push(((value >> 14) & 0xff) as u8 | 0x80);
//             res.push(((value >> 7) & 0xff) as u8 | 0x80);
//             res.push((value & 0x7f) as u8);
//         } else if value <= 0x0fff_ffff {
//             res.push(((value >> 21) & 0xff) as u8 | 0x80);
//             res.push(((value >> 14) & 0xff) as u8 | 0x80);
//             res.push(((value >> 7) & 0xff) as u8 | 0x80);
//             res.push((value & 0x7f) as u8);
//         } else {
//             res.push(((value >> 28) & 0xff) as u8 | 0x80);
//             res.push(((value >> 21) & 0xff) as u8 | 0x80);
//             res.push(((value >> 14) & 0xff) as u8 | 0x80);
//             res.push(((value >> 7) & 0xff) as u8 | 0x80);
//             res.push((value & 0x7f) as u8);
//         }
//     }
//     res
// }

/// Given a stream of bytes, extract all numbers which are encoded in there.
pub fn from_bytes(bytes: &[u8]) -> Result<Vec<u32>, Error> {
   let mut res = vec![];
   let mut tmp = 0;
   for (i, b) in bytes.iter().enumerate() {
       // test if first 7 bit are set, to check for overflow
       if (tmp & 0xfe_00_00_00) > 0 {
           return Err(Error::Overflow);
       }

       // append bytes of b to tmp
       tmp = (tmp << 7) | (b & 0x7f) as u32;

       if 0x80 & b == 0 {
           // continuation bit not set, number if complete
           res.push(tmp);
           tmp = 0;
       } else {
           // check for incomplete bytes
           if i + 1 == bytes.len() {
               // the next index would be past the end,
               // i.e. there are no more bytes.
               return Err(Error::IncompleteNumber);
           }
       }
   }

   Ok(res)
}

#}



填充/相关