use crate::WaveletTree;
use bio::data_structures::rank_select::RankSelect;
use bv::BitVec;
use serde::{Deserialize, Serialize};
use std::fmt;
use std::iter::FromIterator;
#[derive(Serialize, Deserialize, PartialEq, Debug)]
pub struct WaveletTreePointer<T: PartialEq + Copy> {
root_node: Box<WaveletTreeNode>,
alphabet: Vec<T>,
}
#[derive(Serialize, Deserialize)]
struct WaveletTreeNode {
bit_vec: RankSelect,
left_child: Option<Box<WaveletTreeNode>>,
right_child: Option<Box<WaveletTreeNode>>,
}
impl WaveletTreeNode {
fn new<T: PartialEq + Copy>(
input_string: Vec<T>,
alphabet: &[T],
) -> Option<Box<WaveletTreeNode>> {
if alphabet.len() >= 2 {
let (left_alphabet, right_alphabet) = alphabet.split_at(alphabet.len() / 2);
let left_string: Vec<T> = input_string
.clone()
.into_iter()
.filter(|c| left_alphabet.contains(c))
.collect();
let right_string: Vec<T> = input_string
.clone()
.into_iter()
.filter(|c| right_alphabet.contains(c))
.collect();
let mut bitvector: BitVec<u8> = BitVec::with_capacity(input_string.len() as u64);
input_string.iter().for_each(|character|
bitvector.push(right_alphabet.contains(&character)));
let rs = RankSelect::new(bitvector, super::SUPERBLOCK_SIZE);
let left_child = WaveletTreeNode::new(left_string, left_alphabet);
let right_child = WaveletTreeNode::new(right_string, right_alphabet);
Some(Box::new(WaveletTreeNode {
bit_vec: rs,
left_child,
right_child,
}))
} else {
None
}
}
fn access<T: PartialEq + Copy>(&self, position: u64, alphabet: &[T]) -> Option<T> {
if self.bit_vec.bits().len() <= position {
return None;
}
if alphabet.len() <= 1 {
return Some(alphabet[0]);
}
let (left_alphabet, right_alphabet) = alphabet.split_at(alphabet.len() / 2);
if !self.bit_vec.bits()[position] {
if let Some(ref lc) = self.left_child {
let num = self.bit_vec.rank_0(position).unwrap();
lc.access(num - 1, &left_alphabet)
} else {
Some(left_alphabet[0])
}
} else {
if let Some(ref rc) = self.right_child {
let num = self.bit_vec.rank_1(position).unwrap();
rc.access(num - 1, &right_alphabet)
} else {
Some(right_alphabet[0])
}
}
}
fn rank<T: PartialEq + Copy>(&self, alphabet: &[T], object: T, n: u64) -> Option<u64> {
let (left_alphabet, right_alphabet) = alphabet.split_at(alphabet.len() / 2);
if left_alphabet.contains(&object) {
if left_alphabet.len() == 1 {
self.bit_vec.rank_0(n)
} else {
if let Some(ref lc) = self.left_child {
match self.bit_vec.rank_0(n) {
None => None,
Some(0) => Some(0),
Some(i) => lc.rank(left_alphabet, object, i - 1),
}
} else {
panic!("rank: There should be a left child but there isn't!");
}
}
} else if right_alphabet.contains(&object) {
if right_alphabet.len() == 1 {
self.bit_vec.rank_1(n)
} else {
if let Some(ref rc) = self.right_child {
match self.bit_vec.rank_1(n) {
None => None,
Some(0) => Some(0),
Some(i) => rc.rank(right_alphabet, object, i - 1),
}
} else {
panic!("There should be a right child but there isn't!");
}
}
} else {
None
}
}
fn select<T: PartialEq + Copy>(&self, character: T, n: u64, alphabet: &[T]) -> Option<u64> {
let (left_alphabet, right_alphabet) = alphabet.split_at(alphabet.len() / 2);
if left_alphabet.contains(&character) {
if let Some(ref lc) = self.left_child {
match lc.select(character, n, left_alphabet) {
Some(x) => self.bit_vec.select_0(x + 1),
None => None,
}
} else {
self.bit_vec.select_0(n)
}
} else if right_alphabet.contains(&character) {
if let Some(ref rc) = self.right_child {
match rc.select(character, n, right_alphabet) {
Some(x) => self.bit_vec.select_1(x + 1),
None => None,
}
} else {
self.bit_vec.select_1(n)
}
} else {
None
}
}
}
impl<T: PartialEq + Copy> WaveletTree<T> for WaveletTreePointer<T> {
fn access(&self, position: u64) -> Option<T> {
self.root_node.access(position, &self.alphabet[..])
}
fn select(&self, object: T, n: u64) -> Option<u64> {
self.root_node.select(object, n, &self.alphabet[..])
}
fn rank(&self, object: T, n: u64) -> Option<u64> {
self.root_node.rank(&self.alphabet[..], object, n)
}
}
impl<T: PartialEq + Copy> WaveletTreePointer<T> {
pub fn new(vector: Vec<T>) -> WaveletTreePointer<T> {
let mut alphabet = Vec::new();
for v in vector.clone() {
if !alphabet.contains(&v) {
alphabet.push(v);
}
}
if alphabet.len() < 2 {
return WaveletTreePointer {
root_node: {
let mut bitvector = BitVec::new();
bitvector.resize(vector.len() as u64, true);
Box::new(WaveletTreeNode {
bit_vec: RankSelect::new(bitvector, super::SUPERBLOCK_SIZE),
left_child: None,
right_child: None,
})
},
alphabet,
};
}
let root_node =
WaveletTreeNode::new(vector, &alphabet)
.expect("Without a tree node the WaveletTree will be useless ");
WaveletTreePointer {
root_node,
alphabet,
}
}
}
impl PartialEq for WaveletTreeNode {
fn eq(&self, other: &WaveletTreeNode) -> bool {
self.bit_vec.bits() == other.bit_vec.bits()
&& self.left_child == other.left_child
&& self.right_child == other.right_child
}
}
impl PartialEq<&str> for WaveletTreePointer<char> {
fn eq(&self, other: &&str) -> bool {
if self.root_node.bit_vec.bits().len() as usize == other.chars().count() {
for (i, c) in other.chars().enumerate() {
match self.access(i as u64) {
None => return false,
Some(c2) => {
if c2 != c {
return false;
}
}
}
}
true
} else {
false
}
}
}
impl<T: PartialEq + Copy> PartialEq<Vec<T>> for WaveletTreePointer<T> {
fn eq(&self, other: &Vec<T>) -> bool {
if self.root_node.bit_vec.bits().len() as usize == other.len() {
for (i, c) in other.iter().enumerate() {
match self.access(i as u64) {
None => return false,
Some(c2) => {
if *c != c2 {
return false;
}
}
}
}
true
} else {
false
}
}
}
impl fmt::Debug for WaveletTreeNode {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"WaveletTreeNode {{ {:?}, l:{:?} r:{:?} }}",
self.bit_vec.bits(),
self.left_child,
self.right_child
)
}
}
impl From<String> for WaveletTreePointer<char> {
fn from(input: String) -> Self {
WaveletTreePointer::new(input.chars().collect())
}
}
impl From<&str> for WaveletTreePointer<char> {
fn from(input: &str) -> Self {
WaveletTreePointer::new(input.chars().collect())
}
}
impl<T: PartialEq + Copy> From<Vec<T>> for WaveletTreePointer<T> {
fn from(input: Vec<T>) -> Self {
WaveletTreePointer::new(input)
}
}
impl<T: PartialEq + Copy> FromIterator<T> for WaveletTreePointer<T> {
fn from_iter<I: IntoIterator<Item = T>>(input: I) -> Self {
WaveletTreePointer::new(input.into_iter().collect())
}
}
pub struct TreeIteratorPointer<T: PartialEq + Copy> {
index: usize,
tree: WaveletTreePointer<T>,
}
impl<T: PartialEq + Copy> Iterator for TreeIteratorPointer<T> {
type Item = T;
fn next(&mut self) -> Option<T> {
let result = self.tree.access(self.index as u64);
self.index += 1;
result
}
}
impl<T: PartialEq + Copy> IntoIterator for WaveletTreePointer<T> {
type Item = T;
type IntoIter = TreeIteratorPointer<T>;
fn into_iter(self) -> Self::IntoIter {
TreeIteratorPointer {
index: 0,
tree: self,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::WaveletTree;
use bv::BitVec;
use bv::BitsMut;
use unicode_segmentation::UnicodeSegmentation;
#[test]
fn test_2_letter_tree() {
let two_tree: WaveletTreePointer<char> = WaveletTreePointer::from("ab");
let alphabet: Vec<char> = "ab".chars().collect();
assert_eq!(two_tree.alphabet, alphabet);
assert_eq!(two_tree.root_node.right_child, None);
assert_eq!(two_tree.root_node.left_child, None);
let mut bits: BitVec<u8> = BitVec::new_fill(false, 2);
bits.set_bit(0, false);
bits.set_bit(1, true);
assert_eq!(*two_tree.root_node.bit_vec.bits(), bits);
}
#[test]
fn test_5_letter_tree() {
use itertools::Itertools;
let input_string = "abcda";
let five_tree: WaveletTreePointer<char> = WaveletTreePointer::from(input_string);
let alphabet: Vec<char> = input_string.chars().unique().collect();
assert_eq!(five_tree.alphabet, alphabet);
let mut bits: BitVec<u8> = BitVec::new_fill(false, 5);
bits.set_bit(2, true);
bits.set_bit(3, true);
assert_eq!(*five_tree.root_node.bit_vec.bits(), bits);
let left_child = five_tree.root_node.left_child.unwrap();
let left_child_bits = BitVec::from_bits(&[false, true, false]);
assert_eq!(*left_child.bit_vec.bits(), left_child_bits);
let right_child = five_tree.root_node.right_child.unwrap();
let right_child_bits = BitVec::from_bits(&[false, true]);
assert_eq!(*right_child.bit_vec.bits(), right_child_bits);
}
#[test]
fn test_7_letter_tree() {
let string: Vec<char> = "abcdefg".chars().collect();
let seven_tree: WaveletTreePointer<char> = WaveletTreePointer::new(string);
assert!(seven_tree.root_node.left_child.is_some());
assert!(seven_tree.root_node.right_child.is_some());
let lc = seven_tree.root_node.left_child.unwrap();
let rc = seven_tree.root_node.right_child.unwrap();
assert!(lc.left_child.is_none());
assert!(lc.right_child.is_some());
assert!(rc.left_child.is_some());
assert!(rc.right_child.is_some());
let left_right = BitVec::from_bits(&[false, true]);
assert_eq!(*lc.right_child.unwrap().bit_vec.bits(), left_right);
}
#[test]
fn test_new() {
let test_string = "ab";
let w_tree = WaveletTreePointer::from(test_string);
let mut bits: BitVec<u8> = BitVec::new_fill(false, 2);
bits.set_bit(0, false);
bits.set_bit(1, true);
let rs = RankSelect::new(bits, crate::SUPERBLOCK_SIZE);
let wavelet_tree_node = Box::new(WaveletTreeNode {
bit_vec: rs,
left_child: None,
right_child: None,
});
let wavelet_tree = WaveletTreePointer {
alphabet: vec!['a', 'b'],
root_node: wavelet_tree_node,
};
assert_eq!(w_tree, wavelet_tree);
}
#[test]
fn test_access_empty() {
let w_tree = WaveletTreePointer::from("");
assert_eq!(None, w_tree.access(0));
}
#[test]
fn test_access_1_letter() {
let test_string: Vec<char> = "a".chars().collect();
let w_tree = WaveletTreePointer::from("a");
assert_eq!(test_string[0], w_tree.access(0).unwrap());
assert_eq!(None, w_tree.access(1));
}
#[test]
fn test_access_7_letter() {
let test_string: Vec<char> = "abcdefg".chars().collect();
let w_tree = WaveletTreePointer::new(test_string.clone());
assert_eq!(test_string[0], w_tree.access(0).unwrap());
assert_eq!(test_string[1], w_tree.access(1).unwrap());
assert_eq!(test_string[2], w_tree.access(2).unwrap());
assert_eq!(test_string[4], w_tree.access(4).unwrap());
assert_eq!(test_string[6], w_tree.access(6).unwrap());
assert_eq!(None, w_tree.access(7));
}
#[test]
fn test_select_basic() {
let test_string = "cabdacdbabadcab";
let w_tree = WaveletTreePointer::from(test_string);
assert_eq!(w_tree.select('c', 2), Some(5));
}
#[test]
fn test_select_outside_alphabet() {
let test_string = "cabdacdbabadcab";
let w_tree = WaveletTreePointer::from(test_string);
assert_eq!(w_tree.select('f', 2), None);
}
#[test]
fn test_select_out_of_bounds() {
let test_string = "cabdacdbabadcab";
let w_tree = WaveletTreePointer::from(test_string);
assert_eq!(w_tree.select('c', 4), None);
}
#[test]
fn test_serialize_deserialize() {
let test_string = "cbacbcbcbbcabcabcabcabbca";
let w_tree = WaveletTreePointer::from(test_string);
let serialized = serde_json::to_string(&w_tree).unwrap();
let w_tree2: WaveletTreePointer<char> = serde_json::from_str(&serialized).unwrap();
assert_eq!(w_tree, w_tree2)
}
#[test]
fn test_select_5_letter() {
let test_string = "abcde";
let w_tree = WaveletTreePointer::from(test_string);
assert_eq!(w_tree.select('a', 1), Some(0));
assert_eq!(w_tree.select('b', 1), Some(1));
assert_eq!(w_tree.select('c', 1), Some(2));
assert_eq!(w_tree.select('d', 1), Some(3));
assert_eq!(w_tree.select('e', 1), Some(4));
}
#[test]
fn test_select_2_letter() {
let test_string = "ab";
let w_tree = WaveletTreePointer::from(test_string);
assert_eq!(w_tree.select('a', 1), Some(0));
assert_eq!(w_tree.select('b', 1), Some(1));
assert_eq!(w_tree.select('c', 1), None);
assert_eq!(w_tree.select('a', 2), None);
assert_eq!(w_tree.select('b', 3), None);
}
#[test]
fn test_rank_2_letters() {
let test_string = "ababababababab";
let w_tree = WaveletTreePointer::from(test_string);
assert_eq!(w_tree.rank('a', 0), Some(1));
assert_eq!(w_tree.rank('b', 0), Some(0));
assert_eq!(w_tree.rank('a', 6), Some(4));
assert_eq!(w_tree.rank('b', 6), Some(3));
assert_eq!(w_tree.rank('a', 13), Some(7));
assert_eq!(w_tree.rank('b', 13), Some(7));
assert_eq!(w_tree.rank('b', 17), None);
assert_eq!(w_tree.rank('c', 5), None);
}
#[test]
fn test_rank_5_letter() {
let test_string = "abcde";
let w_tree = WaveletTreePointer::from(test_string);
assert_eq!(w_tree.rank('a', 0), Some(1));
assert_eq!(w_tree.rank('b', 1), Some(1));
assert_eq!(w_tree.rank('c', 2), Some(1));
assert_eq!(w_tree.rank('d', 3), Some(1));
assert_eq!(w_tree.rank('e', 4), Some(1));
}
#[test]
fn test_rank_unicode() {
let test_string = "Hello world, こんにちは世界, Привет, мир";
let test_string = UnicodeSegmentation::graphemes(test_string, true).collect::<Vec<&str>>();
let w_tree = WaveletTreePointer::from_iter(test_string);
assert_eq!(w_tree.rank("o", 4), Some(1));
assert_eq!(w_tree.rank("世", 32), Some(1));
assert_eq!(w_tree.rank("и", 32), Some(2));
assert_eq!(w_tree.rank("o", 16), Some(2));
assert_eq!(w_tree.rank("世", 16), Some(0));
assert_eq!(w_tree.rank("и", 16), Some(0));
assert_eq!(w_tree.rank("o", 0), Some(0));
assert_eq!(w_tree.rank("世", 0), Some(0));
assert_eq!(w_tree.rank("и", 0), Some(0));
assert_eq!(w_tree.rank("木", 32), None);
}
#[test]
fn test_fail_management() {
let a = WaveletTreePointer::from("");
assert_eq!(a.access(0), None);
assert_eq!(a.rank('a', 0), None);
assert_eq!(a.select('a', 0), None);
let b = WaveletTreePointer::from("abc");
assert_eq!(b.access(4), None);
assert_eq!(b.rank('b', 4), None);
assert_eq!(b.select('b', 2), None);
assert_eq!(b.rank('d', 1), None);
assert_eq!(b.select('d', 1), None);
assert_eq!(b.select('a', 0), None);
assert_eq!(b.rank('c', 1), Some(0));
}
#[test]
fn test_partialeq_str() {
let test_string = "Hello world, こんにちは世界, Привет, мир";
let w_tree: WaveletTreePointer<char> = test_string.into();
assert!(w_tree == test_string);
let test_string_wrong = "Hello worlt, こんにちは世界, Привет, мир";
assert!(w_tree != test_string_wrong);
let test_string_shorter = "Hello world";
assert!(w_tree != test_string_shorter);
}
#[test]
fn test_partialeq_vec() {
let vec = vec![1, 2, 3, 4, 5, 1, 2, 4, 1, 3, 5, 2, 4];
let w_tree: WaveletTreePointer<i32> = vec.clone().into();
assert!(w_tree == vec);
let vec_wrong = vec![1, 2, 3, 4, 5, 1, 2, 4, 1, 3, 7, 2, 4];
assert!(w_tree != vec_wrong);
let vec_short = vec![1, 2, 3, 4, 5];
assert!(w_tree != vec_short);
}
#[test]
fn test_into_iter() {
let test_str: String = String::from("Hello world");
let w_tree: WaveletTreePointer<char> = test_str.clone().into();
test_str.chars().eq(w_tree.into_iter());
}
}