tudocomp
– The TU Dortmund Compression Framework
compressors/lz78u/SuffixTree.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <sdsl/suffix_trees.hpp>
4 #include <glog/logging.h>
5 
6 #include <sstream>
7 
8 //template<class bp_support = sdsl::bp_support_sada<> >
9 namespace tdc {
10 namespace lz78u {
11 
12 using namespace sdsl;
13 
18 struct SuffixTree {
19  using cst_t = cst_sada<>;
20  using node_type = cst_t::node_type;
21 
22  const cst_t& cst;
23  const cst_t::node_type root;
24  const rank_support_v5<1> m_bp_rank1;
25  const size_t internal_nodes;
26 
27  SuffixTree(const cst_t& _cst)
28  : cst(_cst)
29  , root(cst.root())
30  , m_bp_rank1(&cst.bp)
31  , internal_nodes(m_bp_rank1.rank(m_bp_rank1.size()) - cst.bp_rank_10.rank(m_bp_rank1.size()))
32  {
33  }
34 
35  cst_t::node_type parent(const cst_t::node_type& node)const {
36  return cst.parent(node);
37  }
43  cst_t::node_type level_anc(const cst_t::node_type& node, size_t depth)const {
44  VLOG(2) << "LevelAnc of node " << node << " on depth " << depth << " is " << cst.bp_support.level_anc(node, cst.node_depth(node)-depth) << std::endl;
45  return cst.bp_support.level_anc(node, cst.node_depth(node)-depth);
46  }
47 
48 
53  cst_t::node_type select_leaf(const cst_t::size_type& i) const {
54  return cst.select_leaf(i+1);
55  }
56 
57 
61  cst_t::node_type smallest_leaf()const {
62  return cst.select_leaf(cst.csa.isa[0]+1);
63  }
64  /*
65  * Given a leaf node, we select the leaf whose label is the label of node plus one.
66  */
67  cst_t::node_type next_leaf(const cst_t::node_type& node)const {
68  VLOG(2) << "Next Leaf " << node << "-> " << cst.select_leaf(cst.csa.psi[leafrank(node)]+1) << std::endl;
69  return cst.select_leaf(cst.csa.psi[leafrank(node)]+1);
70  }
71  /*
72  * apply m times next-leaf
73  */
74  cst_t::node_type next_mth_leaf(const cst_t::node_type& node, cst_t::size_type m)const {
75  cst_t::size_type val = leafrank(node);
76  while(m > 0) {
77  val = cst.csa.psi[val];
78  --m;
79  }
80  VLOG(2) << "Next " << m << "-th Leaf " << node << "-> " << cst.select_leaf(m+1) << std::endl;
81  return cst.select_leaf(val+1);
82  }
83  /*
84  * returns the number of preceding leaves of node
85  */
86  cst_t::size_type leafrank(const cst_t::node_type& node)const{
87  VLOG(2) << "Leafrank " << node << "-> " << cst.bp_rank_10.rank(node) << std::endl;
88  return cst.bp_rank_10.rank(node);
89  }
90 
91  /*
92  * Returns the length of the label read from the edges on the path from the root to node
93  */
94  cst_t::size_type str_depth(const cst_t::node_type& node)const{
95  return cst.depth(node);
96  }
97 
98  /*
99  * Returns an unique ID for an internal node of the suffix tree
100  */
101  cst_t::size_type nid(const cst_t::node_type& node)const{
102  assert(!cst.is_leaf(node));
103  VLOG(2) << "NID of node " << node << " is " << m_bp_rank1.rank(node) - cst.bp_rank_10.rank(node) << std::endl;
104  return m_bp_rank1.rank(node)- cst.bp_rank_10.rank(node);
105  }
106 
110  cst_t::size_type number_of_leaves(const cst_t::node_type& node)const{ // only for DEBUG
111  return cst.size(node);
112  }
113 
114 };
115 
116 inline void reset_bitvector(bit_vector& bv) {
117  sdsl::util::set_to_value(bv, 0);
118 }
119 
121  typedef sdsl::bp_support_sada<> bp_support;
122  using cst_t = cst_sada<>;
123  const bp_support& m_bp;
124 
125  sdsl::rrr_vector<> m_rc_rrrv;
126  sdsl::rrr_vector<>::rank_1_type m_rc_rank;
127  typedef bp_support::size_type size_type;
128 
129  //exported from SDSL
130  inline static bool is_root(size_type v)
131  {
132  return v==0;
133  }
134 
135  public:
136  root_childrank_support(const bp_support& bp) : m_bp(bp) {
137  const size_t m_size = bp.size();
138  sdsl::bit_vector root_children(m_size);
139  size_type rc = 1;
140 
141  while(rc < m_size - 1) {
142  root_children[rc] = 1;
143  rc = bp.find_close(rc) + 1;
144  }
145  m_rc_rrrv = sdsl::rrr_vector<>(root_children);
146  m_rc_rank = sdsl::rrr_vector<>::rank_1_type(&m_rc_rrrv);
147  }
148 
154  size_type rank(size_type i) const {
155  DCHECK(!is_root(i));
156  DCHECK(is_root(m_bp.level_anc(i, 1))); //parent must be root!
157 
158  return m_rc_rank(i);
159  }
160 
164  char head(const SuffixTree& st, const cst_t::node_type& node)const {
165  //TODO: Test if this assert works!
166  VLOG(2) << "child: " << rank(st.level_anc(node,1)) << std::endl;
167  VLOG(2) << "Edge: " << (int) st.cst.csa.comp2char[rank(st.level_anc(node,1))] << std::endl;
168  //assert( cst.edge(level_anc(node,1), 1) ==
169  // cst.csa.comp2char[root_child_rank(level_anc(node,1))] );
170  return st.cst.csa.comp2char[rank(st.level_anc(node,1))];
171  }
172 
173  /*
174  * Returns the length of the label read from the edges on the path from the root to node
175  */
176  cst_t::size_type str_depth(const SuffixTree& ST, const cst_t::node_type& node)const{
177  if(ST.cst.is_leaf(node)) {
178  return (ST.cst.size() <= 1) ? 0 : (ST.cst.size()-ST.cst.sn(node));
179  }
180  auto la = ST.cst.leftmost_leaf(ST.cst.select_child(node, 1));
181  auto lb = ST.cst.leftmost_leaf(ST.cst.select_child(node, 2));
182  size_t m = 0;
183  while(head(ST,la) == head(ST,lb)) {
184  la = ST.next_leaf(la);
185  lb = ST.next_leaf(lb);
186  ++m;
187  }
188  DCHECK_EQ(m, ST.cst.depth(node));
189  VLOG(2) << "Depth of " << node << " is " << m << std::endl;
190  return m;
191  }
192  };
193 
194 }}//ns
char head(const SuffixTree &st, const cst_t::node_type &node) const
Returns the first character of the string on the path from the root to the node.
cst_t::size_type nid(const cst_t::node_type &node) const
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
const cst_t::node_type root
sdsl suffix tree
void reset_bitvector(bit_vector &bv)
cst_t::size_type str_depth(const SuffixTree &ST, const cst_t::node_type &node) const
const rank_support_v5< 1 > m_bp_rank1
the root node of the suffix tree
SuffixTree(const cst_t &_cst)
number of internal nodes
cst_t::node_type select_leaf(const cst_t::size_type &i) const
Select the i-th leaf in SA-order 0 <= i < n.
cst_t::size_type leafrank(const cst_t::node_type &node) const
cst_t::node_type level_anc(const cst_t::node_type &node, size_t depth) const
Returns the level ancestor of node.
cst_t::node_type smallest_leaf() const
Return the leaf corresponding to the suffix starting at position 0, i.e., the leaf with the longest s...
cst_t::node_type next_leaf(const cst_t::node_type &node) const
cst_t::size_type str_depth(const cst_t::node_type &node) const
cst_t::node_type next_mth_leaf(const cst_t::node_type &node, cst_t::size_type m) const
const size_t internal_nodes
rank the one bits in the BP
cst_t::size_type number_of_leaves(const cst_t::node_type &node) const
Returns the number of leaves contained in the subtree rooted at &#39;node&#39;.
This is a wrapper class around the sdsl-lite library to get a easier translation between the pseudoco...
size_type rank(size_type i) const
Returns how many preceding silbings node has.
cst_t::node_type parent(const cst_t::node_type &node) const