6 #include <unordered_map> 24 typedef std::tuple<uint,uint,uint> non_term;
25 typedef std::vector<non_term> non_terminal_symbols;
26 typedef std::vector<std::pair<uint,uint>> rules;
34 std::vector<std::vector<SuffixTree::STNode*> > bins;
36 std::unordered_map<SuffixTree::STNode *, std::vector<uint> > beginning_positions;
57 if(str_depth>= bins.size()){
58 bins.resize(bins.size()*2);
60 std::cerr<<
"resizing: "<<bins.size();
63 if(str_depth>max_depth){
69 bins[str_depth].push_back(node);
79 child_inner->
parent = inner;
82 compute_string_depth( child.second, child_depth);
97 std::vector<uint> starting_positions = beginning_positions[node];
98 std::vector<uint> selected_starting_positions;
100 long min_shorter = 1;
103 selected_starting_positions.reserve(starting_positions.size());
105 long last = 0- (long) length - 1;
107 for (
auto it=starting_positions.begin(); it!=starting_positions.end(); ++it){
109 current = (long) *it;
110 if(last+length <= current && !dead_positions[current] && !dead_positions[current+length-1]){
112 selected_starting_positions.push_back(current);
117 if(current < (
long) dead_positions.
size() && !dead_positions[current] && dead_positions[current+length-1]){
120 while((current+min_shorter < (
long) dead_positions.
size()) && !dead_positions[current+min_shorter]){
128 if(min_shorter < length){
130 if(min_shorter >= (
int) min_lrf){
136 if(depth < (
uint)(min_shorter)){
139 bins[min_shorter].push_back(node);
145 return selected_starting_positions;
153 Meta m(
"lfs_comp",
"st");
171 compute_string_depth(stree.
get_root(),0);
176 DLOG(INFO)<<
"max depth: "<<max_depth;
184 beginning_positions.
reserve(node_count);
190 for(
uint i = bins.size()-1; i>=min_lrf; i--){
191 auto bin_it = bins[i].begin();
192 while (bin_it!= bins[i].end()){
196 auto bp = beginning_positions.find(node);
200 if(bp == beginning_positions.end()){
202 std::vector<uint> positions;
212 auto child_bp = beginning_positions.find(inner_child);
213 if(child_bp != beginning_positions.end()){
215 positions.insert(positions.end(), (*child_bp).second.begin(), (*child_bp).second.end());
217 beginning_positions.erase((*child_bp).first);
225 positions.push_back(leaf_child->suffix);
234 std::sort(positions.begin(), positions.end());
236 uint real_depth = positions.back() - positions.front();
242 beginning_positions[node]=positions;
248 std::vector<uint> & begin_pos = beginning_positions[node];
251 if(begin_pos.size() >= 2 && ( (begin_pos.back() ) - (begin_pos.front()) >= i)){
255 dead_positions[(begin_pos.back())] ||
256 dead_positions[(begin_pos.front())]
263 std::vector<uint> sel_pos = select_starting_positions(node, i);
269 if(! (sel_pos.size() >=2) ){
274 std::pair<uint,uint> rule = std::make_pair(sel_pos.at(0), i);
275 dictionary.push_back(rule);
278 for(
auto bp_it = sel_pos.begin(); bp_it != sel_pos.end(); bp_it++){
279 non_term nts = std::make_tuple(*bp_it, nts_number, i);
280 nts_symbols.push_back(nts);
283 dead_positions[
pos+ *bp_it] = 1;
305 DLOG(INFO) <<
"sorting occurences";
306 std::sort(nts_symbols.begin(), nts_symbols.end());
Contains the text compression and encoding framework.
A vector over arbitrary unsigned integer types.
void compute_rules(io::InputView &input, rules &dictionary, non_terminal_symbols &nts_symbols)
const OptionValue & option(const std::string &option) const
Get an option of this algorithm.
IntVector< uint_t< 1 > > BitVector
Represents a bit vector, alias for IntVector with a fixed bit width of 1.
uint64_t as_integer() const
Algorithm(Algorithm const &)=default
Env & env()
Provides access to the environment that the algorithm works in.
static void log(const char *key, const T &value)
Logs a user statistic for the current phase.
SuffixTree::STNode * get_root()
void reserve(size_type n)
static auto wrap(const char *title, F func) -> typename std::result_of< F(StatPhase &)>::type
Executes a lambda as a single statistics phase.
uint edge_length(STNode *node)
std::unordered_map< char, STNode * > child_nodes
Interface for algorithms.