1 module bitarray.builder;
2 
3 import std.typecons : tuple, Tuple;
4 import std.algorithm : reverse, sort, SwapStrategy;
5 import std.algorithm.iteration : map;
6 import std.conv : to;
7 import std.array : array;
8 
9 import queue : Queue;
10 import bitarray.bitarray : SuccinctBitVector;
11 
12 
13 /**
14   The node of the tree.
15   Each node has one character as its member.
16  */
17 class Node {
18     wchar label;
19     private Node[] children;
20     bool visited;
21 
22     this(wchar label) {
23         this.label = label;
24         this.children = [];
25         this.visited = false;
26     }
27 
28     void addChild(Node child) {
29         this.children ~= child;
30     }
31 
32     ulong getNChildren() {
33         return this.children.length;
34     }
35 }
36 
37 
38 //TODO add a method to make on-line construction
39 /**
40   This class has:<br>
41   A function which constructs a tree by inserted words.<br>
42   A function which dumps the tree as a LOUDS bit-string.<br>
43  */
44 class LoudsBitStringBuilder {
45     private Node tree;
46     this(string[] words) {
47         this.tree = new Node(' ');  //make root
48 
49         //To avoid making effects to the outside of this class,
50         //copy given string array into newly allocated string array
51         string[] words_ = new string[words.length];
52         words_[0..$] = words[0..$];
53 
54         //sort words in alphabetical order.
55         sort!("a < b", SwapStrategy.stable)(words_);
56         foreach(string word; words_) {
57             wchar[] w = array(map!(to!wchar)(word));
58             this.build(this.tree, w, 0);
59         }
60     }
61 
62     /**
63       Build a tree.
64      */
65     private void build(Node node, wchar[] word, uint depth) {
66         if(depth == word.length) {
67             return;
68         }
69 
70         foreach(Node child; node.children) {
71             if(child.label == word[depth]) {
72                 this.build(child, word, depth+1);
73                 return;
74             }
75         }
76 
77         Node child = new Node(word[depth]);
78         node.addChild(child);
79         this.build(child, word, depth+1);
80         return;
81     }
82 
83     /**
84       Dumps a LOUDS bit-string.
85      */
86     Tuple!(SuccinctBitVector, wchar[]) dump() {
87         //construct a bit vector by Breadth-first search
88         SuccinctBitVector bitvector = new SuccinctBitVector();
89         wchar[] labels;
90 
91         //set the root node
92         bitvector.append(1);
93         bitvector.append(0);
94         labels ~= cast(wchar)(' ');
95 
96         Queue!Node queue = new Queue!Node();
97         queue.append(this.tree);
98 
99         while(!queue.isempty) {
100             Node node = queue.pop();
101             labels ~= node.label;
102 
103             // append N ones and 0
104             // N is the number of children of the current node
105             for(auto i = 0; i < node.getNChildren(); i++) {
106                 bitvector.append(1);
107             }
108             bitvector.append(0);
109 
110             foreach(Node child; node.children) {
111                 if(child.visited) {
112                     continue;
113                 }
114 
115                 child.visited = true;
116                 queue.append(child);
117             }
118         }
119 
120         bitvector.build();
121         return tuple(bitvector, labels);
122     }
123 }
124 
125 
126 //smoke test
127 unittest {
128     string[] words = ["an", "i", "of", "one", "our", "out"];
129 
130     auto constructor = new LoudsBitStringBuilder(words);
131     auto t = constructor.dump();
132     SuccinctBitVector bitvector = t[0];
133 
134     //the length of the bitvector should be a multiple of 8
135     assert(bitvector.toString() == "101110100111000101100000");
136 }
137 
138 
139 //Ensure the same bit string generated even if the word order is randomized.
140 unittest {
141     string[] words = ["our", "out", "i", "an", "of", "one"];
142 
143     auto constructor = new LoudsBitStringBuilder(words);
144     auto t = constructor.dump();
145     SuccinctBitVector bitvector = t[0];
146 
147     //the length of the bitvector should be a multiple of 8
148     assert(bitvector.toString() == "101110100111000101100000");
149 }
150 
151 
152 unittest {
153     string[] words = ["the", "then", "they"];
154 
155     auto constructor = new LoudsBitStringBuilder(words);
156     auto t = constructor.dump();
157     SuccinctBitVector bitvector = t[0];
158     assert(bitvector.toString() == "1010101011000000");
159 }