1 module bitarray.builder; 2 3 import std.typecons : tuple, Tuple; 4 import std.algorithm : reverse, sort, SwapStrategy; 5 import std.algorithm.iteration : map; 6 import std.conv : to; 7 import std.array : array; 8 9 import queue : Queue; 10 import bitarray.bitarray : SuccinctBitVector; 11 12 13 /** 14 The node of the tree. 15 Each node has one character as its member. 16 */ 17 class Node { 18 wchar label; 19 private Node[] children; 20 bool visited; 21 22 this(wchar label) { 23 this.label = label; 24 this.children = []; 25 this.visited = false; 26 } 27 28 void addChild(Node child) { 29 this.children ~= child; 30 } 31 32 ulong getNChildren() { 33 return this.children.length; 34 } 35 } 36 37 38 //TODO add a method to make on-line construction 39 /** 40 This class has:<br> 41 A function which constructs a tree by inserted words.<br> 42 A function which dumps the tree as a LOUDS bit-string.<br> 43 */ 44 class LoudsBitStringBuilder { 45 private Node tree; 46 this(string[] words) { 47 this.tree = new Node(' '); //make root 48 49 //To avoid making effects to the outside of this class, 50 //copy given string array into newly allocated string array 51 string[] words_ = new string[words.length]; 52 words_[0..$] = words[0..$]; 53 54 //sort words in alphabetical order. 55 sort!("a < b", SwapStrategy.stable)(words_); 56 foreach(string word; words_) { 57 wchar[] w = array(map!(to!wchar)(word)); 58 this.build(this.tree, w, 0); 59 } 60 } 61 62 /** 63 Build a tree. 64 */ 65 private void build(Node node, wchar[] word, uint depth) { 66 if(depth == word.length) { 67 return; 68 } 69 70 foreach(Node child; node.children) { 71 if(child.label == word[depth]) { 72 this.build(child, word, depth+1); 73 return; 74 } 75 } 76 77 Node child = new Node(word[depth]); 78 node.addChild(child); 79 this.build(child, word, depth+1); 80 return; 81 } 82 83 /** 84 Dumps a LOUDS bit-string. 85 */ 86 Tuple!(SuccinctBitVector, wchar[]) dump() { 87 //construct a bit vector by Breadth-first search 88 SuccinctBitVector bitvector = new SuccinctBitVector(); 89 wchar[] labels; 90 91 //set the root node 92 bitvector.append(1); 93 bitvector.append(0); 94 labels ~= cast(wchar)(' '); 95 96 Queue!Node queue = new Queue!Node(); 97 queue.append(this.tree); 98 99 while(!queue.isempty) { 100 Node node = queue.pop(); 101 labels ~= node.label; 102 103 // append N ones and 0 104 // N is the number of children of the current node 105 for(auto i = 0; i < node.getNChildren(); i++) { 106 bitvector.append(1); 107 } 108 bitvector.append(0); 109 110 foreach(Node child; node.children) { 111 if(child.visited) { 112 continue; 113 } 114 115 child.visited = true; 116 queue.append(child); 117 } 118 } 119 120 bitvector.build(); 121 return tuple(bitvector, labels); 122 } 123 } 124 125 126 //smoke test 127 unittest { 128 string[] words = ["an", "i", "of", "one", "our", "out"]; 129 130 auto constructor = new LoudsBitStringBuilder(words); 131 auto t = constructor.dump(); 132 SuccinctBitVector bitvector = t[0]; 133 134 //the length of the bitvector should be a multiple of 8 135 assert(bitvector.toString() == "101110100111000101100000"); 136 } 137 138 139 //Ensure the same bit string generated even if the word order is randomized. 140 unittest { 141 string[] words = ["our", "out", "i", "an", "of", "one"]; 142 143 auto constructor = new LoudsBitStringBuilder(words); 144 auto t = constructor.dump(); 145 SuccinctBitVector bitvector = t[0]; 146 147 //the length of the bitvector should be a multiple of 8 148 assert(bitvector.toString() == "101110100111000101100000"); 149 } 150 151 152 unittest { 153 string[] words = ["the", "then", "they"]; 154 155 auto constructor = new LoudsBitStringBuilder(words); 156 auto t = constructor.dump(); 157 SuccinctBitVector bitvector = t[0]; 158 assert(bitvector.toString() == "1010101011000000"); 159 }