1 /* 2 * #%L 3 * StringDecoderCache.java - mongodb-async-driver - Allanbank Consulting, Inc. 4 * %% 5 * Copyright (C) 2011 - 2014 Allanbank Consulting, Inc. 6 * %% 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * you may not use this file except in compliance with the License. 9 * You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 * #L% 19 */ 20 21 package com.allanbank.mongodb.bson.io; 22 23 import java.io.EOFException; 24 import java.io.StreamCorruptedException; 25 import java.util.List; 26 import java.util.SortedMap; 27 28 /** 29 * StringDecoderCache provides a cache for decoding strings. 30 * <p> 31 * This class is thread safe. Thread safety is achieved by maintaining two data 32 * structures. The first is a map of seen strings to the number of times the 33 * string has been seen. The map is maintained by the base class: 34 * {@link AbstractStringCache}. The second structure is a trie of the cached 35 * {@code byte[]} to the decoded string. 36 * </p> 37 * 38 * @api.no This class is <b>NOT</b> part of the drivers API. This class may be 39 * mutated in incompatible ways between any two releases of the driver. 40 * @copyright 2014, Allanbank Consulting, Inc., All Rights Reserved 41 */ 42 public class StringDecoderCache extends AbstractStringCache { 43 44 /** The cached decoded strings in the form of a trie cache. */ 45 private TrieCache myTrieCache; 46 47 /** 48 * Creates a new TrieStringCache. 49 */ 50 public StringDecoderCache() { 51 super(); 52 53 myTrieCache = new TrieCache(); 54 } 55 56 /** 57 * Decode a string of a known length. The last byte should be a zero byte 58 * and will not be included in the decoded string. 59 * 60 * @param source 61 * The source of the bytes in the string. 62 * @param offset 63 * The offset of the first byte to decode. 64 * @param length 65 * The length of the string to decode with a terminal zero byte. 66 * @return The decoded string. 67 * @throws StreamCorruptedException 68 * On the decoding of the string failing. 69 * @throws EOFException 70 * On the array not containing enough bytes to decoded. 71 */ 72 public String find(final byte[] source, final int offset, final int length) 73 throws StreamCorruptedException, EOFException { 74 75 // Check for the easiest value to cache. 76 if (length <= 1) { 77 return ""; 78 } 79 // And for values blowing out the cache. 80 else if (myMaxCacheLength < length) { 81 return null; 82 } 83 // Check for the terminal null. 84 else if (source[(offset + length) - 1] != 0) { 85 throw new StreamCorruptedException( 86 "Expected a null character at the end of the string."); 87 } 88 89 return myTrieCache.find(source, offset, length); 90 } 91 92 /** 93 * Clears the cache. 94 */ 95 @Override 96 protected void clear() { 97 myTrieCache = new TrieCache(); 98 super.clear(); 99 } 100 101 /** 102 * Rebuilds the cache from the current collection of seen entries. 103 */ 104 @Override 105 protected void rebuildCache() { 106 final SortedMap<Integer, List<SeenString>> order = buildCacheGroups(); 107 108 // Rebuild the cache. 109 final TrieCache cache = new TrieCache(); 110 int count = 0; 111 for (final List<SeenString> seenAtCount : order.values()) { 112 for (final SeenString seen : seenAtCount) { 113 if (count < myMaxCachEntries) { 114 cache.addEntry(seen.getBytes(), seen.getValue()); 115 count += 1; 116 } 117 else { 118 mySeen.remove(seen.getValue()); 119 } 120 } 121 } 122 123 myTrieCache = cache; 124 } 125 126 /** 127 * A fast cache for bytes to decoded strings. 128 * <p> 129 * The trie is designed to be used in a RCU fashion where the trie is 130 * constructed as a series of {@link #addEntry(byte[], String)} calls and 131 * once the trie is fully populated then is can be queried using 132 * {@link #find(byte[], int, int)}. 133 * </p> 134 * 135 * @api.no This class is <b>NOT</b> part of the drivers API. This class may 136 * be mutated in incompatible ways between any two releases of the 137 * driver. 138 * @copyright 2014, Allanbank Consulting, Inc., All Rights Reserved 139 */ 140 protected static class TrieCache { 141 /** The root nodes for the trie. */ 142 private final TrieNode[] myRoots; 143 144 /** 145 * Creates a new TrieCache. 146 */ 147 public TrieCache() { 148 myRoots = new TrieNode[256]; 149 } 150 151 /** 152 * Adds an entry to the trie cache. 153 * 154 * @param source 155 * The source of the bytes in the string. 156 * @param value 157 * The value to associate with the entry. 158 */ 159 public void addEntry(final byte[] source, final String value) { 160 161 // Minus 1 for the terminal null. 162 final int last = source.length - 1; 163 164 final int firstByte = source[0] & 0xFF; 165 TrieNode node = myRoots[firstByte]; 166 if (node == null) { 167 node = myRoots[firstByte] = new TrieNode(source[0]); 168 } 169 170 // Walk the Trie to the end node. 171 for (int i = 1; i < last; ++i) { 172 TrieNode child = node.child(source[i]); 173 174 if (child == null) { 175 child = new TrieNode(source[i]); 176 node.addChild(child); 177 } 178 179 node = child; 180 } 181 182 node.setDecoded(value); 183 184 } 185 186 /** 187 * Finds the value for the byte string, if known to the cache. Returns 188 * null on a cache miss. 189 * 190 * @param source 191 * The source of the bytes in the string. 192 * @param offset 193 * The offset of the first byte to decode. 194 * @param length 195 * The length of the string to decode with a terminal zero 196 * byte. 197 * @return The cached decoded string or null in the case of a cache 198 * miss. 199 */ 200 public String find(final byte[] source, final int offset, 201 final int length) { 202 203 // Minus 1 for the terminal null. 204 final int last = offset + (length - 1); 205 206 final int firstByte = source[offset] & 0xFF; 207 TrieNode node = myRoots[firstByte]; 208 209 // Walk the Trie to the end node. 210 for (int i = offset + 1; (node != null) && (i < last); ++i) { 211 node = node.child(source[i]); 212 } 213 214 return (node != null) ? node.getDecoded() : null; 215 } 216 217 /** 218 * Node provides a single node in the trie. 219 * <p> 220 * {@link #myChildren} is a two dimension array for the children of this 221 * node. We use a 2 dimension array to reduce the memory footprint of 222 * the trie by taking advantage of the clustering within the encoding of 223 * languages. The first dimension of the array we refer to as the 224 * {@code zone} and represents the first/high nibble of the value for 225 * the child node. The second dimension of the array is the second/low 226 * nibble. 227 * </p> 228 * 229 * @api.no This class is <b>NOT</b> part of the drivers API. This class 230 * may be mutated in incompatible ways between any two releases 231 * of the driver. 232 * @copyright 2014, Allanbank Consulting, Inc., All Rights Reserved 233 */ 234 protected static class TrieNode { 235 /** 236 * The children of the node. See the class javadoc for a description 237 * of the structure. 238 */ 239 private TrieNode[][] myChildren; 240 241 /** The decoded value for the node. May be <code>null</code>. */ 242 private String myDecoded; 243 244 /** The value for the node. */ 245 private final byte myValue; 246 247 /** 248 * Creates a new Node. 249 * 250 * @param value 251 * The value for the node. 252 */ 253 public TrieNode(final byte value) { 254 myValue = value; 255 myDecoded = null; 256 257 // No children, yet. 258 myChildren = null; 259 } 260 261 /** 262 * Adds a child node to this node. 263 * 264 * @param child 265 * The child node to add. 266 */ 267 public void addChild(final TrieNode child) { 268 final int value = child.getValue(); 269 final int zone = (value & 0xF0) >> 4; 270 final int index = (value & 0x0F); 271 272 if (myChildren == null) { 273 myChildren = new TrieNode[16][]; 274 myChildren[zone] = new TrieNode[16]; 275 } 276 else if (myChildren[zone] == null) { 277 myChildren[zone] = new TrieNode[16]; 278 } 279 280 myChildren[zone][index] = child; 281 } 282 283 /** 284 * Returns the child node with the specified value. 285 * 286 * @param value 287 * The value for the child to find. 288 * @return The child node for the value or null if there is node 289 * child with that value. 290 */ 291 public TrieNode child(final byte value) { 292 final int zone = (value & 0xF0) >> 4; 293 final int index = (value & 0x0F); 294 295 if ((myChildren != null) && (myChildren[zone] != null)) { 296 return myChildren[zone][index]; 297 } 298 return null; 299 } 300 301 /** 302 * Returns the node's decoded value. 303 * 304 * @return The node's decoded value. 305 */ 306 public String getDecoded() { 307 return myDecoded; 308 } 309 310 /** 311 * Returns the node's value. 312 * 313 * @return The node's value. 314 */ 315 public byte getValue() { 316 return myValue; 317 } 318 319 /** 320 * Sets the decoded value for the node. 321 * 322 * @param decoded 323 * The decoded value for the node. 324 */ 325 public void setDecoded(final String decoded) { 326 myDecoded = decoded; 327 } 328 } 329 } 330 331 }