1 /*
2 * #%L
3 * StringDecoderCache.java - mongodb-async-driver - Allanbank Consulting, Inc.
4 * %%
5 * Copyright (C) 2011 - 2014 Allanbank Consulting, Inc.
6 * %%
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 * #L%
19 */
20
21 package com.allanbank.mongodb.bson.io;
22
23 import java.io.EOFException;
24 import java.io.StreamCorruptedException;
25 import java.util.List;
26 import java.util.SortedMap;
27
28 /**
29 * StringDecoderCache provides a cache for decoding strings.
30 * <p>
31 * This class is thread safe. Thread safety is achieved by maintaining two data
32 * structures. The first is a map of seen strings to the number of times the
33 * string has been seen. The map is maintained by the base class:
34 * {@link AbstractStringCache}. The second structure is a trie of the cached
35 * {@code byte[]} to the decoded string.
36 * </p>
37 *
38 * @api.no This class is <b>NOT</b> part of the drivers API. This class may be
39 * mutated in incompatible ways between any two releases of the driver.
40 * @copyright 2014, Allanbank Consulting, Inc., All Rights Reserved
41 */
42 public class StringDecoderCache extends AbstractStringCache {
43
44 /** The cached decoded strings in the form of a trie cache. */
45 private TrieCache myTrieCache;
46
47 /**
48 * Creates a new TrieStringCache.
49 */
50 public StringDecoderCache() {
51 super();
52
53 myTrieCache = new TrieCache();
54 }
55
56 /**
57 * Decode a string of a known length. The last byte should be a zero byte
58 * and will not be included in the decoded string.
59 *
60 * @param source
61 * The source of the bytes in the string.
62 * @param offset
63 * The offset of the first byte to decode.
64 * @param length
65 * The length of the string to decode with a terminal zero byte.
66 * @return The decoded string.
67 * @throws StreamCorruptedException
68 * On the decoding of the string failing.
69 * @throws EOFException
70 * On the array not containing enough bytes to decoded.
71 */
72 public String find(final byte[] source, final int offset, final int length)
73 throws StreamCorruptedException, EOFException {
74
75 // Check for the easiest value to cache.
76 if (length <= 1) {
77 return "";
78 }
79 // And for values blowing out the cache.
80 else if (myMaxCacheLength < length) {
81 return null;
82 }
83 // Check for the terminal null.
84 else if (source[(offset + length) - 1] != 0) {
85 throw new StreamCorruptedException(
86 "Expected a null character at the end of the string.");
87 }
88
89 return myTrieCache.find(source, offset, length);
90 }
91
92 /**
93 * Clears the cache.
94 */
95 @Override
96 protected void clear() {
97 myTrieCache = new TrieCache();
98 super.clear();
99 }
100
101 /**
102 * Rebuilds the cache from the current collection of seen entries.
103 */
104 @Override
105 protected void rebuildCache() {
106 final SortedMap<Integer, List<SeenString>> order = buildCacheGroups();
107
108 // Rebuild the cache.
109 final TrieCache cache = new TrieCache();
110 int count = 0;
111 for (final List<SeenString> seenAtCount : order.values()) {
112 for (final SeenString seen : seenAtCount) {
113 if (count < myMaxCachEntries) {
114 cache.addEntry(seen.getBytes(), seen.getValue());
115 count += 1;
116 }
117 else {
118 mySeen.remove(seen.getValue());
119 }
120 }
121 }
122
123 myTrieCache = cache;
124 }
125
126 /**
127 * A fast cache for bytes to decoded strings.
128 * <p>
129 * The trie is designed to be used in a RCU fashion where the trie is
130 * constructed as a series of {@link #addEntry(byte[], String)} calls and
131 * once the trie is fully populated then is can be queried using
132 * {@link #find(byte[], int, int)}.
133 * </p>
134 *
135 * @api.no This class is <b>NOT</b> part of the drivers API. This class may
136 * be mutated in incompatible ways between any two releases of the
137 * driver.
138 * @copyright 2014, Allanbank Consulting, Inc., All Rights Reserved
139 */
140 protected static class TrieCache {
141 /** The root nodes for the trie. */
142 private final TrieNode[] myRoots;
143
144 /**
145 * Creates a new TrieCache.
146 */
147 public TrieCache() {
148 myRoots = new TrieNode[256];
149 }
150
151 /**
152 * Adds an entry to the trie cache.
153 *
154 * @param source
155 * The source of the bytes in the string.
156 * @param value
157 * The value to associate with the entry.
158 */
159 public void addEntry(final byte[] source, final String value) {
160
161 // Minus 1 for the terminal null.
162 final int last = source.length - 1;
163
164 final int firstByte = source[0] & 0xFF;
165 TrieNode node = myRoots[firstByte];
166 if (node == null) {
167 node = myRoots[firstByte] = new TrieNode(source[0]);
168 }
169
170 // Walk the Trie to the end node.
171 for (int i = 1; i < last; ++i) {
172 TrieNode child = node.child(source[i]);
173
174 if (child == null) {
175 child = new TrieNode(source[i]);
176 node.addChild(child);
177 }
178
179 node = child;
180 }
181
182 node.setDecoded(value);
183
184 }
185
186 /**
187 * Finds the value for the byte string, if known to the cache. Returns
188 * null on a cache miss.
189 *
190 * @param source
191 * The source of the bytes in the string.
192 * @param offset
193 * The offset of the first byte to decode.
194 * @param length
195 * The length of the string to decode with a terminal zero
196 * byte.
197 * @return The cached decoded string or null in the case of a cache
198 * miss.
199 */
200 public String find(final byte[] source, final int offset,
201 final int length) {
202
203 // Minus 1 for the terminal null.
204 final int last = offset + (length - 1);
205
206 final int firstByte = source[offset] & 0xFF;
207 TrieNode node = myRoots[firstByte];
208
209 // Walk the Trie to the end node.
210 for (int i = offset + 1; (node != null) && (i < last); ++i) {
211 node = node.child(source[i]);
212 }
213
214 return (node != null) ? node.getDecoded() : null;
215 }
216
217 /**
218 * Node provides a single node in the trie.
219 * <p>
220 * {@link #myChildren} is a two dimension array for the children of this
221 * node. We use a 2 dimension array to reduce the memory footprint of
222 * the trie by taking advantage of the clustering within the encoding of
223 * languages. The first dimension of the array we refer to as the
224 * {@code zone} and represents the first/high nibble of the value for
225 * the child node. The second dimension of the array is the second/low
226 * nibble.
227 * </p>
228 *
229 * @api.no This class is <b>NOT</b> part of the drivers API. This class
230 * may be mutated in incompatible ways between any two releases
231 * of the driver.
232 * @copyright 2014, Allanbank Consulting, Inc., All Rights Reserved
233 */
234 protected static class TrieNode {
235 /**
236 * The children of the node. See the class javadoc for a description
237 * of the structure.
238 */
239 private TrieNode[][] myChildren;
240
241 /** The decoded value for the node. May be <code>null</code>. */
242 private String myDecoded;
243
244 /** The value for the node. */
245 private final byte myValue;
246
247 /**
248 * Creates a new Node.
249 *
250 * @param value
251 * The value for the node.
252 */
253 public TrieNode(final byte value) {
254 myValue = value;
255 myDecoded = null;
256
257 // No children, yet.
258 myChildren = null;
259 }
260
261 /**
262 * Adds a child node to this node.
263 *
264 * @param child
265 * The child node to add.
266 */
267 public void addChild(final TrieNode child) {
268 final int value = child.getValue();
269 final int zone = (value & 0xF0) >> 4;
270 final int index = (value & 0x0F);
271
272 if (myChildren == null) {
273 myChildren = new TrieNode[16][];
274 myChildren[zone] = new TrieNode[16];
275 }
276 else if (myChildren[zone] == null) {
277 myChildren[zone] = new TrieNode[16];
278 }
279
280 myChildren[zone][index] = child;
281 }
282
283 /**
284 * Returns the child node with the specified value.
285 *
286 * @param value
287 * The value for the child to find.
288 * @return The child node for the value or null if there is node
289 * child with that value.
290 */
291 public TrieNode child(final byte value) {
292 final int zone = (value & 0xF0) >> 4;
293 final int index = (value & 0x0F);
294
295 if ((myChildren != null) && (myChildren[zone] != null)) {
296 return myChildren[zone][index];
297 }
298 return null;
299 }
300
301 /**
302 * Returns the node's decoded value.
303 *
304 * @return The node's decoded value.
305 */
306 public String getDecoded() {
307 return myDecoded;
308 }
309
310 /**
311 * Returns the node's value.
312 *
313 * @return The node's value.
314 */
315 public byte getValue() {
316 return myValue;
317 }
318
319 /**
320 * Sets the decoded value for the node.
321 *
322 * @param decoded
323 * The decoded value for the node.
324 */
325 public void setDecoded(final String decoded) {
326 myDecoded = decoded;
327 }
328 }
329 }
330
331 }