Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
StringDecoderCache |
|
| 2.5384615384615383;2.538 | ||||
StringDecoderCache$TrieCache |
|
| 2.5384615384615383;2.538 | ||||
StringDecoderCache$TrieCache$TrieNode |
|
| 2.5384615384615383;2.538 |
1 | /* | |
2 | * #%L | |
3 | * StringDecoderCache.java - mongodb-async-driver - Allanbank Consulting, Inc. | |
4 | * %% | |
5 | * Copyright (C) 2011 - 2014 Allanbank Consulting, Inc. | |
6 | * %% | |
7 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
8 | * you may not use this file except in compliance with the License. | |
9 | * You may obtain a copy of the License at | |
10 | * | |
11 | * http://www.apache.org/licenses/LICENSE-2.0 | |
12 | * | |
13 | * Unless required by applicable law or agreed to in writing, software | |
14 | * distributed under the License is distributed on an "AS IS" BASIS, | |
15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
16 | * See the License for the specific language governing permissions and | |
17 | * limitations under the License. | |
18 | * #L% | |
19 | */ | |
20 | ||
21 | package com.allanbank.mongodb.bson.io; | |
22 | ||
23 | import java.io.EOFException; | |
24 | import java.io.StreamCorruptedException; | |
25 | import java.util.List; | |
26 | import java.util.SortedMap; | |
27 | ||
28 | /** | |
29 | * StringDecoderCache provides a cache for decoding strings. | |
30 | * <p> | |
31 | * This class is thread safe. Thread safety is achieved by maintaining two data | |
32 | * structures. The first is a map of seen strings to the number of times the | |
33 | * string has been seen. The map is maintained by the base class: | |
34 | * {@link AbstractStringCache}. The second structure is a trie of the cached | |
35 | * {@code byte[]} to the decoded string. | |
36 | * </p> | |
37 | * | |
38 | * @api.no This class is <b>NOT</b> part of the drivers API. This class may be | |
39 | * mutated in incompatible ways between any two releases of the driver. | |
40 | * @copyright 2014, Allanbank Consulting, Inc., All Rights Reserved | |
41 | */ | |
42 | public class StringDecoderCache extends AbstractStringCache { | |
43 | ||
44 | /** The cached decoded strings in the form of a trie cache. */ | |
45 | private TrieCache myTrieCache; | |
46 | ||
47 | /** | |
48 | * Creates a new TrieStringCache. | |
49 | */ | |
50 | public StringDecoderCache() { | |
51 | 3476 | super(); |
52 | ||
53 | 3476 | myTrieCache = new TrieCache(); |
54 | 3476 | } |
55 | ||
56 | /** | |
57 | * Decode a string of a known length. The last byte should be a zero byte | |
58 | * and will not be included in the decoded string. | |
59 | * | |
60 | * @param source | |
61 | * The source of the bytes in the string. | |
62 | * @param offset | |
63 | * The offset of the first byte to decode. | |
64 | * @param length | |
65 | * The length of the string to decode with a terminal zero byte. | |
66 | * @return The decoded string. | |
67 | * @throws StreamCorruptedException | |
68 | * On the decoding of the string failing. | |
69 | * @throws EOFException | |
70 | * On the array not containing enough bytes to decoded. | |
71 | */ | |
72 | public String find(final byte[] source, final int offset, final int length) | |
73 | throws StreamCorruptedException, EOFException { | |
74 | ||
75 | // Check for the easiest value to cache. | |
76 | 71642 | if (length <= 1) { |
77 | 18 | return ""; |
78 | } | |
79 | // And for values blowing out the cache. | |
80 | 71623 | else if (myMaxCacheLength < length) { |
81 | 9 | return null; |
82 | } | |
83 | // Check for the terminal null. | |
84 | 71615 | else if (source[(offset + length) - 1] != 0) { |
85 | 2 | throw new StreamCorruptedException( |
86 | "Expected a null character at the end of the string."); | |
87 | } | |
88 | ||
89 | 71613 | return myTrieCache.find(source, offset, length); |
90 | } | |
91 | ||
92 | /** | |
93 | * Clears the cache. | |
94 | */ | |
95 | @Override | |
96 | protected void clear() { | |
97 | 35 | myTrieCache = new TrieCache(); |
98 | 35 | super.clear(); |
99 | 35 | } |
100 | ||
101 | /** | |
102 | * Rebuilds the cache from the current collection of seen entries. | |
103 | */ | |
104 | @Override | |
105 | protected void rebuildCache() { | |
106 | 2416 | final SortedMap<Integer, List<SeenString>> order = buildCacheGroups(); |
107 | ||
108 | // Rebuild the cache. | |
109 | 2416 | final TrieCache cache = new TrieCache(); |
110 | 2416 | int count = 0; |
111 | 2416 | for (final List<SeenString> seenAtCount : order.values()) { |
112 | 2565 | for (final SeenString seen : seenAtCount) { |
113 | 5717 | if (count < myMaxCachEntries) { |
114 | 5614 | cache.addEntry(seen.getBytes(), seen.getValue()); |
115 | 5614 | count += 1; |
116 | } | |
117 | else { | |
118 | 103 | mySeen.remove(seen.getValue()); |
119 | } | |
120 | 5717 | } |
121 | 2565 | } |
122 | ||
123 | 2416 | myTrieCache = cache; |
124 | 2416 | } |
125 | ||
126 | /** | |
127 | * A fast cache for bytes to decoded strings. | |
128 | * <p> | |
129 | * The trie is designed to be used in a RCU fashion where the trie is | |
130 | * constructed as a series of {@link #addEntry(byte[], String)} calls and | |
131 | * once the trie is fully populated then is can be queried using | |
132 | * {@link #find(byte[], int, int)}. | |
133 | * </p> | |
134 | * | |
135 | * @api.no This class is <b>NOT</b> part of the drivers API. This class may | |
136 | * be mutated in incompatible ways between any two releases of the | |
137 | * driver. | |
138 | * @copyright 2014, Allanbank Consulting, Inc., All Rights Reserved | |
139 | */ | |
140 | protected static class TrieCache { | |
141 | /** The root nodes for the trie. */ | |
142 | private final TrieNode[] myRoots; | |
143 | ||
144 | /** | |
145 | * Creates a new TrieCache. | |
146 | */ | |
147 | 5927 | public TrieCache() { |
148 | 5927 | myRoots = new TrieNode[256]; |
149 | 5927 | } |
150 | ||
151 | /** | |
152 | * Adds an entry to the trie cache. | |
153 | * | |
154 | * @param source | |
155 | * The source of the bytes in the string. | |
156 | * @param value | |
157 | * The value to associate with the entry. | |
158 | */ | |
159 | public void addEntry(final byte[] source, final String value) { | |
160 | ||
161 | // Minus 1 for the terminal null. | |
162 | 5614 | final int last = source.length - 1; |
163 | ||
164 | 5614 | final int firstByte = source[0] & 0xFF; |
165 | 5614 | TrieNode node = myRoots[firstByte]; |
166 | 5614 | if (node == null) { |
167 | 3064 | node = myRoots[firstByte] = new TrieNode(source[0]); |
168 | } | |
169 | ||
170 | // Walk the Trie to the end node. | |
171 | 23032 | for (int i = 1; i < last; ++i) { |
172 | 17418 | TrieNode child = node.child(source[i]); |
173 | ||
174 | 17418 | if (child == null) { |
175 | 16685 | child = new TrieNode(source[i]); |
176 | 16685 | node.addChild(child); |
177 | } | |
178 | ||
179 | 17418 | node = child; |
180 | } | |
181 | ||
182 | 5614 | node.setDecoded(value); |
183 | ||
184 | 5614 | } |
185 | ||
186 | /** | |
187 | * Finds the value for the byte string, if known to the cache. Returns | |
188 | * null on a cache miss. | |
189 | * | |
190 | * @param source | |
191 | * The source of the bytes in the string. | |
192 | * @param offset | |
193 | * The offset of the first byte to decode. | |
194 | * @param length | |
195 | * The length of the string to decode with a terminal zero | |
196 | * byte. | |
197 | * @return The cached decoded string or null in the case of a cache | |
198 | * miss. | |
199 | */ | |
200 | public String find(final byte[] source, final int offset, | |
201 | final int length) { | |
202 | ||
203 | // Minus 1 for the terminal null. | |
204 | 71612 | final int last = offset + (length - 1); |
205 | ||
206 | 71613 | final int firstByte = source[offset] & 0xFF; |
207 | 71612 | TrieNode node = myRoots[firstByte]; |
208 | ||
209 | // Walk the Trie to the end node. | |
210 | 156278 | for (int i = offset + 1; (node != null) && (i < last); ++i) { |
211 | 84668 | node = node.child(source[i]); |
212 | } | |
213 | ||
214 | 71611 | return (node != null) ? node.getDecoded() : null; |
215 | } | |
216 | ||
217 | /** | |
218 | * Node provides a single node in the trie. | |
219 | * <p> | |
220 | * {@link #myChildren} is a two dimension array for the children of this | |
221 | * node. We use a 2 dimension array to reduce the memory footprint of | |
222 | * the trie by taking advantage of the clustering within the encoding of | |
223 | * languages. The first dimension of the array we refer to as the | |
224 | * {@code zone} and represents the first/high nibble of the value for | |
225 | * the child node. The second dimension of the array is the second/low | |
226 | * nibble. | |
227 | * </p> | |
228 | * | |
229 | * @api.no This class is <b>NOT</b> part of the drivers API. This class | |
230 | * may be mutated in incompatible ways between any two releases | |
231 | * of the driver. | |
232 | * @copyright 2014, Allanbank Consulting, Inc., All Rights Reserved | |
233 | */ | |
234 | protected static class TrieNode { | |
235 | /** | |
236 | * The children of the node. See the class javadoc for a description | |
237 | * of the structure. | |
238 | */ | |
239 | private TrieNode[][] myChildren; | |
240 | ||
241 | /** The decoded value for the node. May be <code>null</code>. */ | |
242 | private String myDecoded; | |
243 | ||
244 | /** The value for the node. */ | |
245 | private final byte myValue; | |
246 | ||
247 | /** | |
248 | * Creates a new Node. | |
249 | * | |
250 | * @param value | |
251 | * The value for the node. | |
252 | */ | |
253 | 19749 | public TrieNode(final byte value) { |
254 | 19749 | myValue = value; |
255 | 19749 | myDecoded = null; |
256 | ||
257 | // No children, yet. | |
258 | 19749 | myChildren = null; |
259 | 19749 | } |
260 | ||
261 | /** | |
262 | * Adds a child node to this node. | |
263 | * | |
264 | * @param child | |
265 | * The child node to add. | |
266 | */ | |
267 | public void addChild(final TrieNode child) { | |
268 | 16685 | final int value = child.getValue(); |
269 | 16685 | final int zone = (value & 0xF0) >> 4; |
270 | 16685 | final int index = (value & 0x0F); |
271 | ||
272 | 16685 | if (myChildren == null) { |
273 | 14216 | myChildren = new TrieNode[16][]; |
274 | 14216 | myChildren[zone] = new TrieNode[16]; |
275 | } | |
276 | 2469 | else if (myChildren[zone] == null) { |
277 | 64 | myChildren[zone] = new TrieNode[16]; |
278 | } | |
279 | ||
280 | 16685 | myChildren[zone][index] = child; |
281 | 16685 | } |
282 | ||
283 | /** | |
284 | * Returns the child node with the specified value. | |
285 | * | |
286 | * @param value | |
287 | * The value for the child to find. | |
288 | * @return The child node for the value or null if there is node | |
289 | * child with that value. | |
290 | */ | |
291 | public TrieNode child(final byte value) { | |
292 | 102086 | final int zone = (value & 0xF0) >> 4; |
293 | 102086 | final int index = (value & 0x0F); |
294 | ||
295 | 102086 | if ((myChildren != null) && (myChildren[zone] != null)) { |
296 | 87335 | return myChildren[zone][index]; |
297 | } | |
298 | 14751 | return null; |
299 | } | |
300 | ||
301 | /** | |
302 | * Returns the node's decoded value. | |
303 | * | |
304 | * @return The node's decoded value. | |
305 | */ | |
306 | public String getDecoded() { | |
307 | 1036 | return myDecoded; |
308 | } | |
309 | ||
310 | /** | |
311 | * Returns the node's value. | |
312 | * | |
313 | * @return The node's value. | |
314 | */ | |
315 | public byte getValue() { | |
316 | 16685 | return myValue; |
317 | } | |
318 | ||
319 | /** | |
320 | * Sets the decoded value for the node. | |
321 | * | |
322 | * @param decoded | |
323 | * The decoded value for the node. | |
324 | */ | |
325 | public void setDecoded(final String decoded) { | |
326 | 5614 | myDecoded = decoded; |
327 | 5614 | } |
328 | } | |
329 | } | |
330 | ||
331 | } |