1 /*
2 * #%L
3 * StringDecoder.java - mongodb-async-driver - Allanbank Consulting, Inc.
4 * %%
5 * Copyright (C) 2011 - 2014 Allanbank Consulting, Inc.
6 * %%
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 * #L%
19 */
20
21 package com.allanbank.mongodb.bson.io;
22
23 import java.io.EOFException;
24 import java.io.StreamCorruptedException;
25 import java.nio.charset.Charset;
26
27 /**
28 * StringDecoder provides a decoder for byte arrays into strings that uses a
29 * trie data structure to cache recurring strings.
30 * <p>
31 * This class is <b>not</b> thread safe.
32 * </p>
33 *
34 * @api.no This class is <b>NOT</b> part of the drivers API. This class may be
35 * mutated in incompatible ways between any two releases of the driver.
36 * @copyright 2013, Allanbank Consulting, Inc., All Rights Reserved
37 */
38 public class StringDecoder {
39
40 /** UTF-8 Character set for encoding strings. */
41 /* package */final static Charset UTF8 = Charset.forName("UTF-8");
42
43 /** A builder for the ASCII strings. */
44 private final StringBuilder myBuilder = new StringBuilder(64);
45
46 /** The cached decoded strings. */
47 private final StringDecoderCache myCache;
48
49 /**
50 * Creates a new StringDecoder.
51 */
52 public StringDecoder() {
53 this(new StringDecoderCache());
54 }
55
56 /**
57 * Creates a new StringDecoder.
58 *
59 * @param cache
60 * The cache for the decoder.
61 */
62 public StringDecoder(final StringDecoderCache cache) {
63 super();
64
65 myCache = cache;
66 }
67
68 /**
69 * Decode a string of a known length. The last byte should be a zero byte
70 * and will not be included in the decoded string.
71 *
72 * @param source
73 * The source of the bytes in the string.
74 * @param offset
75 * The offset of the first byte to decode.
76 * @param length
77 * The length of the string to decode with a terminal zero byte.
78 * @return The decoded string.
79 * @throws StreamCorruptedException
80 * On the decoding of the string failing.
81 * @throws EOFException
82 * On the array not containing enough bytes to decoded.
83 */
84 public String decode(final byte[] source, final int offset, final int length)
85 throws StreamCorruptedException, EOFException {
86
87 String result = myCache.find(source, offset, length);
88 if (result == null) {
89 result = fastDecode(source, offset, length - 1);
90 }
91
92 myCache.used(result, source, offset, length);
93
94 return result;
95 }
96
97 /**
98 * Returns the cache value.
99 *
100 * @return The cache value.
101 * @deprecated The cache {@link StringDecoderCache} should be controlled
102 * directly. This method will be removed after the 2.1.0
103 * release.
104 */
105 @Deprecated
106 public StringDecoderCache getCache() {
107 return myCache;
108 }
109
110 /**
111 * Retrieves or caches the decoded string for the Trie.
112 *
113 * @param source
114 * The source of the bytes in the string.
115 * @param offset
116 * The offset of the first byte to decode.
117 * @param length
118 * The length of the string to decode without a terminal zero
119 * byte.
120 * @return The value for the string.
121 */
122 private String fastDecode(final byte[] source, final int offset,
123 final int length) {
124 // Try to decode as ASCII.
125 boolean isAscii = true;
126 for (int i = 0; isAscii && (i < length); ++i) {
127 final int b = (source[offset + i] & 0xFF);
128 if (b < 0x80) {
129 myBuilder.append((char) b);
130 }
131 else {
132 isAscii = false;
133 }
134 }
135
136 String result;
137 if (!isAscii) {
138 final int encodedLength = myBuilder.length();
139
140 final String remaining = new String(source, offset + encodedLength,
141 length - encodedLength, UTF8);
142
143 myBuilder.append(remaining);
144 }
145 result = myBuilder.toString();
146
147 // Clear the string builder.
148 myBuilder.setLength(0);
149
150 return result;
151 }
152 }