1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18 package org.apache.giraph.utils;
19
20 /**
21 * This Code is adapted from main/java/org/apache/mahout/math/Varint.java
22 *
23 * Licensed to the Apache Software Foundation (ASF) under one or more
24 * contributor license agreements. See the NOTICE file distributed with
25 * this work for additional information regarding copyright ownership.
26 * The ASF licenses this file to You under the Apache License, Version 2.0
27 * (the "License"); you may not use this file except in compliance with
28 * the License. You may obtain a copy of the License at
29 *
30 * http://www.apache.org/licenses/LICENSE-2.0
31 *
32 * Unless required by applicable law or agreed to in writing, software
33 * distributed under the License is distributed on an "AS IS" BASIS,
34 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
35 * See the License for the specific language governing permissions and
36 * limitations under the License.
37 */
38
39 import java.io.DataInput;
40 import java.io.DataOutput;
41 import java.io.IOException;
42
43 /**
44 * <p>
45 * Encodes signed and unsigned values using a common variable-length scheme,
46 * found for example in <a
47 * href="http://code.google.com/apis/protocolbuffers/docs/encoding.html">
48 * Google's Protocol Buffers</a>. It uses fewer bytes to encode smaller values,
49 * but will use slightly more bytes to encode large values.
50 * </p>
51 * <p>
52 * Signed values are further encoded using so-called zig-zag encoding in order
53 * to make them "compatible" with variable-length encoding.
54 * </p>
55 */
56 public final class Varint {
57
58 /**
59 * private constructor
60 */
61 private Varint() {
62 }
63
64 /**
65 * Encodes a value using the variable-length encoding from <a
66 * href="http://code.google.com/apis/protocolbuffers/docs/encoding.html">
67 * Google Protocol Buffers</a>.
68 *
69 * @param value to encode
70 * @param out to write bytes to
71 * @throws IOException
72 * if {@link DataOutput} throws {@link IOException}
73 */
74 private static void writeVarLong(
75 long value,
76 DataOutput out
77 ) throws IOException {
78 while (true) {
79 int bits = ((int) value) & 0x7f;
80 value >>>= 7;
81 if (value == 0) {
82 out.writeByte((byte) bits);
83 return;
84 }
85 out.writeByte((byte) (bits | 0x80));
86 }
87 }
88
89 /**
90 * Encodes a value using the variable-length encoding from <a
91 * href="http://code.google.com/apis/protocolbuffers/docs/encoding.html">
92 * Google Protocol Buffers</a>.
93 *
94 * @param value to encode
95 * @param out to write bytes to
96 * @throws IOException
97 * if {@link DataOutput} throws {@link IOException}
98 */
99 public static void writeUnsignedVarLong(
100 long value,
101 DataOutput out
102 ) throws IOException {
103 if (value < 0) {
104 throw new IllegalStateException(
105 "Negative value passed into writeUnsignedVarLong - " + value);
106 }
107 writeVarLong(value, out);
108 }
109
110 /**
111 * Zig-zag encoding for signed longs
112 *
113 * @param value to encode
114 * @param out to write bytes to
115 * @throws IOException
116 * if {@link DataOutput} throws {@link IOException}
117 */
118 public static void writeSignedVarLong(
119 long value,
120 DataOutput out
121 ) throws IOException {
122 writeVarLong((value << 1) ^ (value >> 63), out);
123 }
124
125 /**
126 * @see #writeVarLong(long, DataOutput)
127 * @param value to encode
128 * @param out to write bytes to
129 * @throws IOException
130 */
131 private static void writeVarInt(
132 int value,
133 DataOutput out
134 ) throws IOException {
135 while (true) {
136 int bits = value & 0x7f;
137 value >>>= 7;
138 if (value == 0) {
139 out.writeByte((byte) bits);
140 return;
141 }
142 out.writeByte((byte) (bits | 0x80));
143 }
144 }
145
146 /**
147 * @see #writeVarLong(long, DataOutput)
148 * @param value to encode
149 * @param out to write bytes to
150 * @throws IOException
151 */
152 public static void writeUnsignedVarInt(
153 int value,
154 DataOutput out
155 ) throws IOException {
156 if (value < 0) {
157 throw new IllegalStateException(
158 "Negative value passed into writeUnsignedVarInt - " + value);
159 }
160 writeVarInt(value, out);
161 }
162
163 /**
164 * Zig-zag encoding for signed ints
165 *
166 * @see #writeUnsignedVarInt(int, DataOutput)
167 * @param value to encode
168 * @param out to write bytes to
169 * @throws IOException
170 */
171 public static void writeSignedVarInt(
172 int value,
173 DataOutput out
174 ) throws IOException {
175 writeVarInt((value << 1) ^ (value >> 31), out);
176 }
177
178 /**
179 * @param in to read bytes from
180 * @return decode value
181 * @throws IOException
182 * if {@link DataInput} throws {@link IOException}
183 */
184 public static long readUnsignedVarLong(DataInput in) throws IOException {
185 long tmp;
186 // CHECKSTYLE: stop InnerAssignment
187 if ((tmp = in.readByte()) >= 0) {
188 return tmp;
189 }
190 long result = tmp & 0x7f;
191 if ((tmp = in.readByte()) >= 0) {
192 result |= tmp << 7;
193 } else {
194 result |= (tmp & 0x7f) << 7;
195 if ((tmp = in.readByte()) >= 0) {
196 result |= tmp << 14;
197 } else {
198 result |= (tmp & 0x7f) << 14;
199 if ((tmp = in.readByte()) >= 0) {
200 result |= tmp << 21;
201 } else {
202 result |= (tmp & 0x7f) << 21;
203 if ((tmp = in.readByte()) >= 0) {
204 result |= tmp << 28;
205 } else {
206 result |= (tmp & 0x7f) << 28;
207 if ((tmp = in.readByte()) >= 0) {
208 result |= tmp << 35;
209 } else {
210 result |= (tmp & 0x7f) << 35;
211 if ((tmp = in.readByte()) >= 0) {
212 result |= tmp << 42;
213 } else {
214 result |= (tmp & 0x7f) << 42;
215 if ((tmp = in.readByte()) >= 0) {
216 result |= tmp << 49;
217 } else {
218 result |= (tmp & 0x7f) << 49;
219 if ((tmp = in.readByte()) >= 0) {
220 result |= tmp << 56;
221 } else {
222 result |= (tmp & 0x7f) << 56;
223 result |= ((long) in.readByte()) << 63;
224 }
225 }
226 }
227 }
228 }
229 }
230 }
231 }
232 // CHECKSTYLE: resume InnerAssignment
233 return result;
234 }
235
236 /**
237 * @param in to read bytes from
238 * @return decode value
239 * @throws IOException
240 * if {@link DataInput} throws {@link IOException}
241 */
242 public static long readSignedVarLong(DataInput in) throws IOException {
243 long raw = readUnsignedVarLong(in);
244 long temp = (((raw << 63) >> 63) ^ raw) >> 1;
245 return temp ^ (raw & (1L << 63));
246 }
247
248 /**
249 * @throws IOException
250 * if {@link DataInput} throws {@link IOException}
251 * @param in to read bytes from
252 * @return decode value
253 */
254 public static int readUnsignedVarInt(DataInput in) throws IOException {
255 int tmp;
256 // CHECKSTYLE: stop InnerAssignment
257 if ((tmp = in.readByte()) >= 0) {
258 return tmp;
259 }
260 int result = tmp & 0x7f;
261 if ((tmp = in.readByte()) >= 0) {
262 result |= tmp << 7;
263 } else {
264 result |= (tmp & 0x7f) << 7;
265 if ((tmp = in.readByte()) >= 0) {
266 result |= tmp << 14;
267 } else {
268 result |= (tmp & 0x7f) << 14;
269 if ((tmp = in.readByte()) >= 0) {
270 result |= tmp << 21;
271 } else {
272 result |= (tmp & 0x7f) << 21;
273 result |= (in.readByte()) << 28;
274 }
275 }
276 }
277 // CHECKSTYLE: resume InnerAssignment
278 return result;
279 }
280
281 /**
282 * @throws IOException
283 * if {@link DataInput} throws {@link IOException}
284 * @param in to read bytes from
285 * @return decode value
286 */
287 public static int readSignedVarInt(DataInput in) throws IOException {
288 int raw = readUnsignedVarInt(in);
289 int temp = (((raw << 31) >> 31) ^ raw) >> 1;
290 return temp ^ (raw & (1 << 31));
291 }
292
293 /**
294 * Simulation for what will happen when writing an unsigned long value
295 * as varlong.
296 * @param value to consider
297 * @return the number of bytes needed to write value.
298 * @throws IOException
299 */
300 public static long sizeOfUnsignedVarLong(long value) throws IOException {
301 int result = 0;
302 do {
303 result++;
304 value >>>= 7;
305 } while (value != 0);
306 return result;
307 }
308
309 /**
310 * Simulation for what will happen when writing a signed long value
311 * as varlong.
312 * @param value to consider
313 * @return the number of bytes needed to write value.
314 * @throws IOException
315 */
316 public static long sizeOfSignedVarLong(long value) throws IOException {
317 return sizeOfUnsignedVarLong((value << 1) ^ (value >> 63));
318 }
319
320 /**
321 * Simulation for what will happen when writing an unsigned int value
322 * as varint.
323 * @param value to consider
324 * @return the number of bytes needed to write value.
325 * @throws IOException
326 */
327 public static int sizeOfUnsignedVarInt(int value) throws IOException {
328 int cnt = 0;
329 do {
330 cnt++;
331 value >>>= 7;
332 } while (value != 0);
333 return cnt;
334 }
335
336 /**
337 * Simulation for what will happen when writing a signed int value
338 * as varint.
339 * @param value to consider
340 * @return the number of bytes needed to write value.
341 * @throws IOException
342 */
343 public static int sizeOfSignedVarInt(int value) throws IOException {
344 return sizeOfUnsignedVarInt((value << 1) ^ (value >> 31));
345 }
346
347 }