libStatGen Software 1
Loading...
Searching...
No Matches
MiniDeflate.cpp
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include "MiniDeflate.h"
19
20// Convenient constants and macros
21//
22#define EMPTY_KEY 123
23#define uchar unsigned char
24
25#ifndef min
26#define min(a,b) (((a)<(b))?(a):(b))
27#endif
28
29MiniDeflate::MiniDeflate()
30{
31 buffer = new uchar [BUFFER_SIZE + 5];
32 hash_keys = new uchar [HASH_SIZE];
33 hash_values = new uchar * [HASH_SIZE * HASH_DEPTH];
34}
35
36MiniDeflate::~MiniDeflate()
37{
38 delete [] buffer;
39 delete [] hash_keys;
40 delete [] hash_values;
41}
42
43void MiniDeflate::EvaluateMatch(unsigned char * in, int len, int hash,
44 unsigned char * & best_pos, int & best_match)
45{
46 int max = min(len, 0xFFFF + 66);
47
48 for (int i = HASH_DEPTH; i > 0; i--)
49 // Check each possible match (up to HASH_DEPTH)
50 {
51 uchar * pos = hash_values[hash * HASH_DEPTH + ((hash_keys[hash] + i) % HASH_DEPTH)];
52
53 if (pos == NULL || in - pos >= 0x4001) break;
54
55 int match = 0;
56
57 while (match < max && pos[match] == in[match])
58 match++;
59
60 if (match > best_match)
61 {
62 best_match = match;
63 best_pos = pos;
64 }
65 }
66
67 // If string seems pretty unique, add to hash table
68 if (best_match < OKAY_MATCH)
69 {
70 int delta = hash_keys[hash] = (uchar)((hash_keys[hash] + 1) & 7);
71 hash_values[hash * 8 + delta] = in;
72 }
73}
74
75void MiniDeflate::QuoteLiterals(unsigned char * & in, int literal,
76 unsigned char * & out, int & buffer_len,
77 FILE * output)
78{
79 if (buffer_len < 0)
80 {
81 fwrite(buffer, out - buffer, 1, output);
82 buffer_len = BUFFER_SIZE;
83 out = buffer;
84 }
85
86 while (buffer_len < literal)
87 {
88 literal -= buffer_len;
89 while (buffer_len--)
90 {
91 *out = *in;
92 in++;
93 out++;
94 }
95 fwrite(buffer, BUFFER_SIZE, 1, output);
96 buffer_len = BUFFER_SIZE;
97 out = buffer;
98 }
99
100 while (literal--)
101 {
102 *out = *in;
103 in++;
104 out++;
105 buffer_len--;
106 }
107}
108
109void MiniDeflate::OutputLiterals(unsigned char * & in, int literal,
110 unsigned char * & out, int & buffer_len,
111 FILE * output)
112{
113 while (literal > 0)
114 if (literal < 16)
115 {
116 *out = (char) literal;
117 out++;
118 buffer_len--;
119 QuoteLiterals(in, literal, out, buffer_len, output);
120 break;
121 }
122 else if (literal < 31)
123 {
124 *out = 15;
125 out++;
126 buffer_len--;
127 QuoteLiterals(in, 15, out, buffer_len, output);
128 *out = (uchar)(literal - 15);
129 out++;
130 buffer_len--;
131 QuoteLiterals(in, literal - 15, out, buffer_len, output);
132 break;
133 }
134 else
135 {
136 int length = min(literal, 0xFFFF + 31);
137 literal -= length;
138 length -= 31;
139
140 *out = 0;
141 out++;
142 *out = (uchar)(length >> 8);
143 out++;
144 *out = (uchar)(length & 0xFF);
145 out++;
146 buffer_len -= 3;
147
148 QuoteLiterals(in, length + 31, out, buffer_len, output);
149 }
150}
151
152
153void MiniDeflate::Deflate(FILE * output, void * void_input, size_t len)
154{
155 uchar * in = (uchar *) void_input;
156 uchar * out = (uchar *) buffer;
157 int buffer_len = BUFFER_SIZE;
158
159 for (int i = 0; i < HASH_SIZE; i++) hash_keys[i] = EMPTY_KEY;
160
161 uchar * in2 = in;
162
163 while (len > 2)
164 {
165 // Hash the current input value
166 int hash = ((in[0] << 16) | (in[1] << 8) | in[2]) % HASH_SIZE;
167
168 if (hash_keys[hash] != EMPTY_KEY)
169 // Possible matches in hash table
170 {
171 int best_match = 0;
172 uchar * best_pos = NULL;
173
174 EvaluateMatch(in, len, hash, best_pos, best_match);
175
176 // If there are no decent matches
177 if (best_match < 3)
178 {
179 in++;
180 len--;
181 continue;
182 }
183
184 // Try look ahead if match isn't great
185 while (best_match < OKAY_MATCH && len > 3)
186 {
187 // Peek to see if we could get a better match
188 int next_hash = ((in[1] << 16) | (in[2] << 8) | in[3]) % HASH_SIZE;
189
190 if (hash_keys[next_hash] == EMPTY_KEY) break;
191
192 int next_match = 0;
193 uchar * next_pos = NULL;
194
195 EvaluateMatch(in + 1, len - 1, next_hash, next_pos, next_match);
196
197 // Didn't find a better match
198 if (next_match <= best_match + 1) break;
199
200 // Found a better match, so try again
201 in++;
202 len--;
203 best_match = next_match;
204 best_pos = next_pos;
205 }
206
207 int best_offset = in - best_pos - 1;
208
209 // This is where we output stuff
210 // Check if we have some literals to output first
211 OutputLiterals(in2, in - in2, out, buffer_len, output);
212
213 in2 = in += best_match;
214 len -= best_match;
215
216 if (best_match < 17 && best_offset < 0x1000)
217 {
218 *out = (uchar)(((best_match - 1) << 4) | (best_offset >> 8));
219 out++;
220 *out = (uchar)(best_offset & 0xFF);
221 out++;
222 buffer_len -= 2;
223 }
224 else if (best_match < 66)
225 {
226 *out = (uchar)(16 | (best_offset >> 10));
227 out++;
228 *out = (uchar)((best_offset >> 2) & 0xFF);
229 out++;
230 *out = (uchar)((best_offset << 6) | (best_match - 2));
231 out++;
232 buffer_len -= 3;
233 }
234 else
235 {
236 *out = (uchar)(16 | (best_offset >> 10));
237 out++;
238 *out = (uchar)((best_offset >> 2) & 0xFF);
239 out++;
240 *out = (uchar)(best_offset << 6);
241 out++;
242 best_match -= 66;
243 *out = (uchar)(best_match >> 8);
244 out++;
245 *out = (uchar)(best_match & 0xFF);
246 out++;
247 buffer_len -= 5;
248 }
249
250 if (buffer_len <= 0)
251 {
252 fwrite(buffer, out - buffer, 1, output);
253 buffer_len = BUFFER_SIZE;
254 out = buffer;
255 }
256 }
257 // Never seen this sequence before
258 else
259 {
260 hash_keys[hash] = 0;
261 for (int i = 1; i < HASH_DEPTH; i++) hash_values[hash * 8 + i] = NULL;
262 hash_values[hash * 8] = in;
263 in++;
264 len--;
265 }
266 }
267
268 // Check if we have some trailing literals to output
269 in += len;
270 OutputLiterals(in2, in - in2, out, buffer_len, output);
271
272 // Flush output
273 if (out != buffer) fwrite(buffer, out - buffer, 1, output);
274}
275
276void MiniDeflate::CiteLiteral(unsigned char * & out, int literal,
277 unsigned char * & in, int & buffer_len,
278 FILE * input)
279{
280 while (buffer_len < literal)
281 {
282 literal -= buffer_len;
283 while (buffer_len--)
284 {
285 *out = *in;
286 in++;
287 out++;
288 }
289 buffer_len = fread(buffer + 5, 1, BUFFER_SIZE, input);
290 in = buffer + 5;
291 }
292
293 while (literal--)
294 {
295 *out = *in;
296 in++;
297 out++;
298 buffer_len--;
299 }
300}
301
302
303void MiniDeflate::Inflate(FILE * input, void * void_output, size_t len)
304{
305 uchar * out = (uchar *) void_output;
306 uchar * in = (uchar *) buffer + 5;
307 int buffer_len = BUFFER_SIZE;
308
309 buffer_len = fread(buffer + 5, 1, BUFFER_SIZE, input);
310
311 while (len)
312 {
313 int match_len = *in >> 4;
314
315 // Matching a literal
316 if (match_len == 0)
317 {
318 match_len = *in & 0x0F;
319 in++, buffer_len--;
320
321 // If match_len == 0 then string is longer than 30 characters
322 // Strings of 16 - 30 characters are encoded as two short strings
323 if (match_len == 0)
324 {
325 match_len = (in[0] << 8) + in[1] + 31;
326 in += 2;
327 buffer_len -= 2;
328 }
329
330 CiteLiteral(out, match_len, in, buffer_len, input);
331 len -= match_len;
332 }
333 // Long match, 14 bit offset
334 else if (match_len == 1)
335 {
336 int offset = (((in[0] & 0x0F) << 10) | (in[1] << 2) | (in[2] >> 6)) + 1;
337 match_len = (in[2] & 0x3F) + 2;
338 in += 3;
339 buffer_len -= 3;
340
341 if (match_len == 2)
342 {
343 match_len = ((in[0] << 8) | in[1]) + 66;
344 in += 2;
345 buffer_len -= 2;
346 }
347
348 uchar * match_pos = out - offset;
349 len -= match_len;
350 while (match_len--)
351 {
352 *out = *match_pos;
353 out++, match_pos++;
354 }
355 }
356 // Typical short match
357 else
358 {
359 int offset = (((in[0] & 0x0F) << 8) | in[1]) + 1;
360 in += 2;
361 buffer_len -= 2;
362
363 uchar * match_pos = out - offset;
364 len -= ++match_len;
365 while (match_len--)
366 {
367 *out = *match_pos;
368 out++, match_pos++;
369 }
370 }
371
372 if (buffer_len < 5)
373 {
374 uchar * in2 = (uchar *) buffer + 5 - buffer_len;
375 while (in2 != buffer + 5)
376 {
377 *in2 = *in;
378 in2++;
379 in++;
380 }
381
382 in = buffer + 5 - buffer_len;
383 buffer_len += fread(buffer + 5, 1, BUFFER_SIZE, input);
384 }
385 }
386
387 if (buffer_len) fseek(input, -buffer_len, SEEK_CUR);
388}
389
390
391