Mitchbux
New Coder
Hello, i'm trying to compress data with a 'classify' and a 'predict' procedure : the classify procedure would try to sort data before the predict procedure to compress better.
I have wrote both predict and procedure now I need help on the classify_read : Can anyone help me write the code to ensure all the data IS decoded correctly ?
You Can find the whole code @ Online CPP - IDE, Code Editor, Compiler. Code in the link is also posted below:
I have wrote both predict and procedure now I need help on the classify_read : Can anyone help me write the code to ensure all the data IS decoded correctly ?
You Can find the whole code @ Online CPP - IDE, Code Editor, Compiler. Code in the link is also posted below:
C++:
// compile with tcc for windows
// tcc main.c -o shrink.exe
#include <fcntl.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <windows.h>
// .:: / auxilary memory routines /::.
// ...................................
typedef uint8_t byte;
typedef uint16_t ushort;
void *zsx_alloc(size_t item_size, size_t n_item) {
size_t *x = (size_t *)calloc(1, sizeof(size_t) * 2 + n_item * item_size);
x[0] = item_size;
x[1] = n_item;
return x + 2;
}
void *zsx_extend(void *m, size_t new_n) {
size_t *x = (size_t *)m - 2;
x = (size_t *)realloc(x, sizeof(size_t) * 2 + *x * new_n);
if (new_n > x[1])
memset((char *)(x + 2) + x[0] * x[1], 0, x[0] * (new_n - x[1]));
x[1] = new_n;
return x + 2;
}
void zsx_clear(void *m) {
size_t *x = (size_t *)m - 2;
memset(m, 0, x[0] * x[1]);
}
#define zsx_new(type, n) zsx_alloc(sizeof(type), n)
#define zsx_del(m) free((size_t *)(m)-2);
#define zsx_len(m) *((size_t *)m - 1)
#define zsx_setsize(m, n) m = (byte *)zsx_extend(m, n)
#define zsx_extend(m) m = zsx_extend(m, zsx_len(m) * 2);
// .:: / bit writer procedures /::.
// ................................
typedef struct {
int power;
int bits;
int start;
char *data;
int canextend;
} zsx_writer_t;
void zsx_write_bit(zsx_writer_t *p, int v) {
p->bits += (v & 1) << (8 - (++p->power));
if (p->power >= 8) {
p->data[p->start++] = p->bits;
p->power = 0;
p->bits = 0;
if (p->canextend && ((zsx_len(p->data)) - p->start) < sizeof(int))
zsx_extend(p->data);
}
}
void zsx_write_value(zsx_writer_t *p, unsigned int v, int n) {
while (n > 0) {
n--;
zsx_write_bit(p, (v >> n) & 1);
}
}
void zsx_flushWrite(zsx_writer_t *p) {
while (p->power > 0)
zsx_write_bit(p, 0);
}
// .:: / bit reader procedures /::.
// .................................
typedef struct {
int power;
int bits;
int start;
char *data;
} zsx_reader_t;
int zsx_read_bit(zsx_reader_t *r) {
int v = (r->bits >> (8 - (++r->power)) & 1);
if (r->power >= 8) {
r->power = 0;
r->bits = r->data[r->start++];
}
return v;
}
unsigned int zsx_read_value(zsx_reader_t *r, int n) {
unsigned int v = 0;
while (n > 0) {
n--;
v = v << 1;
v += zsx_read_bit(r);
}
return v;
}
#define zsx_extendable 1
#define zsx_statically 0
//creates a bit writer
zsx_writer_t *zsx_writer(byte *out, int ext) {
zsx_writer_t *r = zsx_new(zsx_writer_t, 1);
r->power = 0;
r->bits = 0;
r->start = 0;
r->data = (char *)out;
r->canextend = ext;
return r;
}
//creates a bit reader
zsx_reader_t *zsx_reader(byte *in) {
zsx_reader_t *r = zsx_new(zsx_reader_t, 1);
r->power = 0;
r->bits = in[0];
r->start = 1;
r->data = (char *)in;
return r;
}
//Logarithm function : returns the number of bits needed to store a maximum value not reached
int zsx_lf(uint64_t n) {
int m = 1;
while ((uint64_t)(1 << m) < n)
m++;
return m;
}
//buffer size : 1Gb
#define zsx_buffer 0x40000000
#define zsx_chunk 0x40000000
#define zsx_byte 0x100
#define zsx_size 0x200
//two buffers when encoding ; only zsx_bytes_buffer is predicted.
//the result buffer stores everything concatenated at the and
byte *zsx_result_buffer;
byte *zsx_bytes_buffer;
byte *zsx_key_buffer;
//you need to call these routines before using zsx and to clear everything
#define zsx_init zsx_result_buffer = zsx_new(byte, zsx_buffer); zsx_bytes_buffer = zsx_new(byte, zsx_buffer); zsx_key_buffer = zsx_new(byte, zsx_buffer);
#define zsx_exit zsx_del(zsx_result_buffer); zsx_del(zsx_bytes_buffer); zsx_del(zsx_key_buffer);
void zsx_quicksort(zsx_writer_t *w, int *list, int len) {
if (len < 2) return;
int pivot = list[len / 2];
byte *stacker = zsx_new(byte, len);
zsx_clear(stacker);
int z, s, x;
for (s = 0, x = len - 1; ; s++, x--) {
while (list[s] < pivot) s++;
while (list[x] > pivot) x--;
if (s >= x) break;
stacker[s] = stacker[x] = 1;
list[s] ^= list[x];
list[x] ^= list[s];
list[s] ^= list[x];
}
if (w)
for (z = 0; z < len; z++)
zsx_write_bit(w, stacker[z]);
zsx_quicksort(w, list, s);
zsx_quicksort(w, list + s, len - s);
}
void zsx_quicksort_read(zsx_reader_t *r, int *list, int len) {
if (len < 2) return;
int pivot = list[len / 2];
byte *stacker = zsx_new(byte, len);
for (z = 0; z < len; z++)
stacker[z]=zsx_read_bit(r);
int z, s, x;
for (s = 0, x = len - 1; ; s++, x--) {
while (stacker[s]==0) s++;
while (stacker[x]==0) x--;
if (s >= x) break;
list[s] ^= list[x];
list[x] ^= list[s];
list[s] ^= list[x];
}
zsx_quicksort(w, list, s);
zsx_quicksort(w, list + s, len - s);
}
// .::/ zsx_classify /::.
// Prepares the buffer for prediction. Uses two buffers out :
// the key has no need to be compressed and is very small, the buffer (byte *result) is then predicted.
// .......................
// byte *key : the unpredicted buffer
// int *keyLen : the size written to key
// byte *data : the data to encode
// int len : the size of the data
// byte *result : the buffer for encoded data
// .......................
// returns : the length of the result encoded data
int zsx_classify(byte *key, int *keyLen, byte *data, int len, byte *result)
{
int z, s, x;
int full = 0, stop = 0, size = 0;
int *stacker = zsx_new(int, 0x10000);
int *reverse = zsx_new(int, 0x1000);
int *counter = zsx_new(int, zsx_size);
int **slacker = zsx_new(int*, zsx_size);
for (s = 0; s < zsx_size; s++)
slacker[s] = zsx_new(int, 0x1000);
zsx_writer_t *w = zsx_writer(key, zsx_statically);
while (full < len)
{
zsx_clear(counter);
zsx_clear(stacker);
int next = 0;
int last = 0;
while (full < len)
{
x = (data[full++]<<8)^data[full++];
zsx_write_bit(w, stacker[x] ? 1 : 0);
if (stacker[x] == 0)
{
stacker[x] = ++next;
stacker[reverse[next]] = 0;
reverse[next] = x;
if (next & 0xFF == 0)
{
zsx_quicksort(w, reverse+last, next-last);
for(s=last+1;s<=next;s++)
slacker[s][counter[s]++] = reverse[s];
if (counter[1] == 0xFFF)break;
last = next;
}
}
else zsx_write_value(w, stacker[x], 9);
if (next == zsx_size)next=0, last=0;
}
for (s = 0; s < zsx_size; s++)
for (x = 0; x < counter[s]; x++)
result[stop++] = slacker[s][x] >> 8,
result[stop++] = slacker[s][x];
}
zsx_flushWrite(w);
*keyLen = w->start;
zsx_del(w);
zsx_del(stacker);
zsx_del(reverse);
zsx_del(counter);
for (s = 0; s < zsx_size; s++)
zsx_del(slacker[s]);
zsx_del(slacker);
return stop;
}
// .::/ zsx_classify /::.
// Predicts the data and writes the new (smaller) buffer at the same address in memory
// Uses a control bits buffer
// .......................
// zsx_writer_t *w : the control bits whether its predicted or in the buffer
// byte *data : the data to predict
// int len : the size of the data
// .......................
// returns : the length of the result not predicted data
int zsx_predict(zsx_writer_t *w, byte *data, int len)
{
int ***counter = zsx_new(int**, zsx_byte);
int **stacker = zsx_new(int*, zsx_byte);
int **maximum = zsx_new(int*, zsx_byte);
int z, s, x, full, start = 0;
for (s = 0; s < 0x100; s++) {
counter[s] = zsx_new(int*, zsx_byte);
stacker[s] = zsx_new(int, zsx_byte);
maximum[s] = zsx_new(int, zsx_byte);
zsx_clear(stacker[s]);
zsx_clear(maximum[s]);
for (x = 0; x < zsx_byte; x++)
{
counter[s][x] = zsx_new(int, zsx_byte);
zsx_clear(counter[s][x]);
}
}
z = s = x = full = 0;
while (full < len)
{
x = data[full++];
zsx_write_bit(w, stacker[z][s] == x ? 1 : 0);
if (stacker[z][s] != x)
{
data[start++] = x;
}
counter[z][s][x]++;
if (counter[z][s][x] > maximum[z][s])
{
maximum[z][s] = counter[z][s][x];
stacker[z][s] = x;
}
z = s;
s = x;
}
for (s = 0; s < zsx_byte; s++)
{
for (x = 0; x < zsx_byte; x++)
zsx_del(counter[s][x]);
zsx_del(counter[s]);
zsx_del(stacker[s]);
zsx_del(maximum[s]);
}
zsx_del(counter);
zsx_del(stacker);
zsx_del(maximum);
return start;
}
/****** zsx ******/
byte *zsx_encode(byte *data) {
int length = zsx_len(data);
byte *result = zsx_result_buffer + sizeof(int) * 4;
byte *bytes = zsx_bytes_buffer;
int keyLen = 0;
int start = zsx_classify(zsx_key_buffer, &keyLen, data, length, bytes);
zsx_writer_t *key = zsx_writer(zsx_key_buffer + keyLen, zsx_statically);
zsx_write_value(key, length, 32);
size_t len = zsx_predict(key, zsx_bytes_buffer, start);
zsx_flushWrite(key);
((int*)zsx_result_buffer)[0] = start;
((int*)zsx_result_buffer)[1] = len;
((int*)zsx_result_buffer)[2] = keyLen;
((int*)zsx_result_buffer)[3] = key->start;
memcpy(result, zsx_bytes_buffer, len);
result += len;
memcpy(result, zsx_key_buffer, keyLen + key->start);
zsx_len(zsx_result_buffer) = len + keyLen + key->start + sizeof(int) * 4;
zsx_del(key);
return zsx_result_buffer;
}
byte * zsx_classify_read(byte *key, byte*bytes, byte*result, int len)
{
int z, s, x;
int decoded = 0, start = 0, stop = 0;
int **slacker = zsx_new(int*, zsx_size);
for (s = 0; s < zsx_size; s++)
slacker[s] = zsx_new(int, zsx_size);
int *counter = zsx_new(int, zsx_size);
int *indexof = zsx_new(int, zsx_size);
int *length = zsx_new(int, zsx_size);
int *sorted = zsx_new(int, zsx_size);
int *code = zsx_new(int, len);
zsx_writer_t * r = zsx_reader(key);
while (decoded < len)
{
zsx_clear(counter);
zsx_clear(length);
int next = zsx_read_value(r, 16);
int last = 0;
int size = 0;
while ((decoded + (last * 2)) < len)
{
int index;
if (zsx_read_bit(r))
{
index = ++next;
code[size++] = index;
if (next & 0xFF == 0)
{
for (x = 0, s = last + 1; s <= next; s++)
sorted[x++] = s, length[s]++;
zsx_quicksort_read(r, sorted, x);
for (s = 0; s < x; s++)
indexof[sorted[s]] = s;
//todo : get reverse dictionary for values until now and decode indexes
if (length[1] == 0xFFF)break;
last = next;
}
}
else
{
index = zsx_read_value(r, 9);
code[size++] = index;
if (index > next)
{
//todo : decode index with previous reverse dictionary
}
}
if (next == 0x200) next = 0, last = 0;
}
}
for (s = 0; s < zsx_size; s++)
zsx_del(slacker[s])
zsx_del(slacker);
zsx_del(counter);
zsx_del(indexof);
zsx_del(length);
zsx_del(sorted);
zsx_del(code);
}
byte *zsx_predict_read(zsx_reader_t *r, byte *data, byte * bytes, int length)
{
int z, s, x;
int ***counter = zsx_new(int**, zsx_byte);
int **stacker = zsx_new(int*, zsx_byte);
int **maximum = zsx_new(int*, zsx_byte);
for (s = 0; s < 0x100; s++) {
counter[s] = zsx_new(int*, zsx_byte);
stacker[s] = zsx_new(int, zsx_byte);
maximum[s] = zsx_new(int, zsx_byte);
zsx_clear(stacker[s]);
zsx_clear(maximum[s]);
for (x = 0; x < zsx_byte; x++)
{
counter[s][x] = zsx_new(int, zsx_byte);
zsx_clear(counter[s][x]);
}
}
int read = 0;
int start = 0;
z = s = x = 0;
while (read < length)
{
if (zsx_read_bit(r) == 0)
{
bytes[read++] = x = data[start++];
}
else
{
bytes[read++] = x = stacker[z][s];
}
counter[z][s][x]++;
if (counter[z][s][x] > maximum[z][s])
{
maximum[z][s] = counter[z][s][x];
stacker[z][s] = x;
}
z = s;
s = x;
}
for (s = 0; s < zsx_byte; s++)
{
for (x = 0; x < zsx_byte; x++)
zsx_del(counter[s][x]);
zsx_del(counter[s]);
zsx_del(stacker[s]);
zsx_del(maximum[s]);
}
zsx_del(counter);
zsx_del(stacker);
zsx_del(maximum);
return bytes;
}
byte *zsx_decode(byte *data) {
int length = ((int*)data)[0];
int len = ((int*)data)[1];
int keyLen = ((int*)data)[2];
byte * buffer = data + sizeof(int) * 4;
byte *first_buffer = data + sizeof(int) * 4 + len;
byte *key_buffer = data + sizeof(int) * 4 + len + keyLen;
zsx_reader_t *key = zsx_reader(key_buffer);
byte * bytes = zsx_bytes_buffer;
int next_length = zsx_read_value(key, 32);
byte * result = zsx_new(byte, next_length);
zsx_predict_read(key, buffer, bytes, length);
zsx_classify_read(first_buffer, bytes, result, next_length);
puts(" ");
memcpy(zsx_result_buffer, result, next_length);
zsx_del(result);
zsx_len(zsx_result_buffer) = next_length;
zsx_result_buffer[next_length] = 0;
return zsx_result_buffer;
}
int zsx_bytes_read;
void CALLBACK FIO(DWORD E1, DWORD N2, LPOVERLAPPED ol) {
zsx_bytes_read = N2;
}
int testdec(char *filename) {
//*** Testing Encoding and Decoding ***
//win32 file handling
OVERLAPPED ol;
ol.Offset = 0;
ol.OffsetHigh = 0;
HANDLE hFile = CreateFile(
filename,
GENERIC_READ,
FILE_SHARE_READ,
NULL,
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED,
NULL);
if (hFile == INVALID_HANDLE_VALUE) {
printf("Can't read file!\n");
return 0;
};
int encoded = 0;
DWORD file_high;
int file_size = GetFileSize(hFile, &file_high);
char *outfilename = zsx_new(char, 256);
strcpy(outfilename, filename);
strcat(outfilename, ".dec");
FILE *f = fopen(outfilename, "wb");
if (f == NULL) {
printf("Can't create file!\n");
return 0;
};
//zsx library has a few set of macros.
//this one takes care of everything about initialization
zsx_init
while (encoded < file_size) {
ReadFileEx(hFile, zsx_result_buffer, zsx_chunk, &ol, FIO);
SleepEx(INFINITE, TRUE);
encoded += zsx_bytes_read;
ol.Offset += zsx_bytes_read;
printf("\nEncoding ...\n");
//zsx_len is a dereferenced pointer can be used as left-op or right-op and describes the size of allocated memory
zsx_len(zsx_result_buffer) = zsx_bytes_read;
byte * data = zsx_encode(zsx_result_buffer);
//data = zsx_encode(data);
printf("\nDecoding ...\n");
byte *decompressed = zsx_decode(data);
//decompressed = zsx_decode(decompressed);
puts(decompressed);
//zsx_len contains the size of the data but not what was actually allocated at first.
// !! Caution !! do not use these macro for any other context than compression.
fwrite(decompressed, zsx_len(decompressed), 1, f);
}
zsx_exit
fclose(f);
CloseHandle(hFile);
printf("Original data decoded to file %s.\n", outfilename);
return 1;
}
int main(int argc, char **argv) {
if (argc < 2)
printf("Usage : %s filename\n", argv[0]);
else if (argc == 2)
testdec(argv[1]);
return 0;
}
Last edited by a moderator: