Fixed bit-packing bug
This commit is contained in:
parent
5b9f3ccd6a
commit
2b3e5e3f2b
@ -1,7 +1,7 @@
|
|||||||
#include "bitpack.hh"
|
#include "bitpack.hh"
|
||||||
#include "common.hh"
|
#include "common.hh"
|
||||||
#include <cmath>
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
using std::uint16_t;
|
using std::uint16_t;
|
||||||
using std::vector;
|
using std::vector;
|
||||||
@ -22,37 +22,38 @@ vuchar pack(const vuint16 &t_input) {
|
|||||||
* Packs \p t_input into unsigned char, assuming the max value of t_input
|
* Packs \p t_input into unsigned char, assuming the max value of t_input
|
||||||
* only takes \p t_n bits
|
* only takes \p t_n bits
|
||||||
*
|
*
|
||||||
* \param t_input_begin pointer to the beginning of the vector of values to
|
* \param t_input_begin pointer to the beginning of the vector of values to be packed
|
||||||
* be packed \param t_input_end pointer to the end of the input vector
|
* \param t_input_end pointer to the end of the input vector
|
||||||
* \param t_n maximum size of an input value in bits \return Returns a
|
* \param t_n maximum size of an input value in bits
|
||||||
* vector of unsigned char containing the packed values from t_input
|
* \return Returns a vector of unsigned char containing the packed values from t_input
|
||||||
*/
|
*/
|
||||||
vuchar pack_n(const vuint16::const_iterator t_input_begin,
|
vuchar pack_n(const vuint16::const_iterator t_input_begin,
|
||||||
const vuint16::const_iterator t_input_end, const int t_n) {
|
const vuint16::const_iterator t_input_end, const int t_n) {
|
||||||
#ifdef Debug
|
|
||||||
std::printf("%d bits!\n", t_n);
|
|
||||||
#endif
|
|
||||||
if (t_n == 16) {
|
if (t_n == 16) {
|
||||||
return pack_16(t_input_begin, t_input_end);
|
return pack_16(t_input_begin, t_input_end);
|
||||||
}
|
}
|
||||||
vuchar ret{};
|
const int max_value = ipow(2, t_n) - 1; // max value held within t_n bits
|
||||||
|
|
||||||
// max value with current number of bits + 1
|
#ifdef Debug
|
||||||
const int max_value = ipow(2, t_n) - 1;
|
std::printf("%d bits! %ld chars remaining\n", t_n,
|
||||||
|
std::distance(t_input_begin, t_input_end));
|
||||||
|
std::printf("max: %d\n", max_value);
|
||||||
|
#endif
|
||||||
|
|
||||||
uchar current_char = 0;
|
|
||||||
int step = t_n / 8;
|
int step = t_n / 8;
|
||||||
int left_shift = 0;
|
int left_shift = 0;
|
||||||
int middle_shift = 0;
|
int middle_shift = 0;
|
||||||
int right_shift = 0;
|
int right_shift = 0;
|
||||||
|
uchar current_char = 0;
|
||||||
|
bool char_touched = false;
|
||||||
|
vuchar ret{};
|
||||||
|
|
||||||
for (auto it = t_input_begin; it != t_input_end; ++it) {
|
for (auto it = t_input_begin; it != t_input_end; ++it) {
|
||||||
left_shift += step;
|
if ((left_shift += step) >= t_n) {
|
||||||
if (left_shift >= t_n) {
|
|
||||||
left_shift = (left_shift - t_n) + step;
|
left_shift = (left_shift - t_n) + step;
|
||||||
}
|
}
|
||||||
ret.push_back((current_char | (*it >> left_shift)) & 0xFFu);
|
ret.push_back((current_char | (*it >> left_shift)) & 0xFFu);
|
||||||
current_char = 0;
|
// current_char = 0;
|
||||||
|
|
||||||
bool zero_right_shift = (right_shift == 0);
|
bool zero_right_shift = (right_shift == 0);
|
||||||
right_shift -= step;
|
right_shift -= step;
|
||||||
@ -60,19 +61,23 @@ vuchar pack_n(const vuint16::const_iterator t_input_begin,
|
|||||||
middle_shift = std::abs(right_shift);
|
middle_shift = std::abs(right_shift);
|
||||||
right_shift = 8 - std::abs(right_shift);
|
right_shift = 8 - std::abs(right_shift);
|
||||||
if (!zero_right_shift) {
|
if (!zero_right_shift) {
|
||||||
ret.push_back((*it >> middle_shift) & 0xFF);
|
current_char = (*it >> middle_shift) & 0xFFu;
|
||||||
|
ret.push_back(current_char);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (right_shift == 0) {
|
if (right_shift == 0) {
|
||||||
ret.push_back(*it & 0xff);
|
current_char = *it & 0xffu;
|
||||||
|
ret.push_back(current_char);
|
||||||
current_char = 0;
|
current_char = 0;
|
||||||
|
char_touched = false;
|
||||||
} else {
|
} else {
|
||||||
current_char = (*it << right_shift) & 0xFF;
|
current_char = (*it << right_shift) & 0xFFu;
|
||||||
|
char_touched = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// at the end so we can detect `max_value` while parsing for bit-unpacking
|
// il faut écrire la valeur pour la décompression
|
||||||
if (*t_input_end >= max_value) {
|
if (*it >= max_value) {
|
||||||
if (current_char != 0) {
|
if (char_touched) {
|
||||||
ret.push_back(current_char);
|
ret.push_back(current_char);
|
||||||
}
|
}
|
||||||
const auto next_vec = pack_n(it, t_input_end, t_n + 1);
|
const auto next_vec = pack_n(it, t_input_end, t_n + 1);
|
||||||
@ -80,7 +85,7 @@ vuchar pack_n(const vuint16::const_iterator t_input_begin,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (current_char != 0) {
|
if (char_touched) {
|
||||||
ret.push_back(current_char);
|
ret.push_back(current_char);
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
@ -88,10 +93,14 @@ vuchar pack_n(const vuint16::const_iterator t_input_begin,
|
|||||||
|
|
||||||
/**
 * Packs every 16-bit value of the input range into two bytes, most
 * significant byte first.
 *
 * \param t_input_begin iterator to the first value to pack
 * \param t_input_end iterator one past the last value to pack
 * \return Returns a vector of unsigned char holding two bytes per input value
 */
vuchar pack_16(const vuint16::const_iterator t_input_begin,
               const vuint16::const_iterator t_input_end) {
#ifdef Debug
  // std::distance yields a ptrdiff_t: %td is its matching length modifier.
  std::printf("16 bits! %td chars remaining\n",
              std::distance(t_input_begin, t_input_end));
#endif
  vuchar ret{};
  // Exactly two output bytes per input value: allocate once up front.
  ret.reserve(static_cast<vuchar::size_type>(t_input_end - t_input_begin) * 2);
  for (auto it = t_input_begin; it != t_input_end; ++it) {
    ret.push_back(static_cast<vuchar::value_type>((*it >> 8) & 0xFFu)); // high byte
    ret.push_back(static_cast<vuchar::value_type>(*it & 0xFFu));        // low byte
  }
  return ret;
}
|
||||||
@ -100,27 +109,38 @@ vuchar pack_16(const vuint16::const_iterator t_input_begin,
|
|||||||
// unpacking //
|
// unpacking //
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
/**
 * Builds a mask whose \p t_nb_bits lowest bits are set.
 *
 * Non-positive widths yield 0 and widths of 16 or more saturate to 0xFFFF,
 * so the function terminates for every argument (the former recursive
 * version never reached its base case when given a negative width).
 *
 * \param t_nb_bits number of low-order bits to set
 * \return Returns the mask as a 16-bit unsigned value
 */
constexpr uint16_t mask_n(int t_nb_bits) {
  if (t_nb_bits <= 0) {
    return 0;
  }
  if (t_nb_bits >= 16) {
    return 0xFFFFu;
  }
  // 1 <= t_nb_bits <= 15 here, so the shift cannot overflow.
  return static_cast<uint16_t>((1u << t_nb_bits) - 1u);
}
|
||||||
|
|
||||||
|
// Lookup table of low-bit masks: masks[i] has its i lowest bits set, for
// i from 0 to 16. These are the values mask_n(0) .. mask_n(16) evaluate to.
constexpr uint16_t masks[17] = {
    0x0000u, 0x0001u, 0x0003u, 0x0007u, 0x000Fu, 0x001Fu,
    0x003Fu, 0x007Fu, 0x00FFu, 0x01FFu, 0x03FFu, 0x07FFu,
    0x0FFFu, 0x1FFFu, 0x3FFFu, 0x7FFFu, 0xFFFFu};
|
||||||
|
|
||||||
/**
 * Unpacks a whole packed string by handing it to unpack_n with an initial
 * value width of 9 bits.
 *
 * \param t_input packed input string
 * \return Returns the values produced by unpack_n for the entire input
 */
vuint16 unpack(ustring &&t_input) {
  constexpr int initial_bits = 9;
  const auto first = t_input.begin();
  const auto last = t_input.end();
  return unpack_n(first, last, initial_bits);
}
|
||||||
|
|
||||||
vuint16 unpack_n(const ustring::const_iterator t_begin,
|
vuint16 unpack_n(const ustring::const_iterator t_begin,
|
||||||
const ustring::const_iterator t_end, const int t_n) {
|
const ustring::const_iterator t_end, const int t_n) {
|
||||||
|
#ifdef Debug
|
||||||
|
std::printf("Chunk! %d bits, %ld compressed chars\n", t_n,
|
||||||
|
std::distance(t_begin, t_end));
|
||||||
|
#endif
|
||||||
|
if (t_n == 16) {
|
||||||
|
return unpack_16(t_begin, t_end);
|
||||||
|
}
|
||||||
int step = t_n / 8;
|
int step = t_n / 8;
|
||||||
int left_shift = 0;
|
int left_shift = 0;
|
||||||
int right_shift = 0;
|
int right_shift = 0;
|
||||||
vuint16 ret{};
|
vuint16 ret{};
|
||||||
const uint16_t mask = mask_n(t_n);
|
|
||||||
const int max_value = ipow(2, t_n);
|
const int max_value = ipow(2, t_n);
|
||||||
for (auto it = t_begin; it < t_end - 1; /* nope */) {
|
for (auto it = t_begin; it < t_end - 1; /* nope */) {
|
||||||
uint16_t current_char = 0;
|
uint16_t current_char = 0;
|
||||||
@ -134,21 +154,21 @@ vuint16 unpack_n(const ustring::const_iterator t_begin,
|
|||||||
if (right_shift < 0) {
|
if (right_shift < 0) {
|
||||||
// optional middle bits before right bits
|
// optional middle bits before right bits
|
||||||
if (zero_rs) {
|
if (zero_rs) {
|
||||||
current_char |= *++it << std::abs(right_shift);
|
current_char |= *++it << (-right_shift);
|
||||||
}
|
}
|
||||||
right_shift = 8 - std::abs(right_shift);
|
right_shift = 8 + right_shift;
|
||||||
}
|
}
|
||||||
current_char |= *(++it) >> right_shift;
|
current_char |= *(++it) >> right_shift;
|
||||||
// char made!
|
// char made!
|
||||||
ret.push_back(current_char &= mask);
|
ret.push_back(current_char &= masks[t_n]);
|
||||||
if (right_shift == 0) {
|
|
||||||
++it;
|
|
||||||
}
|
|
||||||
if (current_char >= max_value) {
|
if (current_char >= max_value) {
|
||||||
const auto next_vec = unpack_n(it, t_end, t_n + 1);
|
const auto next_vec = unpack_n(it + 1, t_end, t_n + 1);
|
||||||
ret.insert(ret.end(), next_vec.begin(), next_vec.end());
|
ret.insert(ret.end(), next_vec.begin(), next_vec.end());
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
if (right_shift == 0) {
|
||||||
|
++it;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -65,9 +65,6 @@ ustring dico_uncompress(std::map<uint16_t, ustring> &t_dict,
|
|||||||
const uint16_t t_code, const uint16_t t_old) {
|
const uint16_t t_code, const uint16_t t_old) {
|
||||||
// le code existe dans le dictionnaire s’il est < 256
|
// le code existe dans le dictionnaire s’il est < 256
|
||||||
if (t_code < 256) {
|
if (t_code < 256) {
|
||||||
#ifdef Debug
|
|
||||||
std::printf("Code : %d\tNOT EMPTY\n", t_code);
|
|
||||||
#endif
|
|
||||||
ustring e{static_cast<unsigned char>(t_code)};
|
ustring e{static_cast<unsigned char>(t_code)};
|
||||||
// 256 car on n'a pas encore tenté d'insérer de nouveau caractère
|
// 256 car on n'a pas encore tenté d'insérer de nouveau caractère
|
||||||
if (t_old < 256) {
|
if (t_old < 256) {
|
||||||
@ -85,19 +82,13 @@ ustring dico_uncompress(std::map<uint16_t, ustring> &t_dict,
|
|||||||
|
|
||||||
// le code existe dans le dictionnaire
|
// le code existe dans le dictionnaire
|
||||||
if (!e.empty()) {
|
if (!e.empty()) {
|
||||||
#ifdef Debug
|
|
||||||
std::printf("Code : %d\tNOT EMPTY\n", t_code);
|
|
||||||
#endif
|
|
||||||
str += e[0];
|
str += e[0];
|
||||||
const uint16_t index = static_cast<uint16_t>(t_dict.size() + 256);
|
const auto index = static_cast<uint16_t>(t_dict.size() + 256);
|
||||||
t_dict[index] = str;
|
t_dict[index] = str;
|
||||||
return e;
|
return e;
|
||||||
}
|
}
|
||||||
|
|
||||||
// le code n'existe pas encore dans le dictionnaire
|
// le code n'existe pas encore dans le dictionnaire
|
||||||
#ifdef Debug
|
|
||||||
std::printf("Code : %d\tEMPTY\n", t_code);
|
|
||||||
#endif
|
|
||||||
str += str[0];
|
str += str[0];
|
||||||
e = str;
|
e = str;
|
||||||
t_dict[t_code] = e;
|
t_dict[t_code] = e;
|
||||||
|
@ -77,7 +77,6 @@ void compress(const std::string &t_in_file, const char *t_out_file) {
|
|||||||
if (out == nullptr) {
|
if (out == nullptr) {
|
||||||
std::cerr << "Error at " << __FILE__ << ":" << __LINE__ - 4
|
std::cerr << "Error at " << __FILE__ << ":" << __LINE__ - 4
|
||||||
<< ": could not open output file. Aborting...\n";
|
<< ": could not open output file. Aborting...\n";
|
||||||
// input_file.close();
|
|
||||||
std::fclose(input_file);
|
std::fclose(input_file);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user