From 2b3e5e3f2b987615b38384860b8d6d8ca5351a68 Mon Sep 17 00:00:00 2001 From: Phuntsok Drak-pa Date: Fri, 15 Jun 2018 19:54:00 +0200 Subject: [PATCH] Fixed bit-packing bug --- src/bitpack.cc | 88 ++++++++++++++++++++++++++++++------------------- src/common.cc | 11 +------ src/compress.cc | 1 - 3 files changed, 55 insertions(+), 45 deletions(-) diff --git a/src/bitpack.cc b/src/bitpack.cc index 6d26401..e65c00a 100644 --- a/src/bitpack.cc +++ b/src/bitpack.cc @@ -1,7 +1,7 @@ #include "bitpack.hh" #include "common.hh" -#include #include +#include using std::uint16_t; using std::vector; @@ -22,37 +22,38 @@ vuchar pack(const vuint16 &t_input) { * Packs \p t_input into unsigned char, assuming the max value of t_input * only takes \p t_n bits * - * \param t_input_begin pointer to the beginning of the vector of values to - * be packed \param t_input_end pointer to the end of the input vector - * \param t_n maximum size of an input value in bits \return Returns a - * vector of unsigned char containing the packed values from t_input + * \param t_input_begin pointer to the beginning of the vector of values to be packed + * \param t_input_end pointer to the end of the input vector + * \param t_n maximum size of an input value in bits + * \return Returns a vector of unsigned char containing the packed values from t_input */ vuchar pack_n(const vuint16::const_iterator t_input_begin, const vuint16::const_iterator t_input_end, const int t_n) { -#ifdef Debug - std::printf("%d bits!\n", t_n); -#endif if (t_n == 16) { return pack_16(t_input_begin, t_input_end); } - vuchar ret{}; + const int max_value = ipow(2, t_n) - 1; // max value held within t_n bits - // max value with current number of bits + 1 - const int max_value = ipow(2, t_n) - 1; +#ifdef Debug + std::printf("%d bits! %ld chars remaining\n", t_n, + std::distance(t_input_begin, t_input_end)); + std::printf("max: %d\n", max_value); +#endif - uchar current_char = 0; int step = t_n / 8; int left_shift = 0; int middle_shift = 0; int right_shift = 0; + uchar current_char = 0; + bool char_touched = false; + vuchar ret{}; for (auto it = t_input_begin; it != t_input_end; ++it) { - left_shift += step; - if (left_shift >= t_n) { + if ((left_shift += step) >= t_n) { left_shift = (left_shift - t_n) + step; } ret.push_back((current_char | (*it >> left_shift)) & 0xFFu); - current_char = 0; + // current_char = 0; bool zero_right_shift = (right_shift == 0); right_shift -= step; @@ -60,19 +61,23 @@ vuchar pack_n(const vuint16::const_iterator t_input_begin, middle_shift = std::abs(right_shift); right_shift = 8 - std::abs(right_shift); if (!zero_right_shift) { - ret.push_back((*it >> middle_shift) & 0xFF); + current_char = (*it >> middle_shift) & 0xFFu; + ret.push_back(current_char); } } if (right_shift == 0) { - ret.push_back(*it & 0xff); + current_char = *it & 0xffu; + ret.push_back(current_char); current_char = 0; + char_touched = false; } else { - current_char = (*it << right_shift) & 0xFF; + current_char = (*it << right_shift) & 0xFFu; + char_touched = true; } - // at the end so we can detect `max_value` while parsing for bit-unpacking - if (*t_input_end >= max_value) { - if (current_char != 0) { + // il faut écrire la valeur pour la décompression + if (*it >= max_value) { + if (char_touched) { ret.push_back(current_char); } const auto next_vec = pack_n(it, t_input_end, t_n + 1); @@ -80,7 +85,7 @@ vuchar pack_n(const vuint16::const_iterator t_input_begin, return ret; } } - if (current_char != 0) { + if (char_touched) { ret.push_back(current_char); } return ret; @@ -88,10 +93,14 @@ vuchar pack_n(const vuint16::const_iterator t_input_begin, vuchar pack_16(const vuint16::const_iterator t_input_begin, const vuint16::const_iterator t_input_end) { +#ifdef Debug + std::printf("16 bits! %ld chars remaining\n", + std::distance(t_input_begin, t_input_end)); +#endif vuchar ret{}; std::for_each(t_input_begin, t_input_end, [&](const auto value) { - ret.push_back((value >> 8) & 0xFF); - ret.push_back(value & 0xFF); + ret.push_back((value >> 8) & 0xFFu); + ret.push_back(value & 0xFFu); }); return ret; } @@ -100,27 +109,38 @@ vuchar pack_16(const vuint16::const_iterator t_input_begin, // unpacking // /////////////////////////////////////////////////////////////////////////////// -uint16_t mask_n(int t_nb_bits) { +constexpr uint16_t mask_n(int t_nb_bits) { if (t_nb_bits == 0) { return 0; } uint16_t mask = mask_n(t_nb_bits - 1); - mask <<= 1; + mask = static_cast(mask << 1); mask |= 0x1; return mask; } +constexpr uint16_t masks[17] = { + mask_n(0), mask_n(1), mask_n(2), mask_n(3), mask_n(4), mask_n(5), + mask_n(6), mask_n(7), mask_n(8), mask_n(9), mask_n(10), mask_n(11), + mask_n(12), mask_n(13), mask_n(14), mask_n(15), mask_n(16)}; + vuint16 unpack(ustring &&t_input) { return unpack_n(t_input.begin(), t_input.end(), 9); } vuint16 unpack_n(const ustring::const_iterator t_begin, const ustring::const_iterator t_end, const int t_n) { +#ifdef Debug + std::printf("Chunk! %d bits, %ld compressed chars\n", t_n, + std::distance(t_begin, t_end)); +#endif + if (t_n == 16) { + return unpack_16(t_begin, t_end); + } int step = t_n / 8; int left_shift = 0; int right_shift = 0; vuint16 ret{}; - const uint16_t mask = mask_n(t_n); const int max_value = ipow(2, t_n); for (auto it = t_begin; it < t_end - 1; /* nope */) { uint16_t current_char = 0; @@ -134,21 +154,21 @@ vuint16 unpack_n(const ustring::const_iterator t_begin, if (right_shift < 0) { // optional middle bits before right bits if (zero_rs) { - current_char |= *++it << std::abs(right_shift); + current_char |= *++it << (-right_shift); } - right_shift = 8 - std::abs(right_shift); + right_shift = 8 + right_shift; } current_char |= *(++it) >> right_shift; // char made! - ret.push_back(current_char &= mask); - if (right_shift == 0) { - ++it; - } + ret.push_back(current_char &= masks[t_n]); if (current_char >= max_value) { - const auto next_vec = unpack_n(it, t_end, t_n + 1); + const auto next_vec = unpack_n(it + 1, t_end, t_n + 1); ret.insert(ret.end(), next_vec.begin(), next_vec.end()); return ret; } + if (right_shift == 0) { + ++it; + } } return ret; } diff --git a/src/common.cc b/src/common.cc index 3124caf..de65e8b 100644 --- a/src/common.cc +++ b/src/common.cc @@ -65,9 +65,6 @@ ustring dico_uncompress(std::map &t_dict, const uint16_t t_code, const uint16_t t_old) { // le code existe dans le dictionnaire s’il est < 256 if (t_code < 256) { -#ifdef Debug - std::printf("Code : %d\tNOT EMPTY\n", t_code); -#endif ustring e{static_cast(t_code)}; // 256 car on n'a pas encore tenté d'insérer de nouveau caractère if (t_old < 256) { @@ -85,19 +82,13 @@ ustring dico_uncompress(std::map &t_dict, // le code existe dans le dictionnaire if (!e.empty()) { -#ifdef Debug - std::printf("Code : %d\tNOT EMPTY\n", t_code); -#endif str += e[0]; - const uint16_t index = static_cast(t_dict.size() + 256); + const auto index = static_cast(t_dict.size() + 256); t_dict[index] = str; return e; } // le code n'existe pas encore dans le dictionnaire -#ifdef Debug - std::printf("Code : %d\tEMPTY\n", t_code); -#endif str += str[0]; e = str; t_dict[t_code] = e; diff --git a/src/compress.cc b/src/compress.cc index 50360ec..334234d 100644 --- a/src/compress.cc +++ b/src/compress.cc @@ -77,7 +77,6 @@ void compress(const std::string &t_in_file, const char *t_out_file) { if (out == nullptr) { std::cerr << "Error at " << __FILE__ << ":" << __LINE__ - 4 << ": could not open output file. Aborting...\n"; - // input_file.close(); std::fclose(input_file); exit(1); }