diff --git a/src/bitpack.cc b/src/bitpack.cc index cae8177..cd63317 100644 --- a/src/bitpack.cc +++ b/src/bitpack.cc @@ -1,45 +1,35 @@ #include "bitpack.hh" #include "common.hh" -#include #include +#include using std::uint16_t; using std::vector; using uchar = unsigned char; using vuint16 = vector; using vuchar = vector; +using ustring = std::basic_string; /////////////////////////////////////////////////////////////////////////////// // packing // /////////////////////////////////////////////////////////////////////////////// [[nodiscard]] vuchar pack(const vuint16 &t_input) { - vuchar ret{}; - const int max_value = ipow(2, 8); - for (auto it = t_input.begin(); it != t_input.end(); ++it) { - if (*it >= max_value) { - const auto next_vec = - pack_n(static_cast(it), t_input.end(), 9); - ret.insert(ret.end(), next_vec.begin(), next_vec.end()); - return ret; - } - ret.push_back(static_cast(*it)); - } - return ret; + return pack_n(t_input.begin(), t_input.end(), 9); } - /** - * Packs \p t_input into unsigned char, assuming the max value of t_input - * only takes \p t_n bits - * - * \param t_input_begin pointer to the beginning of the vector of values to - * be packed \param t_input_end pointer to the end of the input vector - * \param t_n maximum size of an input value in bits \return Returns a - * vector of unsigned char containing the packed values from t_input - */ - [[nodiscard]] vuchar - pack_n(const vuint16::const_iterator t_input_begin, - const vuint16::const_iterator t_input_end, const int t_n) { +/** + * Packs \p t_input into unsigned char, assuming the max value of t_input + * only takes \p t_n bits + * + * \param t_input_begin pointer to the beginning of the vector of values to + * be packed \param t_input_end pointer to the end of the input vector + * \param t_n maximum size of an input value in bits \return Returns a + * vector of unsigned char containing the packed values from t_input + */ +[[nodiscard]] vuchar +pack_n(const vuint16::const_iterator t_input_begin, + const vuint16::const_iterator t_input_end, const int t_n) { if (t_n == 16) { return pack_16(t_input_begin, t_input_end); } @@ -104,71 +94,63 @@ using vuchar = vector; return ret; } - /////////////////////////////////////////////////////////////////////////////// - // unpacking // - /////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +// unpacking // +/////////////////////////////////////////////////////////////////////////////// - [[nodiscard]] vuint16 unpack(const vuchar &t_input) { - vuint16 ret{}; - - const int max_value = ipow(2, 8) - 1; - - // begin with 8bits - for (auto it = t_input.begin(); it != t_input.end(); ++it) { - ret.push_back(static_cast(*it)); - if (*it >= max_value) { - auto next_vec{unpack_n(it, t_input.end(), 9)}; - ret.insert(ret.end(), next_vec.begin(), next_vec.end()); - return ret; - } +uint16_t mask_n(int t_nb_bits) { + if(t_nb_bits == 0) { + return 0; } - - return ret; + uint16_t mask = mask_n(t_nb_bits - 1); + mask <<= 1; + mask |= 0x1; + return mask; } -[[nodiscard]] vuint16 unpack_n(const vuchar::const_iterator t_begin, - const vuchar::const_iterator t_end, +[[nodiscard]] vuint16 unpack(ustring &&t_input) { + return unpack_n(t_input.begin(), t_input.end(), 9); +} + +[[nodiscard]] vuint16 unpack_n(const ustring::const_iterator t_begin, + const ustring::const_iterator t_end, const int t_n) { int step = t_n / 8; int left_shift = 0; - int middle_shift = 0; int right_shift = 0; vuint16 ret{}; - + const uint16_t mask = mask_n(t_n); + const int max_value = ipow(2, t_n); for (auto it = t_begin; it < t_end;) { - static const int max_value = ipow(2, t_n); uint16_t current_char = 0; - left_shift += step; - if (left_shift >= t_n) { - left_shift = (left_shift - t_n) + step; - } - current_char = static_cast((*it) << left_shift); - + // left bits + left_shift = + ((left_shift += step) >= t_n) ? (left_shift - t_n) + step : left_shift; + current_char = static_cast(*it << left_shift); + // right bits bool zero_rs = right_shift; right_shift -= step; if (right_shift < 0) { + // optional middle bits before right bits if (zero_rs) { - middle_shift = std::abs(right_shift); - current_char |= (*(++it)) << middle_shift; + current_char |= *(++it) << std::abs(right_shift); } - right_shift = 8 - std::abs(right_shift); } current_char |= *(++it) >> right_shift; - ret.push_back(current_char); - + // char made! + ret.push_back(current_char &= mask); if (current_char >= max_value) { const auto next_vec = unpack_n(it, t_end, t_n + 1); ret.insert(ret.end(), next_vec.begin(), next_vec.end()); return ret; } } - return ret; } - [[nodiscard]] vuint16 unpack_16(const vuchar::const_iterator t_begin, - const vuchar::const_iterator t_end) { +[[nodiscard]] vuint16 unpack_16(const ustring::const_iterator t_begin, + const ustring::const_iterator t_end) { vuint16 ret{}; for (auto it = t_begin; it < t_end; ++it) { ret.push_back(static_cast((*it << 8) | *(++it))); diff --git a/src/bitpack.hh b/src/bitpack.hh index 5d06222..1026d22 100644 --- a/src/bitpack.hh +++ b/src/bitpack.hh @@ -7,6 +7,7 @@ #define LZW_SRC_BITPACK_H_ #include +#include #include /// \brief Bat-packs the input dynamically @@ -24,14 +25,14 @@ pack_16(const std::vector::const_iterator, const std::vector::const_iterator); [[nodiscard]] std::vector -unpack(const std::vector &); +unpack(std::basic_string &&); [[nodiscard]] std::vector -unpack_n(const std::vector::const_iterator, - const std::vector::const_iterator, const int t_n); +unpack_n(const std::basic_string::const_iterator, + const std::basic_string::const_iterator, const int t_n); [[nodiscard]] std::vector -unpack_16(const std::vector::const_iterator, - const std::vector::const_iterator); +unpack_16(const std::basic_string::const_iterator, + const std::basic_string::const_iterator); #endif /* LZW_SRC_BITPACK_H_ */ diff --git a/src/common.cc b/src/common.cc index 542ceb2..caddc51 100644 --- a/src/common.cc +++ b/src/common.cc @@ -8,7 +8,7 @@ using std::uint8_t; using std::uint16_t; using dic_comp_t = std::map, uint16_t>; -using dic_un_t = std::map>>; +// using dic_un_t = std::map>>; using ustring = std::basic_string; int ipow(int base, int exp) { @@ -44,8 +44,8 @@ int ipow(int base, int exp) { * \param t_c Caractère suivant la chaine de caractères \p t_nr_chaine * \return const std::pair */ -std::pair dico(dic_comp_t &t_dictionary, uint16_t t_nr_chaine, - uint8_t t_c) { +std::pair dico(dic_comp_t &t_dictionary, + const uint16_t t_nr_chaine, const uint8_t t_c) { if (t_nr_chaine == 0xFFFF) { return std::make_pair(true, t_c); } @@ -58,3 +58,19 @@ std::pair dico(dic_comp_t &t_dictionary, uint16_t t_nr_chaine, t_dictionary.size()) + 255)); } + +ustring dico_uncompress(std ::map &t_dict, + const uint16_t t_code, const uint16_t t_old) { + auto &e = t_dict[t_code]; + if(e.empty()) { + e = t_dict[t_old]; + const auto temp = e[0]; + e += temp; + return e; + } + + auto str = t_dict[t_old]; + str += str[0]; + t_dict[static_cast(t_dict.size())] = std::move(str); + return e; +} diff --git a/src/common.hh b/src/common.hh index 4c3a9ac..28199d3 100644 --- a/src/common.hh +++ b/src/common.hh @@ -15,6 +15,10 @@ int ipow(int, int); /// \brief Recherche ou ajout de chaine dans le dictionnaire std::pair dico(std::map, std::uint16_t> &, - std::uint16_t, std::uint8_t); + const std::uint16_t, const std::uint8_t); + +std::basic_string +dico_uncompress(std::map> &, + const std::uint16_t, const std::uint16_t); #endif /* LZW_SRC_COMMON_H_ */ diff --git a/src/io.cc b/src/io.cc index caf4fbf..f89d11a 100644 --- a/src/io.cc +++ b/src/io.cc @@ -44,6 +44,8 @@ void write_file(FILE *const t_out, const vvuint16 &t_text) { } fwrite(&size, sizeof(size), 1, t_out); for (const auto &chunk : t_text) { + if constexpr (debug_mode) + std::printf("Chunk!\n"); write_chunk(t_out, chunk); } } diff --git a/src/main.cc b/src/main.cc index ba43023..0cf5199 100644 --- a/src/main.cc +++ b/src/main.cc @@ -6,10 +6,11 @@ * */ -#include "compress.hh" -#include "getopt.h" +#include #include #include +#include "compress.hh" +#include "uncompress.hh" using std::printf; using std::puts; @@ -45,7 +46,7 @@ void help() { puts("\t-o\tpath to the output file (if the file already exists, it will"); puts("\t\tbe overwritten). Default: input path + \".lzw\""); puts("\t-c\tcompress the input file"); - puts("\t-d\tdecompresses the input file to the output file. If no output"); + puts("\t-u\tuncompresses the input file to the output file. If no output"); puts("\t\tpath has not been entered and if the input file ends with "); puts("\t\t\".lzw\", the extension \".lzw\" will be removed; otherwise, the "); puts("\t\textension \".uncompresed\" will be added"); @@ -98,14 +99,15 @@ int main(int argc, char *argv[]) { const auto [input_path, output_path, compressing] = process_args(argc, argv); assert(!input_path.empty()); if (compressing) { - if (output_path.empty()) { - compress(input_path, nullptr); - } else { - compress(input_path, output_path.c_str()); - } + // if (output_path.empty()) { + // compress(input_path, nullptr); + // } else { + // compress(input_path, output_path.c_str()); + // } + compress(input_path, (output_path.empty()) ? nullptr : output_path.c_str()); } else { - puts("Not yet implemented :("); - /* Inversion des types du dictionnaire pour retrouver les chaînes plus aisément */ + uncompress(input_path, + (output_path.empty()) ? nullptr : output_path.c_str()); } return 0; } diff --git a/src/uncompress.cc b/src/uncompress.cc new file mode 100644 index 0000000..43a7273 --- /dev/null +++ b/src/uncompress.cc @@ -0,0 +1,81 @@ +#include "uncompress.hh" +#include "bitpack.hh" +#include "common.hh" +#include +#include +#include + +#ifdef Debug +constexpr bool debug_mode = true; +#else +constexpr bool debug_mode = false; +#endif + +using std::fclose; +using std::fopen; +using std::fseek; +using std::string; +using std::uint16_t; +using std::vector; +using ustring = std::basic_string; +using vuint16 = vector; + +[[nodiscard]] ustring lzw_uncompress(vuint16 &&t_compressed) { + ustring ret{}; + uint16_t old = 0; + uint16_t code = t_compressed[0]; + std::map dict{}; + ret.push_back(static_cast(code)); + old = code; + for (auto it = t_compressed.begin() + 1; it != t_compressed.end(); ++it) { + code = *it; + const auto uncompressed{dico_uncompress(dict, code, old)}; + ret.insert(ret.end(), uncompressed.begin(), uncompressed.end()); + old = code; + } + + return ret; +} + +void uncompress(const string &t_input_name, const char *t_output_name) { + FILE *const input = std::fopen(t_input_name.c_str(), "rb"); + assert(input); + + FILE *const output = + (t_output_name != nullptr) + ? std::fopen(t_output_name, "wb") + : std::fopen((t_input_name + "_uncompressed").c_str(), "wb"); + assert(output); + + uint16_t nb_chunks = 0; + std::fseek(input, 0, SEEK_SET); + std::fread(&nb_chunks, sizeof(nb_chunks), 1, input); + + if constexpr (debug_mode) { + std::printf("Number of chunks: %d\n", nb_chunks); + } + + for (uint16_t i = 0; i < nb_chunks; ++i) { + uint16_t size_chunk = 0; + fread(&size_chunk, sizeof(size_chunk), 1, input); + if constexpr (debug_mode) { + printf("Chunk! Size of chunk:\t%d\n", size_chunk); + } + unsigned char *chunk = static_cast( + std::malloc(sizeof(unsigned char) * size_chunk)); + fread(chunk, sizeof(unsigned char), size_chunk, input); + + auto unpacked = unpack(ustring{chunk, chunk + size_chunk}); + if constexpr(debug_mode) { + for(const auto val : unpacked) { + std::printf("%d\n", val); + } + } + const auto uncompressed_chunk = lzw_uncompress(std::move(unpacked)); + std::fwrite(uncompressed_chunk.data(), sizeof(uncompressed_chunk[0]), + uncompressed_chunk.size(), output); + } + + std::fclose(output); + std::fclose(input); +} diff --git a/src/uncompress.hh b/src/uncompress.hh new file mode 100644 index 0000000..d12eadb --- /dev/null +++ b/src/uncompress.hh @@ -0,0 +1,12 @@ +#ifndef LZW_SRC_UNCOMPRESS_H_ +#define LZW_SRC_UNCOMPRESS_H_ + +#include +#include + +[[nodiscard]] std::basic_string +lzw_uncompress(std::vector &&); + +void uncompress(const std::string &, const char*); + +#endif /* LZW_SRC_UNCOMPRESS_H_ */