From 0523fe77f2a44c958e6c7a6ed0bca1f20f63b8ee Mon Sep 17 00:00:00 2001 From: Phuntsok Drak-pa Date: Sun, 24 Jun 2018 18:03:09 +0200 Subject: [PATCH] Bug identified, first char of new chunk not written (see uncompress.cc:59) --- src/bitpack.cc | 3 --- src/compress.cc | 24 ++++++------------------ src/io.cc | 3 --- src/uncompress.cc | 37 +++++++++++++++++-------------------- src/uncompress.hh | 3 ++- 5 files changed, 25 insertions(+), 45 deletions(-) diff --git a/src/bitpack.cc b/src/bitpack.cc index da20f0e..d9822f4 100644 --- a/src/bitpack.cc +++ b/src/bitpack.cc @@ -48,9 +48,6 @@ vuchar pack(const vuint16 &t_input) { vuchar pack_n(const vuint16::const_iterator t_input_begin, const vuint16::const_iterator t_input_end, vuchar &t_res, int t_n) { -#ifdef Debug - std::printf("%d bits!\n", t_n); -#endif // Debug if (t_n == 16) { return pack_16(t_input_begin, t_input_end, t_res); } diff --git a/src/compress.cc b/src/compress.cc index 7ee43db..50ef335 100644 --- a/src/compress.cc +++ b/src/compress.cc @@ -37,30 +37,18 @@ ustring read_file(const string &filename) { return res; } -/** - * La chaîne de caractères \p t_text est lue caractère par caractère, et est - * selon la valeur de retour de la fonction \ref dico (permettant dans le même - * temps la création du dictionnaire), on rajoute ou non un nouveau caractère - * encodé sur 12bits dans le chunk courant. Dès que le dictionnaire est plein - * (2^12 caractères), le chunk est sauvegardé et vidé, et le dictionnaire est - * réinitialisé. 
- * - * \param t_text Chaîne de caractères uint8_t représentant le fichier d'entrée - * \return Vecteur de chunks (vecteurs de uint16_t) - */ vvuint16 lzw_compress(ustring &&t_text) { - std::puts("Compressing..."); - const auto DICT_MAX = static_cast(ipow(2, 17) - 256); /* 16 bits */ + vvuint16 res{}; + const auto DICT_MAX = static_cast(ipow(2, 14) - 256); /* 16 bits */ uint16_t w = 0xFFFF; vuint16 chunk{}; - vvuint16 res{}; dict_t dict{}; for (const auto c : t_text) { if (dict.size() >= DICT_MAX) { - res.push_back(std::move(chunk)); - chunk = vuint16{}; - dict = dict_t{}; + res.push_back(chunk); w = 0xFFFF; + chunk.clear(); + dict.clear(); } if (const auto &[exists, pos] = dico(dict, w, static_cast(c)); exists) { @@ -88,7 +76,7 @@ vvuint16 lzw_compress(ustring &&t_text) { */ void compress(const std::string &t_in_file, const char *t_out_file) { std::ofstream out{(t_out_file != nullptr) ? t_out_file : "output.lzw", - std::ios::out | std::ios::binary}; + ios::out | ios::binary}; if (!out.is_open()) { std::cerr << "Error at " << __FILE__ << ":" << __LINE__ - 4 << ": could not open output file. 
Aborting...\n"; diff --git a/src/io.cc b/src/io.cc index 543046b..83c5edb 100644 --- a/src/io.cc +++ b/src/io.cc @@ -48,9 +48,6 @@ void write_file(std::ofstream &t_out, const vvuint16 &t_chunks) { * \param t_chunk Chunk to be written to \p t_out */ void write_chunk(std::ofstream &t_out, const vuint16 &t_chunk) { -#ifdef Debug - std::printf("Chunk!\n"); -#endif const auto output = pack(t_chunk); const auto chunk_size = static_cast(output.size()); t_out.write(reinterpret_cast(&chunk_size), sizeof(chunk_size)); diff --git a/src/uncompress.cc b/src/uncompress.cc index 638e7b7..d303f65 100644 --- a/src/uncompress.cc +++ b/src/uncompress.cc @@ -20,16 +20,11 @@ ustring lzw_uncompress(vuint16 &&t_compressed) { ustring ret{}; uint16_t old = 0; std::map dict{}; - // uint16_t v = t_compressed[0]; - // ret.append({static_cast(v)}); - // old = v; ret.append({static_cast(t_compressed[0])}); old = t_compressed[0]; for (auto it = t_compressed.begin() + 1; it != t_compressed.end(); ++it) { - // v = *it; const auto uncompressed{dico_uncompress(dict, *it, old)}; ret.insert(ret.end(), uncompressed.begin(), uncompressed.end()); - // old = v; old = *it; } @@ -39,28 +34,30 @@ ustring lzw_uncompress(vuint16 &&t_compressed) { void uncompress(const string &t_input_name, const char *t_output_name) { FILE *const input = std::fopen(t_input_name.c_str(), "rb"); assert(input); - FILE *const output = - (t_output_name != nullptr) - ? std::fopen(t_output_name, "wb") - : std::fopen((t_input_name + "_uncompressed").c_str(), "wb"); - assert(output); + std::ofstream output{(t_output_name != nullptr) + ? 
t_output_name + : t_input_name + "_uncompressed", + std::ios::out | std::ios::binary | std::ios::app}; + assert(output.is_open()); uint16_t nb_chunks = 0; std::fread(&nb_chunks, sizeof(nb_chunks), 1, input); for (uint16_t i = 0; i < nb_chunks; ++i) { uncompress_chunk(input, output); } - std::fclose(output); + output.close(); std::fclose(input); } -void uncompress_chunk(FILE *const input, FILE *const output) { +void uncompress_chunk(FILE *const t_input, std::ofstream &t_output) { uint32_t size_chunk = 0; - fread(&size_chunk, sizeof(size_chunk), 1, input); - auto chunk = new unsigned char[size_chunk]; - fread(chunk, sizeof(unsigned char), size_chunk, input); - auto unpacked = unpack(ustring{chunk, chunk + size_chunk}); - delete[] chunk; - const auto uncompressed_chunk = lzw_uncompress(std::move(unpacked)); - std::fwrite(uncompressed_chunk.data(), sizeof(uncompressed_chunk[0]), - uncompressed_chunk.size(), output); + fread(&size_chunk, sizeof(size_chunk), 1, t_input); + auto chunk = std::make_unique(size_chunk); + fread(chunk.get(), sizeof(unsigned char), size_chunk, t_input); + auto unpacked = unpack(ustring{chunk.get(), chunk.get() + size_chunk}); + auto uncompressed_chunk = lzw_uncompress(std::move(unpacked)); + + // attention here for bug /////////////////////////////////////////////////// + uncompressed_chunk.push_back(0xFF); + t_output.write(reinterpret_cast(uncompressed_chunk.data()), + sizeof(uncompressed_chunk[0]) * uncompressed_chunk.size()); } diff --git a/src/uncompress.hh b/src/uncompress.hh index f76bd3c..699c77a 100644 --- a/src/uncompress.hh +++ b/src/uncompress.hh @@ -1,6 +1,7 @@ #ifndef LZW_SRC_UNCOMPRESS_H_ #define LZW_SRC_UNCOMPRESS_H_ +#include #include #include #include @@ -9,6 +10,6 @@ std::basic_string lzw_uncompress(std::vector &&); void uncompress(const std::string &, const char *); -void uncompress_chunk(FILE *, FILE *); +void uncompress_chunk(FILE *, std::ofstream&); #endif /* LZW_SRC_UNCOMPRESS_H_ */