From a4f722f6fb88c6014a57dcc85624b45e00a1e7bb Mon Sep 17 00:00:00 2001 From: Phuntsok Drak-pa Date: Thu, 26 Apr 2018 11:54:02 +0200 Subject: [PATCH] parallel compression kinda working, but ultimately segfault --- .gitignore | 5 ++ src/common.cc | 12 ++--- src/common.hh | 7 ++- src/compress.cc | 123 +++++++++++++++++++++++++++++++----------------- src/compress.hh | 7 ++- src/main.cc | 74 +++++++++++++++++------------ 6 files changed, 142 insertions(+), 86 deletions(-) diff --git a/.gitignore b/.gitignore index 792db02..e4c99a6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ *~ gmon\.out +/cmake-build-debug/Makefile + +cmake-build-debug/ + +\.idea/ diff --git a/src/common.cc b/src/common.cc index 05496cb..d223f5f 100644 --- a/src/common.cc +++ b/src/common.cc @@ -6,8 +6,8 @@ #include "common.hh" using uint8_t = std::uint8_t; -using uint32_t = std::uint32_t; -using dic_t = std::map, uint32_t>; +using uint16_t = std::uint16_t; +using dic_t = std::map, uint16_t>; /** * Cette fonction a pour double usage la recherche d’une chaine de caractères @@ -27,10 +27,10 @@ using dic_t = std::map, uint32_t>; * \param c Caractère suivant la chaine de caractères \p nr_chaine * \return std::pair */ -const std::pair -dico(std::map, uint32_t> &t_dictionary, - uint32_t t_nr_chaine, uint8_t t_c) { - if (t_nr_chaine == 0xFFFFFFFF) +const std::pair +dico(std::map, uint16_t> &t_dictionary, + uint16_t t_nr_chaine, uint8_t t_c) { + if (t_nr_chaine == 0xFFFF) return std::make_pair(true, t_c); auto &e = t_dictionary[std::make_pair(t_nr_chaine, t_c)]; return (e) ? std::make_pair(true, e) diff --git a/src/common.hh b/src/common.hh index 7cfd222..cc1b367 100644 --- a/src/common.hh +++ b/src/common.hh @@ -10,9 +10,8 @@ #include /// \brief Recherche ou ajout de chaine dans le dictionnaire -const std::pair -dico(std::map, std::uint32_t> - &t_dictionary, - std::uint32_t t_nr_chaine, std::uint8_t t_c); +const std::pair +dico(std::map, std::uint16_t> &, + uint16_t, uint8_t); #endif /* LZW_SRC_COMMON_H_ */ diff --git a/src/compress.cc b/src/compress.cc index fe43f1d..d3a040a 100644 --- a/src/compress.cc +++ b/src/compress.cc @@ -3,14 +3,22 @@ * \brief Implementation of compression */ +#ifdef Debug +constexpr bool debug_mode = true; +#else +constexpr bool debug_mode = false; +#endif + #include "compress.hh" #include "utf8.hh" #include #include +#include +#include -using dict_t = std::map, uint32_t>; +using dict_t = std::map, uint16_t>; using ustring = std::basic_string; // chaine non encodée -using uvec = std::vector; // chaine encodée +using uvec = std::vector; // chaine encodée using std::printf; /** @@ -24,39 +32,40 @@ using std::printf; * \param t_dictionary Dictionnaire de compression * \return std::vector */ -const uvec lzw_compress(const ustring &t_text, dict_t &t_dictionary) { +void lzw_compress(const std::vector &t_text, uvec &t_res) { + dict_t dictionary{}; std::puts("Compressing..."); - uvec res{}; - uint32_t w = 0xFFFFFFFF; - uint32_t len = 0; + // uvec res{}; + uint16_t w = 0xFFFF; + uint16_t len = 0; constexpr size_t DICT_MAX = 7936; /* 12 bits */ -#ifdef Debug size_t progress = 0; -#endif for (const auto &c : t_text) { ++len; -#ifdef Debug - printf("\rprogress: %zu / %zu", ++progress, t_text.size()); -#endif + if constexpr (debug_mode) { + printf("\rprogress: %zu / %zu", ++progress, t_text.size()); + } - if (/* len > LENGTH_MAX || */ t_dictionary.size() >= DICT_MAX) { - res.push_back(static_cast(w)); - w = c; + if (/* len > LENGTH_MAX || */ dictionary.size() >= DICT_MAX) { + t_res.push_back(static_cast(w)); + w = static_cast(c); len = 0; - } else if (const auto &[exists, pos] = dico(t_dictionary, w, c); exists) { + } else if (const auto &[exists, pos] = + dico(dictionary, w, static_cast(c)); + exists) { w = pos; } else { - res.push_back(static_cast(w)); - w = c; + t_res.push_back(static_cast(w)); + w = static_cast(c); len = 0; } } printf("\n"); - return res; + // return res; } /** @@ -73,41 +82,69 @@ const uvec lzw_compress(const ustring &t_text, dict_t &t_dictionary) { void compress(const std::string &t_in_file, const char *t_out_file) { // Fichier d’entrée std::ifstream input_file{t_in_file}; + if(!input_file.is_open()) { + std::cerr << "Error at " << __FILE__ << ":" << __LINE__ - 2 + << ": could not open output file \"" << t_in_file << "\". Aborting...\n"; + exit(1); + } // Fichier de sortie - FILE *out = (t_out_file) - ? fopen(t_out_file, "wb") - : fopen(std::string{t_out_file, ".lzw"}.c_str(), "wb"); + const char *filename = + (t_out_file) ? t_out_file : "output.lzw"; + FILE *out = fopen(filename, "wb"); + if(!out) { + std::cerr << "Error at " << __FILE__ << ":" << __LINE__ - 4 + << ": could not open output file \"" << filename << "\". Aborting...\n"; + input_file.close(); + exit(1); + } - input_file.seekg(0, std::ios::end); - // string contenant le fichier d’entrée - ustring str(static_cast(input_file.tellg()), - static_cast(0)); - input_file.seekg(0, std::ios::beg); + // input_file.seekg(0, std::ios::end); + // // string contenant le fichier d’entrée + // ustring str(static_cast(input_file.tellg()), + // static_cast(0)); + // input_file.seekg(0, std::ios::beg); - // assignation du contenu du fichier à str - str.assign((std::istreambuf_iterator(input_file)), - std::istreambuf_iterator()); + // // assignation du contenu du fichier à str + // str.assign((std::istreambuf_iterator(input_file)), + // std::istreambuf_iterator()); - printf("Size of input file: %zu\n", str.size()); + // printf("Size of input file: %zu\n", str.size()); - dict_t dictionary{}; + // dict_t dictionary{}; - const auto comp_str{lzw_compress(str, dictionary)}; + // const auto comp_str{lzw_compress(str, dictionary)}; - printf("\n############################################\n"); - printf(" Compressed!\n"); - printf("############################################\n\n"); - printf("Size of compressed string: %zu\n", comp_str.size()); - printf("Size of dictionary: %zu\n", dictionary.size()); - printf("Compression ratio: %.10f\n", - static_cast(str.size() / comp_str.size())); + // thread pool + std::vector> threads{}; - for(const auto c : comp_str) - write_utf8(out, c); + // char chunk[32768]; + std::vector chunk{}; + chunk.reserve(32768); + while (input_file.read(chunk.data(), 32768)) { + threads.push_back(std::make_pair(std::thread{}, uvec{})); + threads.back().first = + std::thread{lzw_compress, chunk, ref(threads.back().second)}; + if (threads.size() >= 8) { + for (auto &elem : threads) { + elem.first.join(); + } + for (auto &elem : threads) { + for (const auto c : elem.second) { + write_utf8(out, c); + } + } + threads.clear(); + } + } + + if(input_file.tellg() != std::ios::end) { + std::puts("Leftovers..."); + } + + // for (const auto c : comp_str) + // write_utf8(out, c); fclose(out); input_file.close(); - - return; } diff --git a/src/compress.hh b/src/compress.hh index 848a634..30de860 100644 --- a/src/compress.hh +++ b/src/compress.hh @@ -8,12 +8,11 @@ #include "common.hh" #include +#include /// \brief Compression d'une chaine de caractères -const std::vector -lzw_compress(const std::basic_string &t_text, - std::map, std::uint32_t> - &t_dictionary); +void lzw_compress(const std::vector &t_text, + std::vector &t_rec); /// \brief Wrapper de \ref lzw_compress void compress(const std::string &t_in_file, const char *t_out_file); diff --git a/src/main.cc b/src/main.cc index 7fefe6c..c9641e6 100644 --- a/src/main.cc +++ b/src/main.cc @@ -6,6 +6,12 @@ * */ +#ifdef Debug +constexpr bool debug_mode = true; +#else +constexpr bool debug_mode = false; +#endif + #include "compress.hh" #include "getopt.h" @@ -48,10 +54,11 @@ void help() { } int main(int argc, char *argv[]) { -#ifdef Debug - for (int i = 0; i < argc; ++i) - printf("argv[%d] = %s\n", i, argv[i]); -#endif + + if constexpr (debug_mode) { + for (int i = 0; i < argc; ++i) + printf("argv[%d] = %s\n", i, argv[i]); + } std::string input_path{}; std::string output_path{}; @@ -71,54 +78,55 @@ int main(int argc, char *argv[]) { break; switch (c) { case 0: { -#ifdef Debug - printf("\noption %s", long_options[option_index].name); - if (optarg) { - printf(" with arg %s\n", optarg); + if constexpr (debug_mode) { + printf("\noption %s", long_options[option_index].name); + if (optarg) { + printf(" with arg %s\n", optarg); + } } -#endif break; } case 'h': { -#ifdef Debug - printf("From main - option --help passed\n"); -#endif + if constexpr (debug_mode) { + printf("From main - option --help passed\n"); + } help(); return 0; } case 'i': { -#ifdef Debug - printf("From main - option --input with value '%s'\n", optarg); -#endif + if constexpr (debug_mode) { + printf("From main - option --input with value '%s'\n", optarg); + } input_path = optarg; break; } case 'o': { -#ifdef Debug - printf("From main - option --output with value '%s'\n", optarg); -#endif + if constexpr (debug_mode) { + printf("From main - option --output with value '%s'\n", optarg); + } output_path = optarg; break; } case 'c': { -#ifdef Debug - printf("From main - option --compress\n"); -#endif + if constexpr (debug_mode) { + printf("From main - option --compress\n"); + } compressing = true; break; } case 'u': { -#ifdef Debug - printf("From main - option --uncompress\n"); -#endif + if constexpr (debug_mode) { + printf("From main - option --uncompress\n"); + } compressing = false; break; } - case '?': { + case '?': + default: { puts("Error: unknown parameter."); -#ifdef Debug - printf("From main - option -?\n"); -#endif + if constexpr (debug_mode) { + printf("From main - option -?\n"); + } help(); return 1; } @@ -140,7 +148,15 @@ int main(int argc, char *argv[]) { - bit-packing, limiter la taille du dictionnaire pour un certain nombre de bits. */ - compress(input_path, output_path.c_str()); + if constexpr (debug_mode) { + puts("Beginning compression"); + } + if(output_path.empty()) { + compress(input_path, nullptr); + } else { + compress(input_path, output_path.c_str()); + } + // compress(input_path, output_path.c_str()); } else { puts("Not yet implemented :("); /*