Added (broken) uncompressing algo + fixed bit-unpacking

This commit is contained in:
Phuntsok Drak-pa 2018-06-10 21:21:06 +02:00
parent 9f70b01886
commit fcfe944c5d
8 changed files with 181 additions and 81 deletions

View File

@ -1,31 +1,21 @@
#include "bitpack.hh" #include "bitpack.hh"
#include "common.hh" #include "common.hh"
#include <algorithm>
#include <cmath> #include <cmath>
#include <algorithm>
using std::uint16_t; using std::uint16_t;
using std::vector; using std::vector;
using uchar = unsigned char; using uchar = unsigned char;
using vuint16 = vector<uint16_t>; using vuint16 = vector<uint16_t>;
using vuchar = vector<uchar>; using vuchar = vector<uchar>;
using ustring = std::basic_string<unsigned char>;
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// packing // // packing //
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
[[nodiscard]] vuchar pack(const vuint16 &t_input) { [[nodiscard]] vuchar pack(const vuint16 &t_input) {
vuchar ret{}; return pack_n(t_input.begin(), t_input.end(), 9);
const int max_value = ipow(2, 8);
for (auto it = t_input.begin(); it != t_input.end(); ++it) {
if (*it >= max_value) {
const auto next_vec =
pack_n(static_cast<vuint16::const_iterator>(it), t_input.end(), 9);
ret.insert(ret.end(), next_vec.begin(), next_vec.end());
return ret;
}
ret.push_back(static_cast<unsigned char>(*it));
}
return ret;
} }
/** /**
@ -108,67 +98,59 @@ using vuchar = vector<uchar>;
// unpacking // // unpacking //
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
[[nodiscard]] vuint16 unpack(const vuchar &t_input) { uint16_t mask_n(int t_nb_bits) {
vuint16 ret{}; if(t_nb_bits == 0) {
return 0;
const int max_value = ipow(2, 8) - 1;
// begin with 8bits
for (auto it = t_input.begin(); it != t_input.end(); ++it) {
ret.push_back(static_cast<uint16_t>(*it));
if (*it >= max_value) {
auto next_vec{unpack_n(it, t_input.end(), 9)};
ret.insert(ret.end(), next_vec.begin(), next_vec.end());
return ret;
} }
uint16_t mask = mask_n(t_nb_bits - 1);
mask <<= 1;
mask |= 0x1;
return mask;
} }
return ret; [[nodiscard]] vuint16 unpack(ustring &&t_input) {
return unpack_n(t_input.begin(), t_input.end(), 9);
} }
[[nodiscard]] vuint16 unpack_n(const vuchar::const_iterator t_begin, [[nodiscard]] vuint16 unpack_n(const ustring::const_iterator t_begin,
const vuchar::const_iterator t_end, const ustring::const_iterator t_end,
const int t_n) { const int t_n) {
int step = t_n / 8; int step = t_n / 8;
int left_shift = 0; int left_shift = 0;
int middle_shift = 0;
int right_shift = 0; int right_shift = 0;
vuint16 ret{}; vuint16 ret{};
const uint16_t mask = mask_n(t_n);
const int max_value = ipow(2, t_n);
for (auto it = t_begin; it < t_end;) { for (auto it = t_begin; it < t_end;) {
static const int max_value = ipow(2, t_n);
uint16_t current_char = 0; uint16_t current_char = 0;
left_shift += step; // left bits
if (left_shift >= t_n) { left_shift =
left_shift = (left_shift - t_n) + step; ((left_shift += step) >= t_n) ? (left_shift - t_n) + step : left_shift;
} current_char = static_cast<uint16_t>(*it << left_shift);
current_char = static_cast<uint16_t>((*it) << left_shift); // right bits
bool zero_rs = right_shift; bool zero_rs = right_shift;
right_shift -= step; right_shift -= step;
if (right_shift < 0) { if (right_shift < 0) {
// optional middle bits before right bits
if (zero_rs) { if (zero_rs) {
middle_shift = std::abs(right_shift); current_char |= *(++it) << std::abs(right_shift);
current_char |= (*(++it)) << middle_shift;
} }
right_shift = 8 - std::abs(right_shift); right_shift = 8 - std::abs(right_shift);
} }
current_char |= *(++it) >> right_shift; current_char |= *(++it) >> right_shift;
ret.push_back(current_char); // char made!
ret.push_back(current_char &= mask);
if (current_char >= max_value) { if (current_char >= max_value) {
const auto next_vec = unpack_n(it, t_end, t_n + 1); const auto next_vec = unpack_n(it, t_end, t_n + 1);
ret.insert(ret.end(), next_vec.begin(), next_vec.end()); ret.insert(ret.end(), next_vec.begin(), next_vec.end());
return ret; return ret;
} }
} }
return ret; return ret;
} }
[[nodiscard]] vuint16 unpack_16(const vuchar::const_iterator t_begin, [[nodiscard]] vuint16 unpack_16(const ustring::const_iterator t_begin,
const vuchar::const_iterator t_end) { const ustring::const_iterator t_end) {
vuint16 ret{}; vuint16 ret{};
for (auto it = t_begin; it < t_end; ++it) { for (auto it = t_begin; it < t_end; ++it) {
ret.push_back(static_cast<uint16_t>((*it << 8) | *(++it))); ret.push_back(static_cast<uint16_t>((*it << 8) | *(++it)));

View File

@ -7,6 +7,7 @@
#define LZW_SRC_BITPACK_H_ #define LZW_SRC_BITPACK_H_
#include <cstdint> #include <cstdint>
#include <string>
#include <vector> #include <vector>
/// \brief Bit-packs the input dynamically /// \brief Bit-packs the input dynamically
@ -24,14 +25,14 @@ pack_16(const std::vector<std::uint16_t>::const_iterator,
const std::vector<std::uint16_t>::const_iterator); const std::vector<std::uint16_t>::const_iterator);
[[nodiscard]] std::vector<std::uint16_t> [[nodiscard]] std::vector<std::uint16_t>
unpack(const std::vector<unsigned char> &); unpack(std::basic_string<unsigned char> &&);
[[nodiscard]] std::vector<std::uint16_t> [[nodiscard]] std::vector<std::uint16_t>
unpack_n(const std::vector<unsigned char>::const_iterator, unpack_n(const std::basic_string<unsigned char>::const_iterator,
const std::vector<unsigned char>::const_iterator, const int t_n); const std::basic_string<unsigned char>::const_iterator, const int t_n);
[[nodiscard]] std::vector<std::uint16_t> [[nodiscard]] std::vector<std::uint16_t>
unpack_16(const std::vector<unsigned char>::const_iterator, unpack_16(const std::basic_string<unsigned char>::const_iterator,
const std::vector<unsigned char>::const_iterator); const std::basic_string<unsigned char>::const_iterator);
#endif /* LZW_SRC_BITPACK_H_ */ #endif /* LZW_SRC_BITPACK_H_ */

View File

@ -8,7 +8,7 @@
using std::uint8_t; using std::uint8_t;
using std::uint16_t; using std::uint16_t;
using dic_comp_t = std::map<std::pair<uint16_t, uint8_t>, uint16_t>; using dic_comp_t = std::map<std::pair<uint16_t, uint8_t>, uint16_t>;
using dic_un_t = std::map<std::uint16_t, std::unique_ptr<std::pair<uint16_t, uint8_t>>>; // using dic_un_t = std::map<std::uint16_t, std::unique_ptr<std::pair<uint16_t, uint8_t>>>;
using ustring = std::basic_string<unsigned char>; using ustring = std::basic_string<unsigned char>;
int ipow(int base, int exp) { int ipow(int base, int exp) {
@ -44,8 +44,8 @@ int ipow(int base, int exp) {
* \param t_c Caractère suivant la chaine de caractères \p t_nr_chaine * \param t_c Caractère suivant la chaine de caractères \p t_nr_chaine
* \return const std::pair<bool, uint16_t> * \return const std::pair<bool, uint16_t>
*/ */
std::pair<bool, uint16_t> dico(dic_comp_t &t_dictionary, uint16_t t_nr_chaine, std::pair<bool, uint16_t> dico(dic_comp_t &t_dictionary,
uint8_t t_c) { const uint16_t t_nr_chaine, const uint8_t t_c) {
if (t_nr_chaine == 0xFFFF) { if (t_nr_chaine == 0xFFFF) {
return std::make_pair(true, t_c); return std::make_pair(true, t_c);
} }
@ -58,3 +58,19 @@ std::pair<bool, uint16_t> dico(dic_comp_t &t_dictionary, uint16_t t_nr_chaine,
t_dictionary.size()) + t_dictionary.size()) +
255)); 255));
} }
/**
 * \brief Decodes one LZW code and records the dictionary entry it implies.
 *
 * Codes below 256 are treated as literal bytes and are never stored in
 * \p t_dict; the dictionary only holds multi-byte strings. Lookups use
 * find() rather than operator[] so that resolving a code never inserts an
 * empty placeholder, which would shift the numbering of later entries.
 *
 * NOTE(review): new entries are numbered 256, 257, ... in insertion order.
 * This assumes the compressor hands out its codes the same way (dico()
 * computes them from size() + 255) -- confirm against the compressor.
 *
 * \param t_dict Decompression dictionary (code -> byte string), updated in place
 * \param t_code Code to decode
 * \param t_old  Code that was decoded just before \p t_code
 * \return The byte string \p t_code stands for; empty when neither \p t_code
 *         nor \p t_old can be resolved (corrupt stream)
 */
std::basic_string<unsigned char> dico_uncompress(
    std::map<std::uint16_t, std::basic_string<unsigned char>> &t_dict,
    const std::uint16_t t_code, const std::uint16_t t_old) {
  using bytes = std::basic_string<unsigned char>;
  constexpr std::uint16_t first_dict_code = 256;

  // Literal bytes resolve to themselves, everything else comes from the
  // dictionary; an unknown code yields an empty string.
  const auto resolve = [&t_dict, first_dict_code](const std::uint16_t code) {
    if (code < first_dict_code) {
      return bytes(1, static_cast<unsigned char>(code));
    }
    const auto found = t_dict.find(code);
    return (found != t_dict.end()) ? found->second : bytes{};
  };

  const bytes previous = resolve(t_old);
  bytes current = resolve(t_code);

  if (previous.empty()) {
    // Nothing to extend: return whatever the code resolved to.
    return current;
  }

  if (current.empty()) {
    // The code refers to the entry that is about to be created: it is the
    // previous string followed by its own first byte.
    current = previous;
    current += previous[0];
  }

  // Every decoded code adds "previous string + first byte of current string".
  bytes added = previous;
  added += current[0];
  t_dict.emplace(static_cast<std::uint16_t>(first_dict_code + t_dict.size()),
                 std::move(added));
  return current;
}

View File

@ -15,6 +15,10 @@ int ipow(int, int);
/// \brief Recherche ou ajout de chaine dans le dictionnaire /// \brief Recherche ou ajout de chaine dans le dictionnaire
std::pair<bool, std::uint16_t> std::pair<bool, std::uint16_t>
dico(std::map<std::pair<std::uint16_t, std::uint8_t>, std::uint16_t> &, dico(std::map<std::pair<std::uint16_t, std::uint8_t>, std::uint16_t> &,
std::uint16_t, std::uint8_t); const std::uint16_t, const std::uint8_t);
std::basic_string<unsigned char>
dico_uncompress(std::map<std::uint16_t, std::basic_string<unsigned char>> &,
const std::uint16_t, const std::uint16_t);
#endif /* LZW_SRC_COMMON_H_ */ #endif /* LZW_SRC_COMMON_H_ */

View File

@ -44,6 +44,8 @@ void write_file(FILE *const t_out, const vvuint16 &t_text) {
} }
fwrite(&size, sizeof(size), 1, t_out); fwrite(&size, sizeof(size), 1, t_out);
for (const auto &chunk : t_text) { for (const auto &chunk : t_text) {
if constexpr (debug_mode)
std::printf("Chunk!\n");
write_chunk(t_out, chunk); write_chunk(t_out, chunk);
} }
} }

View File

@ -6,10 +6,11 @@
* *
*/ */
#include "compress.hh" #include <getopt.h>
#include "getopt.h"
#include <cassert> #include <cassert>
#include <tuple> #include <tuple>
#include "compress.hh"
#include "uncompress.hh"
using std::printf; using std::printf;
using std::puts; using std::puts;
@ -45,7 +46,7 @@ void help() {
puts("\t-o\tpath to the output file (if the file already exists, it will"); puts("\t-o\tpath to the output file (if the file already exists, it will");
puts("\t\tbe overwritten). Default: input path + \".lzw\""); puts("\t\tbe overwritten). Default: input path + \".lzw\"");
puts("\t-c\tcompress the input file"); puts("\t-c\tcompress the input file");
puts("\t-d\tdecompresses the input file to the output file. If no output"); puts("\t-u\tuncompresses the input file to the output file. If no output");
puts("\t\tpath has not been entered and if the input file ends with "); puts("\t\tpath has not been entered and if the input file ends with ");
puts("\t\t\".lzw\", the extension \".lzw\" will be removed; otherwise, the "); puts("\t\t\".lzw\", the extension \".lzw\" will be removed; otherwise, the ");
puts("\t\textension \".uncompresed\" will be added"); puts("\t\textension \".uncompresed\" will be added");
@ -98,14 +99,15 @@ int main(int argc, char *argv[]) {
const auto [input_path, output_path, compressing] = process_args(argc, argv); const auto [input_path, output_path, compressing] = process_args(argc, argv);
assert(!input_path.empty()); assert(!input_path.empty());
if (compressing) { if (compressing) {
if (output_path.empty()) { // if (output_path.empty()) {
compress(input_path, nullptr); // compress(input_path, nullptr);
// } else {
// compress(input_path, output_path.c_str());
// }
compress(input_path, (output_path.empty()) ? nullptr : output_path.c_str());
} else { } else {
compress(input_path, output_path.c_str()); uncompress(input_path,
} (output_path.empty()) ? nullptr : output_path.c_str());
} else {
puts("Not yet implemented :(");
/* Inversion des types du dictionnaire pour retrouver les chaînes plus aisément */
} }
return 0; return 0;
} }

81
src/uncompress.cc Normal file
View File

@ -0,0 +1,81 @@
#include "uncompress.hh"
#include "bitpack.hh"
#include "common.hh"
#include <cassert>
#include <cstdio>
#include <cstdlib>
#ifdef Debug
constexpr bool debug_mode = true;
#else
constexpr bool debug_mode = false;
#endif
using std::fclose;
using std::fopen;
using std::fseek;
using std::string;
using std::uint16_t;
using std::vector;
using ustring = std::basic_string<unsigned char>;
using vuint16 = vector<uint16_t>;
/**
 * \brief Rebuilds a byte string from a sequence of LZW codes.
 *
 * The first code is emitted as a single byte; every following code is
 * expanded through dico_uncompress(), which also maintains the dictionary
 * local to this call.
 *
 * \param t_compressed Codes of one chunk, in stream order
 * \return The decoded bytes; empty when \p t_compressed is empty
 */
[[nodiscard]] ustring lzw_uncompress(vuint16 &&t_compressed) {
  ustring ret{};
  // Reading the first code of an empty vector would be undefined behaviour.
  if (t_compressed.empty()) {
    return ret;
  }

  std::map<uint16_t, ustring> dict{};
  uint16_t old = t_compressed.front();
  ret.push_back(static_cast<unsigned char>(old));

  for (auto it = t_compressed.begin() + 1; it != t_compressed.end(); ++it) {
    const uint16_t code = *it;
    const auto uncompressed = dico_uncompress(dict, code, old);
    ret.insert(ret.end(), uncompressed.begin(), uncompressed.end());
    old = code;
  }
  return ret;
}
void uncompress(const string &t_input_name, const char *t_output_name) {
FILE *const input = std::fopen(t_input_name.c_str(), "rb");
assert(input);
FILE *const output =
(t_output_name != nullptr)
? std::fopen(t_output_name, "wb")
: std::fopen((t_input_name + "_uncompressed").c_str(), "wb");
assert(output);
uint16_t nb_chunks = 0;
std::fseek(input, 0, SEEK_SET);
std::fread(&nb_chunks, sizeof(nb_chunks), 1, input);
if constexpr (debug_mode) {
std::printf("Number of chunks: %d\n", nb_chunks);
}
for (uint16_t i = 0; i < nb_chunks; ++i) {
uint16_t size_chunk = 0;
fread(&size_chunk, sizeof(size_chunk), 1, input);
if constexpr (debug_mode) {
printf("Chunk! Size of chunk:\t%d\n", size_chunk);
}
unsigned char *chunk = static_cast<unsigned char *>(
std::malloc(sizeof(unsigned char) * size_chunk));
fread(chunk, sizeof(unsigned char), size_chunk, input);
auto unpacked = unpack(ustring{chunk, chunk + size_chunk});
if constexpr(debug_mode) {
for(const auto val : unpacked) {
std::printf("%d\n", val);
}
}
const auto uncompressed_chunk = lzw_uncompress(std::move(unpacked));
std::fwrite(uncompressed_chunk.data(), sizeof(uncompressed_chunk[0]),
uncompressed_chunk.size(), output);
}
std::fclose(output);
std::fclose(input);
}

12
src/uncompress.hh Normal file
View File

@ -0,0 +1,12 @@
#ifndef LZW_SRC_UNCOMPRESS_H_
#define LZW_SRC_UNCOMPRESS_H_

// <cstdint> is required for std::uint16_t; without it this header only
// compiles when another header happens to pull it in first.
#include <cstdint>
#include <string>
#include <vector>

/// \brief Rebuilds a byte string from a sequence of LZW codes
[[nodiscard]] std::basic_string<unsigned char>
lzw_uncompress(std::vector<std::uint16_t> &&);

/// \brief Uncompresses the file named by the first argument
///
/// The second argument is the output path; when it is null, the output path
/// is the input path followed by "_uncompressed".
void uncompress(const std::string &, const char *);

#endif /* LZW_SRC_UNCOMPRESS_H_ */