Added (broken) uncompressing algo + fixed bit-unpacking

This commit is contained in:
Phuntsok Drak-pa 2018-06-10 21:21:06 +02:00
parent 9f70b01886
commit fcfe944c5d
8 changed files with 181 additions and 81 deletions

View File

@ -1,34 +1,24 @@
#include "bitpack.hh"
#include "common.hh"
#include <algorithm>
#include <cmath>
#include <algorithm>
using std::uint16_t;
using std::vector;
using uchar = unsigned char;
using vuint16 = vector<uint16_t>;
using vuchar = vector<uchar>;
using ustring = std::basic_string<unsigned char>;
///////////////////////////////////////////////////////////////////////////////
// packing //
///////////////////////////////////////////////////////////////////////////////
/**
 * Bit-packs \p t_input, starting with a code width of 9 bits.
 *
 * The whole input is handed to pack_n() so that the produced stream matches
 * unpack(), which decodes its input with unpack_n() starting at 9 bits as
 * well. The former body emitted a raw 8-bit prefix first and left this call
 * as unreachable code behind `return ret;`.
 *
 * \param t_input values to pack
 * \return the packed bytes produced by pack_n()
 */
[[nodiscard]] vuchar pack(const vuint16 &t_input) {
  return pack_n(t_input.begin(), t_input.end(), 9);
}
/**
/**
* Packs \p t_input into unsigned char, assuming the max value of t_input
* only takes \p t_n bits
*
@ -37,8 +27,8 @@ using vuchar = vector<uchar>;
* \param t_n maximum size of an input value in bits \return Returns a
* vector of unsigned char containing the packed values from t_input
*/
[[nodiscard]] vuchar
pack_n(const vuint16::const_iterator t_input_begin,
[[nodiscard]] vuchar
pack_n(const vuint16::const_iterator t_input_begin,
const vuint16::const_iterator t_input_end, const int t_n) {
if (t_n == 16) {
return pack_16(t_input_begin, t_input_end);
@ -104,71 +94,63 @@ using vuchar = vector<uchar>;
return ret;
}
///////////////////////////////////////////////////////////////////////////////
// unpacking //
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// unpacking //
///////////////////////////////////////////////////////////////////////////////
[[nodiscard]] vuint16 unpack(const vuchar &t_input) {
vuint16 ret{};
const int max_value = ipow(2, 8) - 1;
// begin with 8bits
for (auto it = t_input.begin(); it != t_input.end(); ++it) {
ret.push_back(static_cast<uint16_t>(*it));
if (*it >= max_value) {
auto next_vec{unpack_n(it, t_input.end(), 9)};
ret.insert(ret.end(), next_vec.begin(), next_vec.end());
return ret;
/**
 * Builds a mask whose \p t_nb_bits least-significant bits are set.
 *
 * \param t_nb_bits number of low bits to set. Values of 0 or less yield 0
 *        (a negative value previously recursed without end); values of 16
 *        or more yield 0xFFFF, which is what the 16-bit result saturated to
 *        before.
 * \return the mask, e.g. 0x01FF for 9 bits
 */
std::uint16_t mask_n(int t_nb_bits) {
  constexpr int max_bits = 16;
  if (t_nb_bits <= 0) {
    return 0;
  }
  if (t_nb_bits >= max_bits) {
    return 0xFFFF;
  }
  // 1 << n has only bit n set; subtracting 1 sets the n bits below it
  return static_cast<std::uint16_t>((1U << t_nb_bits) - 1U);
}
[[nodiscard]] vuint16 unpack_n(const vuchar::const_iterator t_begin,
const vuchar::const_iterator t_end,
/**
 * Unpacks a whole packed buffer.
 *
 * Forwards the full byte range of \p t_input to unpack_n() with a starting
 * width of 9 bits.
 *
 * \param t_input packed bytes
 * \return the values produced by unpack_n()
 */
[[nodiscard]] vuint16 unpack(ustring &&t_input) {
  const ustring::const_iterator first = t_input.cbegin();
  const ustring::const_iterator last = t_input.cend();
  return unpack_n(first, last, 9);
}
/**
 * Unpacks the bytes in [\p t_begin, \p t_end) into a vector of 16-bit
 * values, combining shifted neighbouring bytes into each value.
 *
 * When an assembled value is >= ipow(2, \p t_n), the remaining input is
 * handed to a recursive call with \p t_n + 1 and that result is appended
 * before returning.
 *
 * NOTE(review): this span looks like a diff hunk with removed and added
 * lines merged together: `max_value` is declared both before and inside the
 * loop, `left_shift` is advanced twice per iteration, the middle-bits step
 * appears in two spellings and there are two `push_back` calls. Confirm
 * which lines belong to the current revision before relying on this code.
 * NOTE(review): `*(++it)` is dereferenced without a check against \p t_end.
 * NOTE(review): the value is masked with mask_n(t_n) right before it is
 * compared with max_value, so the widening branch looks unreachable —
 * presumably ipow(2, t_n) is 2^t_n; confirm.
 *
 * \param t_begin iterator to the first packed byte
 * \param t_end iterator past the last packed byte
 * \param t_n width used for the shift and mask computations — presumably
 *        the current code size in bits; TODO confirm
 * \return the unpacked values
 */
[[nodiscard]] vuint16 unpack_n(const ustring::const_iterator t_begin,
const ustring::const_iterator t_end,
const int t_n) {
int step = t_n / 8;
int left_shift = 0;
int middle_shift = 0;
int right_shift = 0;
vuint16 ret{};
const uint16_t mask = mask_n(t_n);
const int max_value = ipow(2, t_n);
for (auto it = t_begin; it < t_end;) {
// NOTE(review): a function-local static is initialised only once, so it
// would keep the value computed for the first t_n this function saw.
static const int max_value = ipow(2, t_n);
uint16_t current_char = 0;
left_shift += step;
if (left_shift >= t_n) {
left_shift = (left_shift - t_n) + step;
}
current_char = static_cast<uint16_t>((*it) << left_shift);
// left bits
left_shift =
((left_shift += step) >= t_n) ? (left_shift - t_n) + step : left_shift;
current_char = static_cast<uint16_t>(*it << left_shift);
// right bits
bool zero_rs = right_shift;
right_shift -= step;
if (right_shift < 0) {
// optional middle bits before right bits
if (zero_rs) {
middle_shift = std::abs(right_shift);
current_char |= (*(++it)) << middle_shift;
current_char |= *(++it) << std::abs(right_shift);
}
right_shift = 8 - std::abs(right_shift);
}
current_char |= *(++it) >> right_shift;
ret.push_back(current_char);
// char made!
ret.push_back(current_char &= mask);
if (current_char >= max_value) {
// switch to a wider code size for the rest of the input
const auto next_vec = unpack_n(it, t_end, t_n + 1);
ret.insert(ret.end(), next_vec.begin(), next_vec.end());
return ret;
}
}
return ret;
}
[[nodiscard]] vuint16 unpack_16(const vuchar::const_iterator t_begin,
const vuchar::const_iterator t_end) {
[[nodiscard]] vuint16 unpack_16(const ustring::const_iterator t_begin,
const ustring::const_iterator t_end) {
vuint16 ret{};
for (auto it = t_begin; it < t_end; ++it) {
ret.push_back(static_cast<uint16_t>((*it << 8) | *(++it)));

View File

@ -7,6 +7,7 @@
#define LZW_SRC_BITPACK_H_
#include <cstdint>
#include <string>
#include <vector>
/// \brief Bit-packs the input dynamically
@ -24,14 +25,14 @@ pack_16(const std::vector<std::uint16_t>::const_iterator,
const std::vector<std::uint16_t>::const_iterator);
[[nodiscard]] std::vector<std::uint16_t>
unpack(const std::vector<unsigned char> &);
unpack(std::basic_string<unsigned char> &&);
[[nodiscard]] std::vector<std::uint16_t>
unpack_n(const std::vector<unsigned char>::const_iterator,
const std::vector<unsigned char>::const_iterator, const int t_n);
unpack_n(const std::basic_string<unsigned char>::const_iterator,
const std::basic_string<unsigned char>::const_iterator, const int t_n);
[[nodiscard]] std::vector<std::uint16_t>
unpack_16(const std::vector<unsigned char>::const_iterator,
const std::vector<unsigned char>::const_iterator);
unpack_16(const std::basic_string<unsigned char>::const_iterator,
const std::basic_string<unsigned char>::const_iterator);
#endif /* LZW_SRC_BITPACK_H_ */

View File

@ -8,7 +8,7 @@
using std::uint8_t;
using std::uint16_t;
using dic_comp_t = std::map<std::pair<uint16_t, uint8_t>, uint16_t>;
using dic_un_t = std::map<std::uint16_t, std::unique_ptr<std::pair<uint16_t, uint8_t>>>;
// using dic_un_t = std::map<std::uint16_t, std::unique_ptr<std::pair<uint16_t, uint8_t>>>;
using ustring = std::basic_string<unsigned char>;
int ipow(int base, int exp) {
@ -44,8 +44,8 @@ int ipow(int base, int exp) {
* \param t_c Caractère suivant la chaine de caractères \p t_nr_chaine
* \return const std::pair<bool, uint16_t>
*/
std::pair<bool, uint16_t> dico(dic_comp_t &t_dictionary, uint16_t t_nr_chaine,
uint8_t t_c) {
std::pair<bool, uint16_t> dico(dic_comp_t &t_dictionary,
const uint16_t t_nr_chaine, const uint8_t t_c) {
if (t_nr_chaine == 0xFFFF) {
return std::make_pair(true, t_c);
}
@ -58,3 +58,19 @@ std::pair<bool, uint16_t> dico(dic_comp_t &t_dictionary, uint16_t t_nr_chaine,
t_dictionary.size()) +
255));
}
/**
 * Decodes one LZW code and grows the decompression dictionary.
 *
 * Codes below 256 stand for the byte of the same value and are never stored
 * in \p t_dict; dictionary entries are numbered from 256 upward in insertion
 * order. This assumes the compressor numbers its entries the same way (its
 * dico() returns single bytes as their own code) — confirm against dico().
 *
 * Lookups use find() rather than operator[], so unknown codes no longer
 * insert empty entries that would shift the numbering of later ones.
 *
 * \param t_dict decompression dictionary, updated in place
 * \param t_code code to decode
 * \param t_old code decoded by the previous call
 * \return the bytes \p t_code expands to; empty if neither \p t_code nor
 *         \p t_old can be resolved
 */
std::basic_string<unsigned char>
dico_uncompress(std::map<std::uint16_t, std::basic_string<unsigned char>> &t_dict,
                const std::uint16_t t_code, const std::uint16_t t_old) {
  using bytes = std::basic_string<unsigned char>;
  constexpr std::size_t first_dict_code = 256;
  // 0xFFFF is used by dico() as its "no current chain" marker
  constexpr std::size_t reserved_code = 0xFFFF;

  // Expands a code to its bytes; an unknown dictionary code gives an empty string
  const auto expand = [&t_dict, first_dict_code](const std::uint16_t code) -> bytes {
    if (code < first_dict_code) {
      return bytes(1, static_cast<unsigned char>(code));
    }
    const auto found = t_dict.find(code);
    return (found != t_dict.end()) ? found->second : bytes{};
  };

  const bytes previous = expand(t_old);
  bytes current = expand(t_code);
  if (current.empty() && !previous.empty()) {
    // The code refers to the entry that is about to be created: it is the
    // previous string followed by its own first byte
    current = previous;
    current += previous[0];
  }

  if (!previous.empty() && !current.empty()) {
    const std::size_t next_code = first_dict_code + t_dict.size();
    if (next_code < reserved_code) {
      bytes grown = previous;
      grown += current[0];
      t_dict.emplace(static_cast<std::uint16_t>(next_code), std::move(grown));
    }
  }
  return current;
}

View File

@ -15,6 +15,10 @@ int ipow(int, int);
/// \brief Recherche ou ajout de chaine dans le dictionnaire
std::pair<bool, std::uint16_t>
dico(std::map<std::pair<std::uint16_t, std::uint8_t>, std::uint16_t> &,
std::uint16_t, std::uint8_t);
const std::uint16_t, const std::uint8_t);
std::basic_string<unsigned char>
dico_uncompress(std::map<std::uint16_t, std::basic_string<unsigned char>> &,
const std::uint16_t, const std::uint16_t);
#endif /* LZW_SRC_COMMON_H_ */

View File

@ -44,6 +44,8 @@ void write_file(FILE *const t_out, const vvuint16 &t_text) {
}
fwrite(&size, sizeof(size), 1, t_out);
for (const auto &chunk : t_text) {
if constexpr (debug_mode)
std::printf("Chunk!\n");
write_chunk(t_out, chunk);
}
}

View File

@ -6,10 +6,11 @@
*
*/
#include "compress.hh"
#include "getopt.h"
#include <getopt.h>
#include <cassert>
#include <tuple>
#include "compress.hh"
#include "uncompress.hh"
using std::printf;
using std::puts;
@ -45,7 +46,7 @@ void help() {
puts("\t-o\tpath to the output file (if the file already exists, it will");
puts("\t\tbe overwritten). Default: input path + \".lzw\"");
puts("\t-c\tcompress the input file");
puts("\t-d\tdecompresses the input file to the output file. If no output");
puts("\t-u\tuncompresses the input file to the output file. If no output");
puts("\t\tpath has not been entered and if the input file ends with ");
puts("\t\t\".lzw\", the extension \".lzw\" will be removed; otherwise, the ");
puts("\t\textension \".uncompresed\" will be added");
@ -98,14 +99,15 @@ int main(int argc, char *argv[]) {
const auto [input_path, output_path, compressing] = process_args(argc, argv);
assert(!input_path.empty());
if (compressing) {
if (output_path.empty()) {
compress(input_path, nullptr);
// if (output_path.empty()) {
// compress(input_path, nullptr);
// } else {
// compress(input_path, output_path.c_str());
// }
compress(input_path, (output_path.empty()) ? nullptr : output_path.c_str());
} else {
compress(input_path, output_path.c_str());
}
} else {
puts("Not yet implemented :(");
/* Inversion des types du dictionnaire pour retrouver les chaînes plus aisément */
uncompress(input_path,
(output_path.empty()) ? nullptr : output_path.c_str());
}
return 0;
}

81
src/uncompress.cc Normal file
View File

@ -0,0 +1,81 @@
#include "uncompress.hh"
#include "bitpack.hh"
#include "common.hh"
#include <cassert>
#include <cstdio>
#include <cstdlib>
#ifdef Debug
constexpr bool debug_mode = true;
#else
constexpr bool debug_mode = false;
#endif
using std::fclose;
using std::fopen;
using std::fseek;
using std::string;
using std::uint16_t;
using std::vector;
using ustring = std::basic_string<unsigned char>;
using vuint16 = vector<uint16_t>;
/**
 * Decodes a sequence of LZW codes back into bytes.
 *
 * The first code is emitted directly as a byte; every following code is
 * expanded with dico_uncompress(), which also maintains the dictionary.
 *
 * \param t_compressed codes of one chunk, in stream order
 * \return the decoded bytes; empty when \p t_compressed is empty
 */
[[nodiscard]] ustring lzw_uncompress(vuint16 &&t_compressed) {
  ustring ret{};
  if (t_compressed.empty()) {
    // Nothing to decode; reading t_compressed[0] would be out of bounds
    return ret;
  }

  std::map<uint16_t, ustring> dict{};
  // NOTE(review): presumably the first code of a chunk is always a literal
  // byte (< 256); a larger value would be truncated by this cast
  uint16_t old = t_compressed.front();
  ret.push_back(static_cast<unsigned char>(old));

  for (auto it = t_compressed.begin() + 1; it != t_compressed.end(); ++it) {
    const uint16_t code = *it;
    const auto uncompressed{dico_uncompress(dict, code, old)};
    ret.insert(ret.end(), uncompressed.begin(), uncompressed.end());
    old = code;
  }
  return ret;
}
void uncompress(const string &t_input_name, const char *t_output_name) {
FILE *const input = std::fopen(t_input_name.c_str(), "rb");
assert(input);
FILE *const output =
(t_output_name != nullptr)
? std::fopen(t_output_name, "wb")
: std::fopen((t_input_name + "_uncompressed").c_str(), "wb");
assert(output);
uint16_t nb_chunks = 0;
std::fseek(input, 0, SEEK_SET);
std::fread(&nb_chunks, sizeof(nb_chunks), 1, input);
if constexpr (debug_mode) {
std::printf("Number of chunks: %d\n", nb_chunks);
}
for (uint16_t i = 0; i < nb_chunks; ++i) {
uint16_t size_chunk = 0;
fread(&size_chunk, sizeof(size_chunk), 1, input);
if constexpr (debug_mode) {
printf("Chunk! Size of chunk:\t%d\n", size_chunk);
}
unsigned char *chunk = static_cast<unsigned char *>(
std::malloc(sizeof(unsigned char) * size_chunk));
fread(chunk, sizeof(unsigned char), size_chunk, input);
auto unpacked = unpack(ustring{chunk, chunk + size_chunk});
if constexpr(debug_mode) {
for(const auto val : unpacked) {
std::printf("%d\n", val);
}
}
const auto uncompressed_chunk = lzw_uncompress(std::move(unpacked));
std::fwrite(uncompressed_chunk.data(), sizeof(uncompressed_chunk[0]),
uncompressed_chunk.size(), output);
}
std::fclose(output);
std::fclose(input);
}

12
src/uncompress.hh Normal file
View File

@ -0,0 +1,12 @@
#ifndef LZW_SRC_UNCOMPRESS_H_
#define LZW_SRC_UNCOMPRESS_H_
// <cstdint> is required for std::uint16_t; do not rely on transitive includes
#include <cstdint>
#include <string>
#include <vector>
/// \brief Decodes a sequence of LZW codes into the bytes they stand for
[[nodiscard]] std::basic_string<unsigned char>
lzw_uncompress(std::vector<std::uint16_t> &&);
/// \brief Uncompresses the file at the first path into the second path;
/// a null second argument selects a default output name
void uncompress(const std::string &, const char *);
#endif /* LZW_SRC_UNCOMPRESS_H_ */