Added (broken) uncompressing algo + fixed bit-unpacking
This commit is contained in:
parent
9f70b01886
commit
fcfe944c5d
@ -1,34 +1,24 @@
|
||||
#include "bitpack.hh"
|
||||
#include "common.hh"
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
|
||||
using std::uint16_t;
|
||||
using std::vector;
|
||||
using uchar = unsigned char;
|
||||
using vuint16 = vector<uint16_t>;
|
||||
using vuchar = vector<uchar>;
|
||||
using ustring = std::basic_string<unsigned char>;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// packing //
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
[[nodiscard]] vuchar pack(const vuint16 &t_input) {
|
||||
vuchar ret{};
|
||||
const int max_value = ipow(2, 8);
|
||||
for (auto it = t_input.begin(); it != t_input.end(); ++it) {
|
||||
if (*it >= max_value) {
|
||||
const auto next_vec =
|
||||
pack_n(static_cast<vuint16::const_iterator>(it), t_input.end(), 9);
|
||||
ret.insert(ret.end(), next_vec.begin(), next_vec.end());
|
||||
return ret;
|
||||
}
|
||||
ret.push_back(static_cast<unsigned char>(*it));
|
||||
}
|
||||
return ret;
|
||||
return pack_n(t_input.begin(), t_input.end(), 9);
|
||||
}
|
||||
|
||||
/**
|
||||
/**
|
||||
* Packs \p t_input into unsigned char, assuming the max value of t_input
|
||||
* only takes \p t_n bits
|
||||
*
|
||||
@ -37,8 +27,8 @@ using vuchar = vector<uchar>;
|
||||
* \param t_n maximum size of an input value in bits \return Returns a
|
||||
* vector of unsigned char containing the packed values from t_input
|
||||
*/
|
||||
[[nodiscard]] vuchar
|
||||
pack_n(const vuint16::const_iterator t_input_begin,
|
||||
[[nodiscard]] vuchar
|
||||
pack_n(const vuint16::const_iterator t_input_begin,
|
||||
const vuint16::const_iterator t_input_end, const int t_n) {
|
||||
if (t_n == 16) {
|
||||
return pack_16(t_input_begin, t_input_end);
|
||||
@ -104,71 +94,63 @@ using vuchar = vector<uchar>;
|
||||
return ret;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// unpacking //
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// unpacking //
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
[[nodiscard]] vuint16 unpack(const vuchar &t_input) {
|
||||
vuint16 ret{};
|
||||
|
||||
const int max_value = ipow(2, 8) - 1;
|
||||
|
||||
// begin with 8bits
|
||||
for (auto it = t_input.begin(); it != t_input.end(); ++it) {
|
||||
ret.push_back(static_cast<uint16_t>(*it));
|
||||
if (*it >= max_value) {
|
||||
auto next_vec{unpack_n(it, t_input.end(), 9)};
|
||||
ret.insert(ret.end(), next_vec.begin(), next_vec.end());
|
||||
return ret;
|
||||
uint16_t mask_n(int t_nb_bits) {
|
||||
if(t_nb_bits == 0) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
uint16_t mask = mask_n(t_nb_bits - 1);
|
||||
mask <<= 1;
|
||||
mask |= 0x1;
|
||||
return mask;
|
||||
}
|
||||
|
||||
[[nodiscard]] vuint16 unpack_n(const vuchar::const_iterator t_begin,
|
||||
const vuchar::const_iterator t_end,
|
||||
/**
 * \brief Unpacks a complete bit-packed byte string into 16-bit codes.
 *
 * Thin entry point: hands the whole byte range of \p t_input to unpack_n()
 * with an initial code width of 9 bits.
 *
 * \param t_input packed bytes (taken by rvalue reference, only read here)
 * \return the codes produced by unpack_n() for the full range
 */
[[nodiscard]] vuint16 unpack(ustring &&t_input) {
  constexpr int initial_width = 9;
  const ustring::const_iterator first = t_input.cbegin();
  const ustring::const_iterator last = t_input.cend();
  return unpack_n(first, last, initial_width);
}
|
||||
|
||||
[[nodiscard]] vuint16 unpack_n(const ustring::const_iterator t_begin,
|
||||
const ustring::const_iterator t_end,
|
||||
const int t_n) {
|
||||
int step = t_n / 8;
|
||||
int left_shift = 0;
|
||||
int middle_shift = 0;
|
||||
int right_shift = 0;
|
||||
vuint16 ret{};
|
||||
|
||||
const uint16_t mask = mask_n(t_n);
|
||||
const int max_value = ipow(2, t_n);
|
||||
for (auto it = t_begin; it < t_end;) {
|
||||
static const int max_value = ipow(2, t_n);
|
||||
uint16_t current_char = 0;
|
||||
left_shift += step;
|
||||
if (left_shift >= t_n) {
|
||||
left_shift = (left_shift - t_n) + step;
|
||||
}
|
||||
current_char = static_cast<uint16_t>((*it) << left_shift);
|
||||
|
||||
// left bits
|
||||
left_shift =
|
||||
((left_shift += step) >= t_n) ? (left_shift - t_n) + step : left_shift;
|
||||
current_char = static_cast<uint16_t>(*it << left_shift);
|
||||
// right bits
|
||||
bool zero_rs = right_shift;
|
||||
right_shift -= step;
|
||||
if (right_shift < 0) {
|
||||
// optional middle bits before right bits
|
||||
if (zero_rs) {
|
||||
middle_shift = std::abs(right_shift);
|
||||
current_char |= (*(++it)) << middle_shift;
|
||||
current_char |= *(++it) << std::abs(right_shift);
|
||||
}
|
||||
|
||||
right_shift = 8 - std::abs(right_shift);
|
||||
}
|
||||
current_char |= *(++it) >> right_shift;
|
||||
ret.push_back(current_char);
|
||||
|
||||
// char made!
|
||||
ret.push_back(current_char &= mask);
|
||||
if (current_char >= max_value) {
|
||||
const auto next_vec = unpack_n(it, t_end, t_n + 1);
|
||||
ret.insert(ret.end(), next_vec.begin(), next_vec.end());
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
[[nodiscard]] vuint16 unpack_16(const vuchar::const_iterator t_begin,
|
||||
const vuchar::const_iterator t_end) {
|
||||
[[nodiscard]] vuint16 unpack_16(const ustring::const_iterator t_begin,
|
||||
const ustring::const_iterator t_end) {
|
||||
vuint16 ret{};
|
||||
for (auto it = t_begin; it < t_end; ++it) {
|
||||
ret.push_back(static_cast<uint16_t>((*it << 8) | *(++it)));
|
||||
|
@ -7,6 +7,7 @@
|
||||
#define LZW_SRC_BITPACK_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
/// \brief Bit-packs the input dynamically
|
||||
@ -24,14 +25,14 @@ pack_16(const std::vector<std::uint16_t>::const_iterator,
|
||||
const std::vector<std::uint16_t>::const_iterator);
|
||||
|
||||
[[nodiscard]] std::vector<std::uint16_t>
|
||||
unpack(const std::vector<unsigned char> &);
|
||||
unpack(std::basic_string<unsigned char> &&);
|
||||
|
||||
[[nodiscard]] std::vector<std::uint16_t>
|
||||
unpack_n(const std::vector<unsigned char>::const_iterator,
|
||||
const std::vector<unsigned char>::const_iterator, const int t_n);
|
||||
unpack_n(const std::basic_string<unsigned char>::const_iterator,
|
||||
const std::basic_string<unsigned char>::const_iterator, const int t_n);
|
||||
|
||||
[[nodiscard]] std::vector<std::uint16_t>
|
||||
unpack_16(const std::vector<unsigned char>::const_iterator,
|
||||
const std::vector<unsigned char>::const_iterator);
|
||||
unpack_16(const std::basic_string<unsigned char>::const_iterator,
|
||||
const std::basic_string<unsigned char>::const_iterator);
|
||||
|
||||
#endif /* LZW_SRC_BITPACK_H_ */
|
||||
|
@ -8,7 +8,7 @@
|
||||
using std::uint8_t;
|
||||
using std::uint16_t;
|
||||
using dic_comp_t = std::map<std::pair<uint16_t, uint8_t>, uint16_t>;
|
||||
using dic_un_t = std::map<std::uint16_t, std::unique_ptr<std::pair<uint16_t, uint8_t>>>;
|
||||
// using dic_un_t = std::map<std::uint16_t, std::unique_ptr<std::pair<uint16_t, uint8_t>>>;
|
||||
using ustring = std::basic_string<unsigned char>;
|
||||
|
||||
int ipow(int base, int exp) {
|
||||
@ -44,8 +44,8 @@ int ipow(int base, int exp) {
|
||||
* \param t_c Caractère suivant la chaine de caractères \p t_nr_chaine
|
||||
* \return const std::pair<bool, uint16_t>
|
||||
*/
|
||||
std::pair<bool, uint16_t> dico(dic_comp_t &t_dictionary, uint16_t t_nr_chaine,
|
||||
uint8_t t_c) {
|
||||
std::pair<bool, uint16_t> dico(dic_comp_t &t_dictionary,
|
||||
const uint16_t t_nr_chaine, const uint8_t t_c) {
|
||||
if (t_nr_chaine == 0xFFFF) {
|
||||
return std::make_pair(true, t_c);
|
||||
}
|
||||
@ -58,3 +58,19 @@ std::pair<bool, uint16_t> dico(dic_comp_t &t_dictionary, uint16_t t_nr_chaine,
|
||||
t_dictionary.size()) +
|
||||
255));
|
||||
}
|
||||
|
||||
/// \brief Resolves one code against the decompression dictionary and returns
/// the byte string associated with it, updating the dictionary on the way.
///
/// \param t_dict dictionary mapping codes to byte strings; modified in place
/// \param t_code code being decoded
/// \param t_old  code decoded just before \p t_code
/// \return a copy of the dictionary entry stored under \p t_code
///
/// NOTE(review): both lookups below use std::map::operator[], which inserts an
/// empty string for a missing key, so merely calling this function can grow
/// \p t_dict and shift the value of t_dict.size() used for new keys.
ustring dico_uncompress(std ::map<uint16_t, ustring> &t_dict,
|
||||
const uint16_t t_code, const uint16_t t_old) {
|
||||
// Creates an empty entry for t_code when the key is not present yet.
auto &e = t_dict[t_code];
|
||||
// Empty entry: build it from the previous code's string.
if(e.empty()) {
|
||||
e = t_dict[t_old];
|
||||
// NOTE(review): if the entry for t_old is empty as well, e[0] reads the
// string terminator and a zero byte gets appended — confirm this is intended.
const auto temp = e[0];
|
||||
e += temp;
|
||||
return e;
|
||||
}
|
||||
|
||||
// Non-empty entry: register (old string + its own first byte) as a new entry.
// NOTE(review): textbook LZW appends the first byte of the *current* entry
// (e[0]) here rather than str[0] — verify against the compressor.
auto str = t_dict[t_old];
|
||||
str += str[0];
|
||||
// The new key is the current map size, which includes any entries that the
// operator[] lookups above inserted — presumably not the encoder's numbering;
// TODO confirm.
t_dict[static_cast<uint16_t>(t_dict.size())] = std::move(str);
|
||||
return e;
|
||||
}
|
||||
|
@ -15,6 +15,10 @@ int ipow(int, int);
|
||||
/// \brief Recherche ou ajout de chaine dans le dictionnaire
|
||||
std::pair<bool, std::uint16_t>
|
||||
dico(std::map<std::pair<std::uint16_t, std::uint8_t>, std::uint16_t> &,
|
||||
std::uint16_t, std::uint8_t);
|
||||
const std::uint16_t, const std::uint8_t);
|
||||
|
||||
std::basic_string<unsigned char>
|
||||
dico_uncompress(std::map<std::uint16_t, std::basic_string<unsigned char>> &,
|
||||
const std::uint16_t, const std::uint16_t);
|
||||
|
||||
#endif /* LZW_SRC_COMMON_H_ */
|
||||
|
@ -44,6 +44,8 @@ void write_file(FILE *const t_out, const vvuint16 &t_text) {
|
||||
}
|
||||
fwrite(&size, sizeof(size), 1, t_out);
|
||||
for (const auto &chunk : t_text) {
|
||||
if constexpr (debug_mode)
|
||||
std::printf("Chunk!\n");
|
||||
write_chunk(t_out, chunk);
|
||||
}
|
||||
}
|
||||
|
22
src/main.cc
22
src/main.cc
@ -6,10 +6,11 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "compress.hh"
|
||||
#include "getopt.h"
|
||||
#include <getopt.h>
|
||||
#include <cassert>
|
||||
#include <tuple>
|
||||
#include "compress.hh"
|
||||
#include "uncompress.hh"
|
||||
|
||||
using std::printf;
|
||||
using std::puts;
|
||||
@ -45,7 +46,7 @@ void help() {
|
||||
puts("\t-o\tpath to the output file (if the file already exists, it will");
|
||||
puts("\t\tbe overwritten). Default: input path + \".lzw\"");
|
||||
puts("\t-c\tcompress the input file");
|
||||
puts("\t-d\tdecompresses the input file to the output file. If no output");
|
||||
puts("\t-u\tuncompresses the input file to the output file. If no output");
|
||||
puts("\t\tpath has not been entered and if the input file ends with ");
|
||||
puts("\t\t\".lzw\", the extension \".lzw\" will be removed; otherwise, the ");
|
||||
puts("\t\textension \".uncompresed\" will be added");
|
||||
@ -98,14 +99,15 @@ int main(int argc, char *argv[]) {
|
||||
const auto [input_path, output_path, compressing] = process_args(argc, argv);
|
||||
assert(!input_path.empty());
|
||||
if (compressing) {
|
||||
if (output_path.empty()) {
|
||||
compress(input_path, nullptr);
|
||||
// if (output_path.empty()) {
|
||||
// compress(input_path, nullptr);
|
||||
// } else {
|
||||
// compress(input_path, output_path.c_str());
|
||||
// }
|
||||
compress(input_path, (output_path.empty()) ? nullptr : output_path.c_str());
|
||||
} else {
|
||||
compress(input_path, output_path.c_str());
|
||||
}
|
||||
} else {
|
||||
puts("Not yet implemented :(");
|
||||
/* Inversion des types du dictionnaire pour retrouver les chaînes plus aisément */
|
||||
uncompress(input_path,
|
||||
(output_path.empty()) ? nullptr : output_path.c_str());
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
81
src/uncompress.cc
Normal file
81
src/uncompress.cc
Normal file
@ -0,0 +1,81 @@
|
||||
#include "uncompress.hh"
|
||||
#include "bitpack.hh"
|
||||
#include "common.hh"
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
#ifdef Debug
|
||||
constexpr bool debug_mode = true;
|
||||
#else
|
||||
constexpr bool debug_mode = false;
|
||||
#endif
|
||||
|
||||
using std::fclose;
|
||||
using std::fopen;
|
||||
using std::fseek;
|
||||
using std::string;
|
||||
using std::uint16_t;
|
||||
using std::vector;
|
||||
using ustring = std::basic_string<unsigned char>;
|
||||
using vuint16 = vector<uint16_t>;
|
||||
|
||||
/**
 * \brief Decodes one chunk of codes into raw bytes.
 *
 * The first code is emitted as a single literal byte. Every following code is
 * resolved through dico_uncompress(), using a dictionary local to this call
 * and the previously seen code.
 *
 * \param t_compressed codes of one chunk; may be empty
 * \return the decoded bytes, or an empty string when \p t_compressed is empty
 */
[[nodiscard]] ustring lzw_uncompress(vuint16 &&t_compressed) {
  ustring ret{};

  // Nothing to decode: reading t_compressed[0] on an empty vector would be
  // undefined behaviour.
  if (t_compressed.empty()) {
    return ret;
  }

  std::map<uint16_t, ustring> dict{};

  // The first code is written out as one literal byte.
  uint16_t old = t_compressed.front();
  ret.push_back(static_cast<unsigned char>(old));

  for (auto it = t_compressed.begin() + 1; it != t_compressed.end(); ++it) {
    const uint16_t code = *it;
    const auto uncompressed{dico_uncompress(dict, code, old)};
    ret += uncompressed;
    old = code;
  }

  return ret;
}
|
||||
|
||||
void uncompress(const string &t_input_name, const char *t_output_name) {
|
||||
FILE *const input = std::fopen(t_input_name.c_str(), "rb");
|
||||
assert(input);
|
||||
|
||||
FILE *const output =
|
||||
(t_output_name != nullptr)
|
||||
? std::fopen(t_output_name, "wb")
|
||||
: std::fopen((t_input_name + "_uncompressed").c_str(), "wb");
|
||||
assert(output);
|
||||
|
||||
uint16_t nb_chunks = 0;
|
||||
std::fseek(input, 0, SEEK_SET);
|
||||
std::fread(&nb_chunks, sizeof(nb_chunks), 1, input);
|
||||
|
||||
if constexpr (debug_mode) {
|
||||
std::printf("Number of chunks: %d\n", nb_chunks);
|
||||
}
|
||||
|
||||
for (uint16_t i = 0; i < nb_chunks; ++i) {
|
||||
uint16_t size_chunk = 0;
|
||||
fread(&size_chunk, sizeof(size_chunk), 1, input);
|
||||
if constexpr (debug_mode) {
|
||||
printf("Chunk! Size of chunk:\t%d\n", size_chunk);
|
||||
}
|
||||
unsigned char *chunk = static_cast<unsigned char *>(
|
||||
std::malloc(sizeof(unsigned char) * size_chunk));
|
||||
fread(chunk, sizeof(unsigned char), size_chunk, input);
|
||||
|
||||
auto unpacked = unpack(ustring{chunk, chunk + size_chunk});
|
||||
if constexpr(debug_mode) {
|
||||
for(const auto val : unpacked) {
|
||||
std::printf("%d\n", val);
|
||||
}
|
||||
}
|
||||
const auto uncompressed_chunk = lzw_uncompress(std::move(unpacked));
|
||||
std::fwrite(uncompressed_chunk.data(), sizeof(uncompressed_chunk[0]),
|
||||
uncompressed_chunk.size(), output);
|
||||
}
|
||||
|
||||
std::fclose(output);
|
||||
std::fclose(input);
|
||||
}
|
12
src/uncompress.hh
Normal file
12
src/uncompress.hh
Normal file
@ -0,0 +1,12 @@
|
||||
#ifndef LZW_SRC_UNCOMPRESS_H_
|
||||
#define LZW_SRC_UNCOMPRESS_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
[[nodiscard]] std::basic_string<unsigned char>
|
||||
lzw_uncompress(std::vector<std::uint16_t> &&);
|
||||
|
||||
void uncompress(const std::string &, const char*);
|
||||
|
||||
#endif /* LZW_SRC_UNCOMPRESS_H_ */
|
Loading…
Reference in New Issue
Block a user