Added (broken) uncompressing algo + fixed bit-unpacking
This commit is contained in:
parent
9f70b01886
commit
fcfe944c5d
@ -1,31 +1,21 @@
|
|||||||
#include "bitpack.hh"
|
#include "bitpack.hh"
|
||||||
#include "common.hh"
|
#include "common.hh"
|
||||||
#include <algorithm>
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
using std::uint16_t;
|
using std::uint16_t;
|
||||||
using std::vector;
|
using std::vector;
|
||||||
using uchar = unsigned char;
|
using uchar = unsigned char;
|
||||||
using vuint16 = vector<uint16_t>;
|
using vuint16 = vector<uint16_t>;
|
||||||
using vuchar = vector<uchar>;
|
using vuchar = vector<uchar>;
|
||||||
|
using ustring = std::basic_string<unsigned char>;
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// packing //
|
// packing //
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
[[nodiscard]] vuchar pack(const vuint16 &t_input) {
|
[[nodiscard]] vuchar pack(const vuint16 &t_input) {
|
||||||
vuchar ret{};
|
return pack_n(t_input.begin(), t_input.end(), 9);
|
||||||
const int max_value = ipow(2, 8);
|
|
||||||
for (auto it = t_input.begin(); it != t_input.end(); ++it) {
|
|
||||||
if (*it >= max_value) {
|
|
||||||
const auto next_vec =
|
|
||||||
pack_n(static_cast<vuint16::const_iterator>(it), t_input.end(), 9);
|
|
||||||
ret.insert(ret.end(), next_vec.begin(), next_vec.end());
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
ret.push_back(static_cast<unsigned char>(*it));
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -108,67 +98,59 @@ using vuchar = vector<uchar>;
|
|||||||
// unpacking //
|
// unpacking //
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
[[nodiscard]] vuint16 unpack(const vuchar &t_input) {
|
uint16_t mask_n(int t_nb_bits) {
|
||||||
vuint16 ret{};
|
if(t_nb_bits == 0) {
|
||||||
|
return 0;
|
||||||
const int max_value = ipow(2, 8) - 1;
|
|
||||||
|
|
||||||
// begin with 8bits
|
|
||||||
for (auto it = t_input.begin(); it != t_input.end(); ++it) {
|
|
||||||
ret.push_back(static_cast<uint16_t>(*it));
|
|
||||||
if (*it >= max_value) {
|
|
||||||
auto next_vec{unpack_n(it, t_input.end(), 9)};
|
|
||||||
ret.insert(ret.end(), next_vec.begin(), next_vec.end());
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
uint16_t mask = mask_n(t_nb_bits - 1);
|
||||||
|
mask <<= 1;
|
||||||
|
mask |= 0x1;
|
||||||
|
return mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
[[nodiscard]] vuint16 unpack(ustring &&t_input) {
|
||||||
|
return unpack_n(t_input.begin(), t_input.end(), 9);
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] vuint16 unpack_n(const vuchar::const_iterator t_begin,
|
[[nodiscard]] vuint16 unpack_n(const ustring::const_iterator t_begin,
|
||||||
const vuchar::const_iterator t_end,
|
const ustring::const_iterator t_end,
|
||||||
const int t_n) {
|
const int t_n) {
|
||||||
int step = t_n / 8;
|
int step = t_n / 8;
|
||||||
int left_shift = 0;
|
int left_shift = 0;
|
||||||
int middle_shift = 0;
|
|
||||||
int right_shift = 0;
|
int right_shift = 0;
|
||||||
vuint16 ret{};
|
vuint16 ret{};
|
||||||
|
const uint16_t mask = mask_n(t_n);
|
||||||
|
const int max_value = ipow(2, t_n);
|
||||||
for (auto it = t_begin; it < t_end;) {
|
for (auto it = t_begin; it < t_end;) {
|
||||||
static const int max_value = ipow(2, t_n);
|
|
||||||
uint16_t current_char = 0;
|
uint16_t current_char = 0;
|
||||||
left_shift += step;
|
// left bits
|
||||||
if (left_shift >= t_n) {
|
left_shift =
|
||||||
left_shift = (left_shift - t_n) + step;
|
((left_shift += step) >= t_n) ? (left_shift - t_n) + step : left_shift;
|
||||||
}
|
current_char = static_cast<uint16_t>(*it << left_shift);
|
||||||
current_char = static_cast<uint16_t>((*it) << left_shift);
|
// right bits
|
||||||
|
|
||||||
bool zero_rs = right_shift;
|
bool zero_rs = right_shift;
|
||||||
right_shift -= step;
|
right_shift -= step;
|
||||||
if (right_shift < 0) {
|
if (right_shift < 0) {
|
||||||
|
// optional middle bits before right bits
|
||||||
if (zero_rs) {
|
if (zero_rs) {
|
||||||
middle_shift = std::abs(right_shift);
|
current_char |= *(++it) << std::abs(right_shift);
|
||||||
current_char |= (*(++it)) << middle_shift;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
right_shift = 8 - std::abs(right_shift);
|
right_shift = 8 - std::abs(right_shift);
|
||||||
}
|
}
|
||||||
current_char |= *(++it) >> right_shift;
|
current_char |= *(++it) >> right_shift;
|
||||||
ret.push_back(current_char);
|
// char made!
|
||||||
|
ret.push_back(current_char &= mask);
|
||||||
if (current_char >= max_value) {
|
if (current_char >= max_value) {
|
||||||
const auto next_vec = unpack_n(it, t_end, t_n + 1);
|
const auto next_vec = unpack_n(it, t_end, t_n + 1);
|
||||||
ret.insert(ret.end(), next_vec.begin(), next_vec.end());
|
ret.insert(ret.end(), next_vec.begin(), next_vec.end());
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] vuint16 unpack_16(const vuchar::const_iterator t_begin,
|
[[nodiscard]] vuint16 unpack_16(const ustring::const_iterator t_begin,
|
||||||
const vuchar::const_iterator t_end) {
|
const ustring::const_iterator t_end) {
|
||||||
vuint16 ret{};
|
vuint16 ret{};
|
||||||
for (auto it = t_begin; it < t_end; ++it) {
|
for (auto it = t_begin; it < t_end; ++it) {
|
||||||
ret.push_back(static_cast<uint16_t>((*it << 8) | *(++it)));
|
ret.push_back(static_cast<uint16_t>((*it << 8) | *(++it)));
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
#define LZW_SRC_BITPACK_H_
|
#define LZW_SRC_BITPACK_H_
|
||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
/// \brief Bit-packs the input dynamically
|
/// \brief Bit-packs the input dynamically
|
||||||
@ -24,14 +25,14 @@ pack_16(const std::vector<std::uint16_t>::const_iterator,
|
|||||||
const std::vector<std::uint16_t>::const_iterator);
|
const std::vector<std::uint16_t>::const_iterator);
|
||||||
|
|
||||||
[[nodiscard]] std::vector<std::uint16_t>
|
[[nodiscard]] std::vector<std::uint16_t>
|
||||||
unpack(const std::vector<unsigned char> &);
|
unpack(std::basic_string<unsigned char> &&);
|
||||||
|
|
||||||
[[nodiscard]] std::vector<std::uint16_t>
|
[[nodiscard]] std::vector<std::uint16_t>
|
||||||
unpack_n(const std::vector<unsigned char>::const_iterator,
|
unpack_n(const std::basic_string<unsigned char>::const_iterator,
|
||||||
const std::vector<unsigned char>::const_iterator, const int t_n);
|
const std::basic_string<unsigned char>::const_iterator, const int t_n);
|
||||||
|
|
||||||
[[nodiscard]] std::vector<std::uint16_t>
|
[[nodiscard]] std::vector<std::uint16_t>
|
||||||
unpack_16(const std::vector<unsigned char>::const_iterator,
|
unpack_16(const std::basic_string<unsigned char>::const_iterator,
|
||||||
const std::vector<unsigned char>::const_iterator);
|
const std::basic_string<unsigned char>::const_iterator);
|
||||||
|
|
||||||
#endif /* LZW_SRC_BITPACK_H_ */
|
#endif /* LZW_SRC_BITPACK_H_ */
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
using std::uint8_t;
|
using std::uint8_t;
|
||||||
using std::uint16_t;
|
using std::uint16_t;
|
||||||
using dic_comp_t = std::map<std::pair<uint16_t, uint8_t>, uint16_t>;
|
using dic_comp_t = std::map<std::pair<uint16_t, uint8_t>, uint16_t>;
|
||||||
using dic_un_t = std::map<std::uint16_t, std::unique_ptr<std::pair<uint16_t, uint8_t>>>;
|
// using dic_un_t = std::map<std::uint16_t, std::unique_ptr<std::pair<uint16_t, uint8_t>>>;
|
||||||
using ustring = std::basic_string<unsigned char>;
|
using ustring = std::basic_string<unsigned char>;
|
||||||
|
|
||||||
int ipow(int base, int exp) {
|
int ipow(int base, int exp) {
|
||||||
@ -44,8 +44,8 @@ int ipow(int base, int exp) {
|
|||||||
* \param t_c Caractère suivant la chaine de caractères \p t_nr_chaine
|
* \param t_c Caractère suivant la chaine de caractères \p t_nr_chaine
|
||||||
* \return const std::pair<bool, uint16_t>
|
* \return const std::pair<bool, uint16_t>
|
||||||
*/
|
*/
|
||||||
std::pair<bool, uint16_t> dico(dic_comp_t &t_dictionary, uint16_t t_nr_chaine,
|
std::pair<bool, uint16_t> dico(dic_comp_t &t_dictionary,
|
||||||
uint8_t t_c) {
|
const uint16_t t_nr_chaine, const uint8_t t_c) {
|
||||||
if (t_nr_chaine == 0xFFFF) {
|
if (t_nr_chaine == 0xFFFF) {
|
||||||
return std::make_pair(true, t_c);
|
return std::make_pair(true, t_c);
|
||||||
}
|
}
|
||||||
@ -58,3 +58,19 @@ std::pair<bool, uint16_t> dico(dic_comp_t &t_dictionary, uint16_t t_nr_chaine,
|
|||||||
t_dictionary.size()) +
|
t_dictionary.size()) +
|
||||||
255));
|
255));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ustring dico_uncompress(std ::map<uint16_t, ustring> &t_dict,
|
||||||
|
const uint16_t t_code, const uint16_t t_old) {
|
||||||
|
auto &e = t_dict[t_code];
|
||||||
|
if(e.empty()) {
|
||||||
|
e = t_dict[t_old];
|
||||||
|
const auto temp = e[0];
|
||||||
|
e += temp;
|
||||||
|
return e;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto str = t_dict[t_old];
|
||||||
|
str += str[0];
|
||||||
|
t_dict[static_cast<uint16_t>(t_dict.size())] = std::move(str);
|
||||||
|
return e;
|
||||||
|
}
|
||||||
|
@ -15,6 +15,10 @@ int ipow(int, int);
|
|||||||
/// \brief Recherche ou ajout de chaine dans le dictionnaire
|
/// \brief Recherche ou ajout de chaine dans le dictionnaire
|
||||||
std::pair<bool, std::uint16_t>
|
std::pair<bool, std::uint16_t>
|
||||||
dico(std::map<std::pair<std::uint16_t, std::uint8_t>, std::uint16_t> &,
|
dico(std::map<std::pair<std::uint16_t, std::uint8_t>, std::uint16_t> &,
|
||||||
std::uint16_t, std::uint8_t);
|
const std::uint16_t, const std::uint8_t);
|
||||||
|
|
||||||
|
std::basic_string<unsigned char>
|
||||||
|
dico_uncompress(std::map<std::uint16_t, std::basic_string<unsigned char>> &,
|
||||||
|
const std::uint16_t, const std::uint16_t);
|
||||||
|
|
||||||
#endif /* LZW_SRC_COMMON_H_ */
|
#endif /* LZW_SRC_COMMON_H_ */
|
||||||
|
@ -44,6 +44,8 @@ void write_file(FILE *const t_out, const vvuint16 &t_text) {
|
|||||||
}
|
}
|
||||||
fwrite(&size, sizeof(size), 1, t_out);
|
fwrite(&size, sizeof(size), 1, t_out);
|
||||||
for (const auto &chunk : t_text) {
|
for (const auto &chunk : t_text) {
|
||||||
|
if constexpr (debug_mode)
|
||||||
|
std::printf("Chunk!\n");
|
||||||
write_chunk(t_out, chunk);
|
write_chunk(t_out, chunk);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
22
src/main.cc
22
src/main.cc
@ -6,10 +6,11 @@
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "compress.hh"
|
#include <getopt.h>
|
||||||
#include "getopt.h"
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
|
#include "compress.hh"
|
||||||
|
#include "uncompress.hh"
|
||||||
|
|
||||||
using std::printf;
|
using std::printf;
|
||||||
using std::puts;
|
using std::puts;
|
||||||
@ -45,7 +46,7 @@ void help() {
|
|||||||
puts("\t-o\tpath to the output file (if the file already exists, it will");
|
puts("\t-o\tpath to the output file (if the file already exists, it will");
|
||||||
puts("\t\tbe overwritten). Default: input path + \".lzw\"");
|
puts("\t\tbe overwritten). Default: input path + \".lzw\"");
|
||||||
puts("\t-c\tcompress the input file");
|
puts("\t-c\tcompress the input file");
|
||||||
puts("\t-d\tdecompresses the input file to the output file. If no output");
|
puts("\t-u\tuncompresses the input file to the output file. If no output");
|
||||||
puts("\t\tpath has not been entered and if the input file ends with ");
|
puts("\t\tpath has not been entered and if the input file ends with ");
|
||||||
puts("\t\t\".lzw\", the extension \".lzw\" will be removed; otherwise, the ");
|
puts("\t\t\".lzw\", the extension \".lzw\" will be removed; otherwise, the ");
|
||||||
puts("\t\textension \".uncompresed\" will be added");
|
puts("\t\textension \".uncompresed\" will be added");
|
||||||
@ -98,14 +99,15 @@ int main(int argc, char *argv[]) {
|
|||||||
const auto [input_path, output_path, compressing] = process_args(argc, argv);
|
const auto [input_path, output_path, compressing] = process_args(argc, argv);
|
||||||
assert(!input_path.empty());
|
assert(!input_path.empty());
|
||||||
if (compressing) {
|
if (compressing) {
|
||||||
if (output_path.empty()) {
|
// if (output_path.empty()) {
|
||||||
compress(input_path, nullptr);
|
// compress(input_path, nullptr);
|
||||||
|
// } else {
|
||||||
|
// compress(input_path, output_path.c_str());
|
||||||
|
// }
|
||||||
|
compress(input_path, (output_path.empty()) ? nullptr : output_path.c_str());
|
||||||
} else {
|
} else {
|
||||||
compress(input_path, output_path.c_str());
|
uncompress(input_path,
|
||||||
}
|
(output_path.empty()) ? nullptr : output_path.c_str());
|
||||||
} else {
|
|
||||||
puts("Not yet implemented :(");
|
|
||||||
/* Inversion des types du dictionnaire pour retrouver les chaînes plus aisément */
|
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
81
src/uncompress.cc
Normal file
81
src/uncompress.cc
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
#include "uncompress.hh"
|
||||||
|
#include "bitpack.hh"
|
||||||
|
#include "common.hh"
|
||||||
|
#include <cassert>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstdlib>
|
||||||
|
|
||||||
|
#ifdef Debug
|
||||||
|
constexpr bool debug_mode = true;
|
||||||
|
#else
|
||||||
|
constexpr bool debug_mode = false;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
using std::fclose;
|
||||||
|
using std::fopen;
|
||||||
|
using std::fseek;
|
||||||
|
using std::string;
|
||||||
|
using std::uint16_t;
|
||||||
|
using std::vector;
|
||||||
|
using ustring = std::basic_string<unsigned char>;
|
||||||
|
using vuint16 = vector<uint16_t>;
|
||||||
|
|
||||||
|
[[nodiscard]] ustring lzw_uncompress(vuint16 &&t_compressed) {
|
||||||
|
ustring ret{};
|
||||||
|
uint16_t old = 0;
|
||||||
|
uint16_t code = t_compressed[0];
|
||||||
|
std::map<uint16_t, ustring> dict{};
|
||||||
|
ret.push_back(static_cast<unsigned char>(code));
|
||||||
|
old = code;
|
||||||
|
for (auto it = t_compressed.begin() + 1; it != t_compressed.end(); ++it) {
|
||||||
|
code = *it;
|
||||||
|
const auto uncompressed{dico_uncompress(dict, code, old)};
|
||||||
|
ret.insert(ret.end(), uncompressed.begin(), uncompressed.end());
|
||||||
|
old = code;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
void uncompress(const string &t_input_name, const char *t_output_name) {
|
||||||
|
FILE *const input = std::fopen(t_input_name.c_str(), "rb");
|
||||||
|
assert(input);
|
||||||
|
|
||||||
|
FILE *const output =
|
||||||
|
(t_output_name != nullptr)
|
||||||
|
? std::fopen(t_output_name, "wb")
|
||||||
|
: std::fopen((t_input_name + "_uncompressed").c_str(), "wb");
|
||||||
|
assert(output);
|
||||||
|
|
||||||
|
uint16_t nb_chunks = 0;
|
||||||
|
std::fseek(input, 0, SEEK_SET);
|
||||||
|
std::fread(&nb_chunks, sizeof(nb_chunks), 1, input);
|
||||||
|
|
||||||
|
if constexpr (debug_mode) {
|
||||||
|
std::printf("Number of chunks: %d\n", nb_chunks);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint16_t i = 0; i < nb_chunks; ++i) {
|
||||||
|
uint16_t size_chunk = 0;
|
||||||
|
fread(&size_chunk, sizeof(size_chunk), 1, input);
|
||||||
|
if constexpr (debug_mode) {
|
||||||
|
printf("Chunk! Size of chunk:\t%d\n", size_chunk);
|
||||||
|
}
|
||||||
|
unsigned char *chunk = static_cast<unsigned char *>(
|
||||||
|
std::malloc(sizeof(unsigned char) * size_chunk));
|
||||||
|
fread(chunk, sizeof(unsigned char), size_chunk, input);
|
||||||
|
|
||||||
|
auto unpacked = unpack(ustring{chunk, chunk + size_chunk});
|
||||||
|
if constexpr(debug_mode) {
|
||||||
|
for(const auto val : unpacked) {
|
||||||
|
std::printf("%d\n", val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const auto uncompressed_chunk = lzw_uncompress(std::move(unpacked));
|
||||||
|
std::fwrite(uncompressed_chunk.data(), sizeof(uncompressed_chunk[0]),
|
||||||
|
uncompressed_chunk.size(), output);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::fclose(output);
|
||||||
|
std::fclose(input);
|
||||||
|
}
|
12
src/uncompress.hh
Normal file
12
src/uncompress.hh
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
#ifndef LZW_SRC_UNCOMPRESS_H_
|
||||||
|
#define LZW_SRC_UNCOMPRESS_H_
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
[[nodiscard]] std::basic_string<unsigned char>
|
||||||
|
lzw_uncompress(std::vector<std::uint16_t> &&);
|
||||||
|
|
||||||
|
void uncompress(const std::string &, const char*);
|
||||||
|
|
||||||
|
#endif /* LZW_SRC_UNCOMPRESS_H_ */
|
Loading…
Reference in New Issue
Block a user