Compare commits

..

No commits in common. "master" and "circle-ci" have entirely different histories.

28 changed files with 3289 additions and 559 deletions

6
.bettercodehub.yml Normal file
View File

@ -0,0 +1,6 @@
component_depth: 2
languages:
- cpp
exclude:
- includes/*
- .*/*

View File

@ -5,4 +5,4 @@ jobs:
- image: purplekarrot/gcc-8
steps:
- checkout
- run: cd build && cmake -DCMAKE_BUILD_TYPE=Release .. && make -j
- run: cd build && cmake -DCMAKE_BUILD_TYPE=Debug .. && make -j

12
.gitignore vendored
View File

@ -1,13 +1,9 @@
*~
gmon\.out
/cmake-build-debug/Makefile
cmake-build-debug/
\.idea/
*.lzw
\.scannerwork/
bw-output/
bin/
build/
debug/

View File

@ -1,7 +0,0 @@
image: rikorose/gcc-cmake:latest
stages:
- build
build:
stage: build
script:
- mkdir -p build bin && cd build && cmake -DCMAKE_BUILD_TYPE=Release .. && make -j

View File

@ -4,48 +4,60 @@ compiler: clang
os: linux
dist: trusty
addons:
sonarcloud:
organization: "phundrak-github"
token:
secure: ${SONAR_TOKEN}
apt:
config:
retries: true
sources:
- ubuntu-toolchain-r-test
- llvm-toolchain-trusty-5.0
packages:
- g++-7
- clang-5.0
cache:
apt: true
matrix:
include:
# Linux Clang C++17
- env:
BUILD_TYPE=debug
BUILD_TYPE=Debug
BIN_DIR=debug
CC=clang-5.0
CXX=clang++-5.0
- env:
BUILD_TYPE=release
BUILD_TYPE=Release
BIN_DIR=bin
CC=clang-5.0
CXX=clang++-5.0
# Linux GCC C++17
- env:
- MATRIX_EVAL="BUILD_TYPE=debug && BIN_DIR=debug && CC=gcc-7 && CXX=g++-7"
- MATRIX_EVAL="BUILD_TYPE=Debug && BIN_DIR=debug && CC=gcc-7 && CXX=g++-7"
compiler: gcc
before_install:
- eval "${MATRIX_EVAL}"
- env:
- MATRIX_EVAL="BUILD_TYPE=release && BIN_DIR=bin && CC=gcc-7 && CXX=g++-7"
- MATRIX_EVAL="BUILD_TYPE=Release && BIN_DIR=bin && CC=gcc-7 && CXX=g++-7"
compiler: gcc
before_install:
- eval "${MATRIX_EVAL}"
# OSX Clang
- os: osx
osx_image: xcode9.3
env:
BUILD_TYPE=Release
BIN_DIR=bin
before_install:
brew update
install:
brew upgrade cmake
- os: osx
osx_image: xcode9.3
env:
BUILD_TYPE=Debug
BIN_DIR=debug
before_install:
brew update
install:
brew upgrade cmake
script:
- make $BUILD_TYPE
- build-wrapper-linux-x86-64 --out-dir bw-output make clean all
- sonar-scanner -Dsonar.projectKey=Phundrak_lzw-assignment -Dsonar.sources=./src/ -Dsonar.cfamily.build-wrapper-output=bw-output -Dsonar.host.url=https://sonarcloud.io
- cd build
- cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE ..
- make -j

View File

@ -1,5 +1,6 @@
cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
set(CMAKE_LEGACY_CYGWIN_WIN32 0)
set(CMAKE_BUILD_TYPE Debug)
project("projet_lzw")
@ -25,6 +26,8 @@ else()
message( FATAL_ERROR "C++17 not supported, CMake will exit." )
endif()
endif()
# set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED YES)
set(CMAKE_CXX_EXTENSIONS OFF)

View File

@ -1,16 +0,0 @@
# Default target: an optimised, stripped build (same as `release`).
all: release

# None of these targets produce a file of the same name.
.PHONY: all release debug clean

# Configure and build in Release mode, then strip the resulting binary.
release:
	@mkdir -p build
	@cd build && pwd && cmake -DCMAKE_BUILD_TYPE=Release .. && make
	@strip bin/projet_lzw

# Configure and build in Debug mode (binary is left unstripped).
debug:
	@mkdir -p build
	@cd build && pwd && cmake -DCMAKE_BUILD_TYPE=Debug .. && make

# Remove every build product while keeping the directories themselves.
clean:
	rm -rf bin/* build/* debug/*

View File

@ -1,25 +1,11 @@
[![CircleCI](https://circleci.com/gh/Phundrak/lzw-assignment/tree/master.svg?style=svg)](https://circleci.com/gh/Phundrak/lzw-assignment/tree/master)
[![Travis Badge](https://travis-ci.org/Phundrak/lzw-assignment.svg?branch=master)](https://travis-ci.org/Phundrak/lzw-assignment)
[![Codacy Badge](https://api.codacy.com/project/badge/Grade/80cf9a0514554f368effaf78d8e4ae15)](https://www.codacy.com/app/Phundrak/lzw-assignment?utm_source=github.com&utm_medium=referral&utm_content=Phundrak/lzw-assignment&utm_campaign=Badge_Grade)
[![CodeFactor](https://www.codefactor.io/repository/github/phundrak/lzw-assignment/badge)](https://www.codefactor.io/repository/github/phundrak/lzw-assignment)
[![SonarCloud Badge](https://sonarcloud.io/api/project_badges/measure?project=Phundrak_lzw-assignment&metric=alert_status)](https://sonarcloud.io/dashboard?id=Phundrak_lzw-assignment)
[![Spacemacs Badge](https://cdn.rawgit.com/syl20bnr/spacemacs/442d025779da2f62fc86c2082703697714db6514/assets/spacemacs-badge.svg)](http://spacemacs.org)
[![BCH compliance](https://bettercodehub.com/edge/badge/Phundrak/lzw-assignment?branch=master)](https://bettercodehub.com/)
# LZW Compressing tool
This is a university assignment for which I aim to create an LZW algorithm implementation to create a small tool similar to `gzip` and `gunzip` that can compress and uncompress files in a lossless fashion.
This project is written in C++17 and compiled with clang in a UNIX environment. Other compilers and environments will not be tested.
## How to use it
Currently, five different options are available to the user:
- `-h` or `--help` will show how to use `projet_lzw`
- `-c` or `--compress` tells `projet_lzw` to compress the input file
- `-u` or `--uncompress` tells `projet_lzw` to uncompress the input file
- `-i <file>` or `--input <file path>` specifies the input file to be compressed or uncompressed (**MANDATORY**)
- `-o <file>` or `--output <file path>` specifies the name of the output file. If not used, the default output name for compression is `output.lzw`, and the default name for uncompressing is `<filename>_uncompressed`.
By default, `projet_lzw` will uncompress the (mandatory) input file.
A sixth option, `-p` or `--passes`, is planned for the future; it will allow the input file to be compressed multiple times.

2
bin/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

2
build/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

2
debug/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

3
doc/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
*
!.gitignore
!Doxyfile

2440
doc/Doxyfile Normal file

File diff suppressed because it is too large Load Diff

244
includes/getopt.c Normal file
View File

@ -0,0 +1,244 @@
/*
Copyright 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "getopt.h"
#include <stddef.h>
#include <stdio.h>
#include <string.h>
/* Parser state shared by getopt() and the getopt_long() family. */
/* optarg: argument text of the option that was just parsed, or 0 when none. */
char *optarg;
/* optind:   index of the next argv element to examine (0 requests a reset).
 * opterr:   when non-zero, diagnostics are printed to stderr.
 * optopt:   the option character/value most recently examined.
 * __optpos: offset inside the current argv element when short options are
 *           clustered; 0 means "start a new element".
 * optreset: when non-zero, the parser state is reset on the next call. */
int optind = 1, opterr = 1, optopt, __optpos, optreset = 0;
/* Short alias used throughout this file for the reserved-name variable. */
#define optpos __optpos
/*
 * Print a diagnostic of the form "<a><b><c>\n" on stderr.
 *
 * a: NUL-terminated prefix (callers pass the program name, argv[0]).
 * b: NUL-terminated message text.
 * c: option text, of which exactly l bytes are written.
 * l: number of bytes of c to write.
 *
 * The stream is locked for the duration of the message. Output stops at the
 * first piece that fails to be written; the failure is not reported.
 */
static void __getopt_msg(const char *a, const char *b, const char *c,
                         size_t l) {
  FILE *f = stderr;
  flockfile(f);
  if (fputs(a, f) >= 0) {
    if (fwrite(b, strlen(b), 1, f)) {
      if (fwrite(c, 1, l, f) == l) {
        putc('\n', f);
      }
    }
  }
  funlockfile(f);
}
/*
 * Parse the next short option from argv.
 *
 * argc, argv: the argument vector to scan.
 * optstring:  accepted option characters. A character followed by ':' takes
 *             an argument, followed by "::" takes an optional argument. A
 *             leading '-' makes non-option arguments be returned as the
 *             argument of option 1; a leading '+' or '-' is otherwise
 *             skipped. A leading ':' (after that) silences diagnostics and
 *             makes a missing argument be reported as ':'.
 *
 * Returns the option character, '?' for an unrecognized option or a missing
 * argument, ':' for a missing argument when optstring starts with ':', 1 for
 * a non-option argument in '-' mode, or -1 when option parsing is finished.
 * Updates optarg, optind, optopt and the intra-argument position.
 */
int getopt(int argc, char *const argv[], const char *optstring) {
  int i, c, d;
  int k;
  char *optchar;

  /* A zero optind or a set optreset asks for a fresh scan. */
  if (!optind || optreset) {
    optreset = 0;
    __optpos = 0;
    optind = 1;
  }

  if (optind >= argc || !argv[optind])
    return -1;

  if (argv[optind][0] != '-') {
    if (optstring[0] == '-') {
      optarg = argv[optind++];
      return 1;
    }
    return -1;
  }

  /* A lone "-" is an ordinary argument, not an option. */
  if (!argv[optind][1])
    return -1;

  /* "--" terminates option parsing and is consumed. */
  if (argv[optind][1] == '-' && !argv[optind][2])
    return optind++, -1;

  /* Skip the leading '-' when starting a new argument. */
  if (!optpos)
    optpos++;
  c = argv[optind][optpos], k = 1;
  optchar = argv[optind] + optpos;
  optopt = c;
  optpos += k;

  /* End of this cluster of short options: move on to the next argument. */
  if (!argv[optind][optpos]) {
    optind++;
    optpos = 0;
  }

  if (optstring[0] == '-' || optstring[0] == '+')
    optstring++;

  /* Look for c in optstring. The scan must stop at the terminating NUL:
   * without that check an unrecognized option walks off the end of the
   * string. After the loop, i indexes the character following the match. */
  i = 0;
  do {
    d = optstring[i++];
  } while (d && d != c);

  /* ':' is only a marker inside optstring, never a valid option itself. */
  if (d != c || c == ':') {
    if (optstring[0] != ':' && opterr)
      __getopt_msg(argv[0], ": unrecognized option: ", optchar, k);
    return '?';
  }

  if (optstring[i] == ':') {
    if (optstring[i + 1] == ':')
      optarg = 0; /* optional argument: only taken when attached */
    else if (optind >= argc) {
      if (optstring[0] == ':')
        return ':';
      if (opterr)
        __getopt_msg(argv[0], ": option requires an argument: ", optchar, k);
      return '?';
    }
    /* Required argument, or optional argument attached to the option. */
    if (optstring[i + 1] != ':' || optpos) {
      optarg = argv[optind++] + optpos;
      optpos = 0;
    }
  }
  return c;
}
/*
 * Move argv[src] down to index dest, shifting the elements that were at
 * dest..src-1 up by one slot. Nothing changes when src <= dest apart from
 * argv[dest] being rewritten with argv[src].
 */
static void permute(char *const *argv, int dest, int src) {
  char **slots = (char **)argv; /* the vector is reordered in place */
  char *moved = slots[src];
  int pos = src;
  while (pos > dest) {
    slots[pos] = slots[pos - 1];
    --pos;
  }
  slots[dest] = moved;
}
/*
 * Parse the argument at argv[optind] as a long option when it looks like one,
 * otherwise hand it to getopt() for short-option parsing.
 *
 * longopts: table of long options, terminated by an entry with a null name;
 *           may be null, in which case only short options are parsed.
 * idx:      if non-null, receives the index of the matched longopts entry.
 * longonly: non-zero lets a single leading '-' introduce a long option.
 *
 * Returns the matched entry's val (or 0 after storing val through its flag
 * pointer), '?' or ':' on errors, or whatever getopt() returns.
 * The caller is expected to have checked that argv[optind] is valid.
 */
static int __getopt_long_core(int argc, char *const *argv,
const char *optstring,
const struct option *longopts, int *idx,
int longonly) {
optarg = 0;
/* Long-option syntax: "--name", or "-name" when longonly is set. */
if (longopts && argv[optind][0] == '-' &&
((longonly && argv[optind][1] && argv[optind][1] != '-') ||
(argv[optind][1] == '-' && argv[optind][2]))) {
/* colon: optstring (after an optional leading '+'/'-') starts with ':',
 * which silences diagnostics and selects ':' for a missing argument. */
int colon = optstring[optstring[0] == '+' || optstring[0] == '-'] == ':';
int i, cnt, match;
char *opt;
/* Count the table entries that the argument is a prefix of. */
for (cnt = i = 0; longopts[i].name; i++) {
const char *name = longopts[i].name;
opt = argv[optind] + 1;
if (*opt == '-')
opt++;
for (; *name && *name == *opt; name++, opt++)
;
/* The argument text must end, or reach '=', where the comparison stopped. */
if (*opt && *opt != '=')
continue;
match = i;
/* An exact match wins immediately over any prefix matches. */
if (!*name) {
cnt = 1;
break;
}
cnt++;
}
if (cnt == 1) {
i = match;
optind++;
optopt = longopts[i].val;
if (*opt == '=') {
/* "--name=value" given for an option that takes no argument. */
if (!longopts[i].has_arg) {
if (colon || !opterr)
return '?';
__getopt_msg(argv[0],
": option does not take an argument: ", longopts[i].name,
strlen(longopts[i].name));
return '?';
}
optarg = opt + 1;
} else if (longopts[i].has_arg == required_argument) {
/* The argument is the next argv element; it must exist. */
if (!(optarg = argv[optind])) {
if (colon)
return ':';
if (!opterr)
return '?';
__getopt_msg(argv[0],
": option requires an argument: ", longopts[i].name,
strlen(longopts[i].name));
return '?';
}
optind++;
}
if (idx)
*idx = i;
/* With a flag pointer the value is stored there and 0 is returned. */
if (longopts[i].flag) {
*longopts[i].flag = longopts[i].val;
return 0;
}
return longopts[i].val;
}
/* "--something" that matched zero or several entries is an error; a
 * single-dash argument falls through to short-option parsing below. */
if (argv[optind][1] == '-') {
if (!colon && opterr)
__getopt_msg(argv[0],
cnt ? ": option is ambiguous: "
: ": unrecognized option: ",
argv[optind] + 2, strlen(argv[optind] + 2));
optind++;
return '?';
}
}
return getopt(argc, argv, optstring);
}
/*
 * Common driver behind getopt_long() and getopt_long_only().
 *
 * Unless optstring starts with '+' or '-', non-option arguments that precede
 * the next option are skipped and, once the option has been parsed, the
 * consumed option arguments are moved in front of them so that the skipped
 * arguments end up after the options in argv.
 *
 * Returns -1 when no further option is found, otherwise the result of
 * __getopt_long_core().
 */
static int __getopt_long(int argc, char *const *argv, const char *optstring,
const struct option *longopts, int *idx,
int longonly) {
int ret, skipped, resumed;
/* A zero optind or a set optreset asks for a fresh scan. */
if (!optind || optreset) {
optreset = 0;
__optpos = 0;
optind = 1;
}
if (optind >= argc || !argv[optind])
return -1;
/* skipped: index of the first argument examined by this call. */
skipped = optind;
if (optstring[0] != '+' && optstring[0] != '-') {
int i;
/* Advance to the next argument that starts with '-' and is longer than
 * a lone "-"; give up when the end of argv is reached first. */
for (i = optind;; i++) {
if (i >= argc || !argv[i])
return -1;
if (argv[i][0] == '-' && argv[i][1])
break;
}
optind = i;
}
/* resumed: index at which option parsing actually takes place. */
resumed = optind;
ret = __getopt_long_core(argc, argv, optstring, longopts, idx, longonly);
if (resumed > skipped) {
/* cnt arguments were consumed; rotate each one down to `skipped` so
 * the non-options that were jumped over now follow them. */
int i, cnt = optind - resumed;
for (i = 0; i < cnt; i++)
permute(argv, skipped, optind - 1);
optind = skipped + cnt;
}
return ret;
}
/*
 * Parse the next short or "--long" option from argv.
 * Thin wrapper around __getopt_long() with long-only mode disabled.
 */
int getopt_long(int argc, char *const *argv, const char *optstring,
                const struct option *longopts, int *idx) {
  const int longonly = 0;
  return __getopt_long(argc, argv, optstring, longopts, idx, longonly);
}
/*
 * Like getopt_long(), but a single leading '-' may also introduce a long
 * option. Thin wrapper around __getopt_long() with long-only mode enabled.
 */
int getopt_long_only(int argc, char *const *argv, const char *optstring,
                     const struct option *longopts, int *idx) {
  const int longonly = 1;
  return __getopt_long(argc, argv, optstring, longopts, idx, longonly);
}

54
includes/getopt.h Normal file
View File

@ -0,0 +1,54 @@
/*
Copyright 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/* Public interface of the bundled getopt / getopt_long implementation. */
#ifndef _GETOPT_H
#define _GETOPT_H
/* The functions have C linkage so they can be called from C++ sources. */
#ifdef __cplusplus
extern "C" {
#endif
/* Short-option parser: (argc, argv, optstring). */
int getopt(int, char *const[], const char *);
/* Argument of the option that was just parsed, if any. */
extern char *optarg;
/* Parser state: next argv index, diagnostics switch, last option examined,
 * and reset request flag. */
extern int optind, opterr, optopt, optreset;
/* One entry of the long-option table passed to getopt_long(). */
struct option {
/* Option name without leading dashes; a null name ends the table. */
const char *name;
/* One of no_argument, required_argument, optional_argument. */
int has_arg;
/* If non-null, val is stored here and the parser returns 0. */
int *flag;
/* Value returned (or stored through flag) when the option matches. */
int val;
};
/* Long-option parsers: (argc, argv, optstring, longopts, index-out). */
int getopt_long(int, char *const *, const char *, const struct option *, int *);
int getopt_long_only(int, char *const *, const char *, const struct option *,
int *);
/* Values for option::has_arg. */
#define no_argument 0
#define required_argument 1
#define optional_argument 2
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,177 +0,0 @@
#include "bitpack.hh"
#include "common.hh"
#include <algorithm>
#include <cmath>
using std::uint16_t;
using std::vector;
using uchar = unsigned char;
using vuint16 = vector<uint16_t>;
using vuchar = vector<uchar>;
using ustring = std::basic_string<unsigned char>;
/// \brief Largest value representable on \p t_n bits, i.e. 2^t_n - 1.
[[nodiscard]] int max(const int t_n) {
  const int two_to_the_n = ipow(2, t_n);
  return two_to_the_n - 1;
}
/// \brief Builds a 16-bit mask whose \p t_nb_bits lowest bits are set.
///
/// Computed recursively: the mask for n bits is the mask for n - 1 bits
/// shifted left by one with the lowest bit set. Bits shifted beyond the
/// sixteenth are discarded by the conversion to uint16_t.
///
/// \param t_nb_bits Number of low bits to set
/// \return The resulting mask (0 when \p t_nb_bits is 0)
[[nodiscard]] constexpr uint16_t mask_n(int t_nb_bits) {
  return t_nb_bits == 0
             ? uint16_t{0}
             : static_cast<uint16_t>((mask_n(t_nb_bits - 1) << 1) | 0x1);
}
/// Compile-time lookup table: masks[n] has its n lowest bits set, for n in
/// 0..16.
constexpr uint16_t masks[17] = {
mask_n(0), mask_n(1), mask_n(2), mask_n(3), mask_n(4), mask_n(5),
mask_n(6), mask_n(7), mask_n(8), mask_n(9), mask_n(10), mask_n(11),
mask_n(12), mask_n(13), mask_n(14), mask_n(15), mask_n(16)};
///////////////////////////////////////////////////////////////////////////////
// packing //
///////////////////////////////////////////////////////////////////////////////
/// \brief Packs a sequence of 16-bit codes into bytes.
///
/// Entry point of the packing: delegates to \ref pack_n over the whole input
/// with an initial width of 9 bits and a fresh output buffer.
///
/// \param t_input Codes to pack
/// \return The packed bytes
[[nodiscard]] vuchar pack(const vuint16 &t_input) {
  constexpr int initial_bits = 9;
  vuchar packed{};
  return pack_n(t_input.begin(), t_input.end(), packed, initial_bits);
}
/// \brief Packs the codes of a range on \p t_n bits each, appending to \p t_res.
///
/// When a code greater than or equal to the largest value representable on
/// \p t_n bits is met, the all-ones mask for that width is written and packing
/// resumes recursively from that code with one more bit per code. Once the
/// width reaches 16 the remaining codes are handed to \ref pack_16.
///
/// \param t_input_begin First code to pack
/// \param t_input_end End of the codes to pack
/// \param t_res Output buffer the bytes are appended to
/// \param t_n Number of bits used per code
/// \return A copy of \p t_res once the range has been processed
[[nodiscard]] vuchar pack_n(const vuint16::const_iterator t_input_begin,
const vuint16::const_iterator t_input_end,
vuchar &t_res, int t_n) {
if (t_n == 16) {
return pack_16(t_input_begin, t_input_end, t_res);
}
const int max_value = max(t_n); // max value held within t_n bits
int step = t_n % 8;
int left_shift = 0;
int right_shift = 0;
uchar current_char = 0;
bool char_touched = false;
// for each element
for (auto it = t_input_begin; it != t_input_end; ++it) {
// if the maximum value has been reached or exceeded, switch to a wider
// bit count
if (*it >= max_value) {
// write the mask so that decompression is notified of the change of
// bit width
if ((left_shift += step) >= t_n) {
left_shift = (left_shift - t_n) + step;
}
const auto mask = masks[t_n] >> left_shift;
t_res.push_back(static_cast<uchar>(current_char | mask));
bool zero_rs = (right_shift == 0);
right_shift -= step;
if (right_shift < 0 && !zero_rs) {
// right_shift is now below zero
// and it was non-zero beforehand, so an extra byte is needed
current_char = static_cast<uchar>(masks[t_n] >> (-right_shift) & 0xFFU);
t_res.push_back(current_char);
}
t_res.push_back(static_cast<uchar>(masks[t_n]));
// resume from the current code (it is not consumed) with one more bit
return pack_n(it, t_input_end, t_res, t_n + 1);
}
// regular write
if ((left_shift += step) >= t_n) {
left_shift = (left_shift - t_n) + step;
}
t_res.push_back(
static_cast<uchar>(current_char | (*it >> left_shift & 0xFFU)));
bool zero_rs = (right_shift == 0);
right_shift -= step;
if (right_shift < 0) {
if (!zero_rs) {
current_char = static_cast<uchar>(*it >> (-right_shift) & 0xFFU);
t_res.push_back(current_char);
}
right_shift = 8 + right_shift;
}
if (right_shift == 0) {
// the code ends exactly on a byte boundary: nothing is left pending
current_char = static_cast<uchar>(*it & 0xFFU);
t_res.push_back(current_char);
current_char = 0;
char_touched = false;
} else {
// keep the remaining low bits for the next output byte
current_char = static_cast<uchar>(*it << right_shift & 0xFFU);
char_touched = true;
}
}
// flush the partially filled last byte, if any
if (char_touched) {
t_res.push_back(current_char);
}
return t_res;
}
/// \brief Appends each 16-bit code of a range to \p t_res as two bytes.
///
/// The most significant byte of each code is written first.
///
/// \param t_input_begin First code to write
/// \param t_input_end End of the codes to write
/// \param t_res Output buffer the bytes are appended to
/// \return A copy of \p t_res after the codes were appended
[[nodiscard]] vuchar pack_16(const vuint16::const_iterator t_input_begin,
                             const vuint16::const_iterator t_input_end,
                             vuchar &t_res) {
  for (auto cursor = t_input_begin; cursor != t_input_end; ++cursor) {
    const auto code = *cursor;
    const auto high_byte = static_cast<uchar>(code >> 8 & 0xFFU);
    const auto low_byte = static_cast<uchar>(code & 0xFFU);
    t_res.push_back(high_byte);
    t_res.push_back(low_byte);
  }
  return t_res;
}
///////////////////////////////////////////////////////////////////////////////
// unpacking //
///////////////////////////////////////////////////////////////////////////////
/// \brief Unpacks a byte string into 16-bit codes.
///
/// Entry point of the unpacking: delegates to \ref unpack_n over the whole
/// input with an initial width of 9 bits and a fresh output buffer.
///
/// \param t_input Packed bytes
/// \return The decoded codes
[[nodiscard]] vuint16 unpack(ustring &&t_input) {
  constexpr int initial_bits = 9;
  vuint16 codes{};
  return unpack_n(t_input.begin(), t_input.end(), codes, initial_bits);
}
/// \brief Decodes codes stored on \p t_n bits each, appending to \p t_res.
///
/// When a decoded value is greater than or equal to the largest value
/// representable on \p t_n bits, it is treated as the width-change marker and
/// decoding resumes recursively after it with one more bit per code. Once the
/// width reaches 16 the remaining bytes are handed to \ref unpack_16.
///
/// \param t_begin First byte to decode
/// \param t_end End of the bytes to decode
/// \param t_res Output buffer the codes are appended to
/// \param t_n Number of bits used per code
/// \return A copy of \p t_res once the range has been processed
[[nodiscard]] vuint16 unpack_n(const ustring::const_iterator t_begin,
const ustring::const_iterator t_end,
vuint16 &t_res, int t_n) {
if (t_n == 16) {
return unpack_16(t_begin, t_end, t_res);
}
int step = t_n % 8;
int left_shift = 0;
int right_shift = 0;
const int max_value = max(t_n);
// NOTE(review): `t_end - 1` is formed even for an empty range — confirm
// callers never pass one. The iterator is advanced inside the body.
for (auto it = t_begin; it < t_end - 1; /* nope */) {
uint16_t current_char = 0;
// left bits
if ((left_shift += step) >= t_n) {
left_shift = (left_shift - t_n) + step;
}
current_char = static_cast<uint16_t>(*it << left_shift) & masks[t_n];
// right bits
bool zero_rs = (right_shift == 0);
right_shift -= step;
if (right_shift < 0) {
// if previous right shift was negative and not zero
if (!zero_rs) {
current_char |= *++it << (-right_shift) & masks[16 + right_shift];
}
right_shift = 8 + right_shift;
}
current_char |= *++it >> right_shift & masks[8 - right_shift];
// char made!
if (current_char >= max_value) { // if it is the mask
return unpack_n(it + 1, t_end, t_res, t_n + 1);
}
current_char &= masks[t_n];
t_res.push_back(current_char);
// the code ended on a byte boundary: start the next one on a fresh byte
if (right_shift == 0) {
++it;
}
}
return t_res;
}
/// \brief Decodes big-endian byte pairs into 16-bit codes, appending to \p t_res.
///
/// Each pair of consecutive bytes (most significant first) yields one code.
/// The two bytes are read through distinct, explicitly ordered accesses, and
/// a trailing unpaired byte is ignored instead of reading past the end of the
/// range.
///
/// \param t_begin First byte to decode
/// \param t_end End of the bytes to decode
/// \param t_res Output buffer the codes are appended to
/// \return A copy of \p t_res after the codes were appended
[[nodiscard]] std::vector<std::uint16_t>
unpack_16(const std::basic_string<unsigned char>::const_iterator t_begin,
          const std::basic_string<unsigned char>::const_iterator t_end,
          std::vector<std::uint16_t> &t_res) {
  // Only complete pairs are decoded; an odd trailing byte is left out.
  const auto pair_count = (t_end - t_begin) / 2;
  auto it = t_begin;
  for (auto remaining = pair_count; remaining > 0; --remaining, it += 2) {
    const unsigned int high = it[0];
    const unsigned int low = it[1];
    t_res.push_back(static_cast<std::uint16_t>(high << 8 | low));
  }
  return t_res;
}

View File

@ -1,34 +0,0 @@
// Declarations of the bit-packing helpers that convert between sequences of
// 16-bit codes and byte buffers.
#ifndef LZW_SRC_BITPACK_H_
#define LZW_SRC_BITPACK_H_
#include <cstdint>
#include <string>
#include <vector>
/// \brief Packs a whole vector of codes into bytes.
[[nodiscard]] std::vector<unsigned char>
pack(const std::vector<std::uint16_t> &);
/// \brief Packs a range of codes on a given number of bits per code,
/// appending to the byte buffer passed by reference.
[[nodiscard]] std::vector<unsigned char>
pack_n(std::vector<std::uint16_t>::const_iterator,
std::vector<std::uint16_t>::const_iterator, std::vector<unsigned char> &,
int);
/// \brief Packs a range of codes on 16 bits each, appending to the byte
/// buffer passed by reference.
[[nodiscard]] std::vector<unsigned char>
pack_16(std::vector<std::uint16_t>::const_iterator,
std::vector<std::uint16_t>::const_iterator,
std::vector<unsigned char> &);
/// \brief Unpacks a whole byte string into codes.
[[nodiscard]] std::vector<std::uint16_t>
unpack(std::basic_string<unsigned char> &&);
/// \brief Unpacks a byte range holding codes of \p t_n bits each, appending
/// to the code buffer passed by reference.
[[nodiscard]] std::vector<std::uint16_t>
unpack_n(std::basic_string<unsigned char>::const_iterator t_begin,
std::basic_string<unsigned char>::const_iterator t_end,
std::vector<std::uint16_t> &, int t_n);
/// \brief Unpacks a byte range holding 16-bit codes, appending to the code
/// buffer passed by reference.
[[nodiscard]] std::vector<std::uint16_t>
unpack_16(std::basic_string<unsigned char>::const_iterator,
std::basic_string<unsigned char>::const_iterator,
std::vector<std::uint16_t> &);
#endif /* LZW_SRC_BITPACK_H_ */

View File

@ -1,69 +1,44 @@
/**
* \file common.cc
* \brief Implementation for functions in common
*/
#include "common.hh"
using std::uint16_t;
using std::uint8_t;
using dic_comp_t = std::map<std::pair<uint16_t, uint8_t>, uint16_t>;
using ustring = std::basic_string<unsigned char>;
using p_ustring = std::shared_ptr<ustring>;
using uint8_t = std::uint8_t;
using uint32_t = std::uint32_t;
using dic_t = std::map<std::pair<uint32_t, uint8_t>, uint32_t>;
/// \brief Integer power computed by repeated squaring.
///
/// \param base Value to raise
/// \param exp Exponent; the loop only terminates for non-negative values
/// \return \p base raised to the power \p exp (1 when \p exp is 0)
[[nodiscard]] int ipow(int base, int exp) {
  int result = 1;
  do {
    // Multiply in the current power of base when its exponent bit is set.
    if ((exp & 1) != 0) {
      result *= base;
    }
    exp >>= 1;
    // Square only while bits remain, so the last squaring is never computed.
    if (exp != 0) {
      base *= base;
    }
  } while (exp != 0);
  return result;
}
[[nodiscard]] std::pair<bool, uint16_t>
dico(dic_comp_t &t_dictionary, const uint16_t t_nr_chaine, const uint8_t t_c) {
/**
* This function serves two purposes: looking up a character string in the
* dictionary, or adding a new string if it is not already present. A string
* is represented by a (string number, character) pair: the string number
* refers to the preceding characters (either an ASCII code or an index in the
* dictionary) and the character is the last character of the current string.
* If the string number is -1, the character is the first one of the string
* and the returned value is the ASCII value of that character. The function
* returns a bool/uint32_t pair: the boolean tells whether a new string was
* added to the dictionary or not, and the uint32_t is the numeric value of
* the string in the dictionary.
*
* \param t_dictionary Dictionary
* \param t_nr_chaine Number of the string preceding the character \p t_c in \p t_dictionary
* \param t_c Character following the string \p t_nr_chaine
* \return const std::pair<bool, uint32_t>
*/
const std::pair<bool, uint32_t>
dico(std::map<std::pair<uint32_t, uint8_t>, uint32_t> &t_dictionary,
uint32_t t_nr_chaine, uint8_t t_c) {
if (t_nr_chaine == 0xFFFF) {
return std::make_pair(true, t_c);
}
auto &e = t_dictionary[std::make_pair(t_nr_chaine, t_c)];
if (e != 0)
return std::make_pair(true, e);
e = static_cast<uint16_t>(t_dictionary.size() + 255);
return std::make_pair(false, e);
}
[[nodiscard]] ustring dico_uncompress(std::map<uint16_t, ustring> &t_dict,
const uint16_t t_code,
const uint16_t t_old) {
// le code existe dans le dictionnaire sil est < 256
if (t_code < 256) {
ustring e{static_cast<unsigned char>(t_code)};
// 256 car on n'a pas encore tenté d'insérer de nouveau caractère
if (t_old < 256) {
t_dict[static_cast<uint16_t>(t_dict.size() + 256)] =
static_cast<unsigned char>(t_old) + e;
} else {
t_dict[static_cast<uint16_t>(t_dict.size() + 256)] = t_dict[t_old] + e;
}
return e;
}
auto &e = t_dict[t_code];
auto str = (t_old < 256) ? ustring{static_cast<unsigned char>(t_old)}
: t_dict[t_old];
// le code existe dans le dictionnaire
if (!e.empty()) {
str += e[0];
const auto index = static_cast<uint16_t>(t_dict.size() + 256);
t_dict[index] = str;
return e;
}
// le code n'existe pas encore dans le dictionnaire
str += str[0];
e = str;
t_dict[t_code] = e;
return e;
return (e != 0) ? std::make_pair(true, e)
: std::make_pair(
false,
(e = static_cast<
typename std::remove_reference<decltype(e)>::type>(
t_dictionary.size()) +
255));
}

View File

@ -1,18 +1,17 @@
/**
* \file common.hh
* \brief Header for functions in common
*/
#ifndef LZW_SRC_COMMON_H_
#define LZW_SRC_COMMON_H_
#include <cstdint>
#include <map>
#include <memory>
[[nodiscard]] int ipow(int, int);
[[nodiscard]] std::pair<bool, std::uint16_t>
dico(std::map<std::pair<std::uint16_t, std::uint8_t>, std::uint16_t> &,
const std::uint16_t, const std::uint8_t);
[[nodiscard]] std::basic_string<unsigned char>
dico_uncompress(std::map<std::uint16_t, std::basic_string<unsigned char>> &,
const std::uint16_t, const std::uint16_t);
/// \brief Recherche ou ajout de chaine dans le dictionnaire
const std::pair<bool, std::uint32_t>
dico(std::map<std::pair<std::uint32_t, std::uint8_t>, std::uint32_t> &,
uint32_t, uint8_t);
#endif /* LZW_SRC_COMMON_H_ */

View File

@ -1,77 +1,149 @@
/**
* \file compress.cc
* \brief Implementation of compression
*/
#include "compress.hh"
#include "common.hh"
#include "io.hh"
#include "utf8.hh"
#include <cassert>
#include <cstdlib>
#include <fstream>
#include <iterator>
using std::ios;
using std::string;
using std::uint16_t;
using std::uint8_t;
using std::vector;
using vuint16 = vector<uint16_t>;
using vvuint16 = vector<vuint16>;
using ustring = std::basic_string<unsigned char>;
#include <iostream>
#include "io.hh"
using dict_t = std::map<std::pair<uint16_t, uint8_t>, uint16_t>;
using dict_t = std::map<std::pair<uint32_t, uint8_t>, uint32_t>;
using ustring = std::basic_string<uint8_t>; // chaîne non encodée
using uvec = std::vector<std::uint32_t>; // chaîne encodée
using std::printf;
[[nodiscard]] ustring read_file(const string &filename) {
std::ifstream file{filename, ios::binary};
assert(file);
file.unsetf(ios::skipws);
file.seekg(0, ios::end);
const auto file_size = file.tellg();
file.seekg(0, ios::beg);
ustring res{};
res.reserve(file_size);
res.insert(res.begin(), std::istream_iterator<unsigned char>(file),
std::istream_iterator<unsigned char>());
file.close();
return res;
constexpr size_t CHUNK_SIZE = 32768;
/**
*
* Reçoit une liste de paires std::thread/vecteurs, le premier étant le
* processus dont sa sortie est stockée dans le second. La sortie, une liste
* de caractères uint32_t, est écrite dans le fichier de sortie \p out.
*
* \param[in] t_threads
* \param[out] t_out
*/
void join_and_write(
std::vector<std::pair<std::unique_ptr<std::thread>, uvec>> &t_threads,
std::vector<std::vector<std::uint32_t>> &compressed_text) {
for (auto &elem : t_threads) {
(*elem.first).join();
}
for (auto &elem : t_threads) {
compressed_text.push_back(std::move(elem.second));
}
t_threads.clear();
}
[[nodiscard]] vvuint16 lzw_compress(ustring &&t_text) {
vvuint16 res{};
const auto DICT_MAX = static_cast<size_t>(ipow(2, 14) - 256); /* 16 bits */
uint16_t w = 0xFFFF;
vuint16 chunk{};
dict_t dict{};
for (const auto c : t_text) {
if (dict.size() >= DICT_MAX) {
if (w != 0xFFFF) {
chunk.push_back(w);
}
res.push_back(chunk);
w = 0xFFFF;
chunk.clear();
dict.clear();
}
if (const auto &[exists, pos] = dico(dict, w, static_cast<uint8_t>(c));
exists) {
/**
* The character string \p t_text is read character by character and,
* depending on the value returned by the function \ref dico (which builds the
* dictionary at the same time), a word is or is not appended to the vector of
* UTF-8 characters representing the compressed words of chars. The function
* returns that vector of uint32_t through the parameter \p t_res.
*
* \param[in] t_text String of uint8_t characters representing the input file
* \param[out] t_res Output character string
*/
void lzw_compress(const std::vector<char> &t_text, uvec &t_res) {
dict_t dictionary{};
std::puts("Compressing...");
uint32_t w = 0xFFFF;
constexpr size_t DICT_MAX = 7936; /* 12 bits */
for (const auto &c : t_text) {
if (dictionary.size() >= DICT_MAX) {
t_res.push_back(static_cast<uint32_t>(w));
w = static_cast<uint32_t>(c);
} else if (const auto &[exists, pos] =
dico(dictionary, w, static_cast<std::uint8_t>(c));
exists) {
w = pos;
} else {
chunk.push_back(w);
w = static_cast<uint16_t>(c);
t_res.push_back(static_cast<uint32_t>(w));
w = static_cast<std::uint8_t>(c);
}
}
if (w != 0xFFFF) {
chunk.push_back(w);
res.push_back(std::move(chunk));
}
return res;
}
/**
* Wrapper around the function \ref lzw_compress that handles opening,
* reading, writing and closing the input and output files. If \p t_out_file
* is null (no path specified), it takes the value of \p t_in_file with the
* `.lzw` extension appended.
*
* \param[in] t_in_file Path to the input file
* \param[in] t_out_file Path to the output file
*/
void compress(const std::string &t_in_file, const char *t_out_file) {
std::ofstream out{(t_out_file != nullptr) ? t_out_file : "output.lzw",
ios::out | ios::binary};
if (!out.is_open()) {
std::cerr << "Error at " << __FILE__ << ":" << __LINE__ - 4
<< ": could not open output file. Aborting...\n";
// Fichier dentrée
std::ifstream input_file{t_in_file};
if (!input_file.is_open()) {
std::cerr << "Error at " << __FILE__ << ":" << __LINE__ - 2
<< ": could not open output file \"" << t_in_file
<< "\". Aborting...\n";
exit(1);
}
const auto compressed_text(lzw_compress(read_file(t_in_file)));
// Fichier de sortie
FILE *out =
(t_out_file != nullptr) ? fopen(t_out_file, "wb") : fopen("output.lzw", "wb");
if (out == nullptr) {
std::cerr << "Error at " << __FILE__ << ":" << __LINE__ - 4
<< ": could not open output file. Aborting...\n";
input_file.close();
exit(1);
}
// collection of chunks
std::vector<std::vector<std::uint32_t>> compressed_text{};
// thread pool
std::vector<std::pair<std::unique_ptr<std::thread>, uvec>> threads{};
// chunk chars
std::vector<char> chunk(CHUNK_SIZE, 0);
while (input_file.read(chunk.data(),
static_cast<std::streamsize>(chunk.size()))) {
threads.emplace_back(nullptr, uvec{});
threads.back().second.reserve(CHUNK_SIZE);
threads.back().first = std::make_unique<std::thread>(
std::thread{lzw_compress, chunk, ref(threads.back().second)});
assert(threads.back().first);
if (threads.size() >= 8) {
join_and_write(threads, compressed_text);
}
}
if (!threads.empty()) {
join_and_write(threads, compressed_text);
}
if (input_file.tellg() != std::ios::end) {
std::puts("Leftovers, compressing...");
{
const auto prev_pos = input_file.tellg();
input_file.seekg(0, std::ios::end);
chunk.reserve(static_cast<size_t>(input_file.tellg() - prev_pos));
input_file.seekg(prev_pos, std::ios::beg);
std::istreambuf_iterator<char> itr(input_file);
for (std::streamoff i = 0; i < prev_pos; ++i, ++itr){
;
}
chunk.assign((itr), std::istreambuf_iterator<char>());
}
uvec ret{};
lzw_compress(chunk, ret);
compressed_text.push_back(std::move(ret));
}
write_file(out, compressed_text);
out.close();
fclose(out);
input_file.close();
}

View File

@ -1,14 +1,25 @@
/**
* \file compress.hh
* \brief Header for compression functions
*/
#ifndef LZW_SRC_COMPRESS_H_
#define LZW_SRC_COMPRESS_H_
#include "common.hh"
#include <vector>
#include <iostream>
#include <thread>
#include <vector>
[[nodiscard]] std::vector<std::vector<std::uint16_t>>
lzw_compress(std::basic_string<unsigned char> &&);
/// \brief Exécution des threads et écriture de leur résultat dans le fichier de sortie
void join_and_write(std::vector<std::pair<std::unique_ptr<std::thread>,
std::vector<std::uint32_t>>> &,
std::vector<std::vector<std::uint32_t>> &);
/// \brief Compression d'une chaine de caractères
void lzw_compress(const std::vector<char> &, std::vector<std::uint32_t> &);
/// \brief Wrapper de \ref lzw_compress
void compress(const std::string &, const char *);
#endif /* LZW_SRC_COMPRESS_H_ */

110
src/io.cc
View File

@ -1,27 +1,95 @@
/**
* \file io.cc
* \brief Body for file reading and writing
*/
#include "io.hh"
#include "bitpack.hh"
#include <array>
using std::uint16_t;
using std::vector;
using vuint16 = vector<uint16_t>;
using vvuint16 = vector<vuint16>;
void write_file(std::ofstream &t_out, const vvuint16 &t_chunks) {
const auto nr_chunks = static_cast<uint16_t>(t_chunks.size());
#ifdef Debug
std::printf("Number of chunks: %u\n", nr_chunks);
constexpr bool debug_mode = true;
#else
constexpr bool debug_mode = false;
#endif
t_out.write(reinterpret_cast<const char *>(&nr_chunks), sizeof(nr_chunks));
for (const auto &chunk : t_chunks) {
write_chunk(t_out, chunk);
/**
 * Writes the chunks given as parameter to the file \p t_out. The output file
 * is made of the following elements:\n
 * - a `uint32_t` on four bytes giving the size of a character (12)\n
 * - a `uint32_t` on four bytes giving the number of chunks in the file\n
 * - the chunks themselves, on a variable number of bytes depending on the
 *   character size and the number of characters\n
 * \n
 * A chunk is laid out as follows:\n
 * - a `uint32_t` on four bytes giving the number of characters in the chunk\n
 * - the characters of the chunk, 12 bits each, packed one after the other.
 *   If the last character does not fill the last byte of the chunk, the
 *   remaining bits are set to 0.\n
 * The size of a chunk is therefore the character size multiplied by the
 * number of characters, divided by 8 and rounded up to a whole number of
 * bytes (e.g. a result of 103.4 means 104 bytes are used).
 *
 * The `uint32_t` headers are written as raw in-memory bytes.
 *
 * \param[out] t_out Output file
 * \param[in] t_text Ordered collection of the chunks to write to \p t_out
 */
void write_file(FILE *t_out, std::vector<std::vector<std::uint32_t>> &t_text) {
  {
    uint32_t char_size = 12;
    if constexpr (debug_mode) {
      std::printf("Char size: %u\n", char_size);
    }
    fwrite(&char_size, sizeof(uint32_t), 1, t_out);
    auto size = static_cast<uint32_t>(t_text.size());
    if constexpr (debug_mode) {
      std::printf("Number of chunks: %u\n", size);
    }
    fwrite(&size, sizeof(uint32_t), 1, t_out);
  }
  for (const auto &chunk : t_text) {
    // write size of chunk in uint32_t
    {
      auto size = static_cast<uint32_t>(chunk.size());
      if constexpr (debug_mode) {
        std::printf("Size of chunk: %u\n", size);
      }
      fwrite(&size, sizeof(uint32_t), 1, t_out);
    }
    // Two 12-bit characters are packed into three bytes: AAAAAAAA AAAABBBB
    // BBBBBBBB. `remainder` carries the shared middle byte between the two.
    uint8_t remainder = 0x00;
    for (size_t i = 0; i < chunk.size(); ++i) {
      if (i % 2 == 0) {
        // char = xxxx xxxx xxxx
        //        ^^^^^^^^^ ^^^^
        //        written   kept in remainder as xxxx0000
        auto temp = static_cast<unsigned char>(chunk[i] >> 4);
        fwrite(&temp, sizeof(temp), 1, t_out);
        if constexpr (debug_mode) {
          std::printf("writing: %x\t\t", temp);
        }
        remainder = static_cast<uint8_t>(chunk[i] << 4);
      } else {
        // already have `remainder = yyyy0000`
        // char = xxxx xxxx xxxx
        //        ^^^^ ^^^^^^^^^
        //        merged into remainder (yyyyxxxx), then the low byte follows
        // The high nibble of this character must be OR-ed into the low
        // nibble of the pending byte; masking the pending byte instead would
        // discard both halves.
        remainder |= static_cast<uint8_t>((chunk[i] >> 8) & 0x0F);
        fwrite(&remainder, sizeof(remainder), 1, t_out);
        if constexpr (debug_mode) {
          std::printf("writing remainder: %x\t\t", remainder);
        }
        auto temp = static_cast<unsigned char>(chunk[i]);
        fwrite(&temp, sizeof(temp), 1, t_out);
        if constexpr (debug_mode) {
          std::printf("writing: %x\n", temp);
        }
        remainder = 0x00;
      }
    }
    // An odd number of characters leaves half a byte pending. It is always
    // flushed, even when its bits are all zero, so that the chunk occupies
    // the documented number of bytes.
    if (chunk.size() % 2 != 0) {
      fwrite(&remainder, sizeof(remainder), 1, t_out);
    }
  }
}
void write_chunk(std::ofstream &t_out, const vuint16 &t_chunk) {
const auto output = pack(t_chunk);
const auto chunk_size = static_cast<uint32_t>(output.size());
t_out.write(reinterpret_cast<const char *>(&chunk_size), sizeof(chunk_size));
t_out.write(reinterpret_cast<const char *>(output.data()),
sizeof(output[0]) * output.size());
}

View File

@ -1,15 +1,31 @@
/**
* \file io.h
* \brief Header for file reading and writing
*/
#ifndef LZW_SRC_IO_H_
#define LZW_SRC_IO_H_
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <cstdint>
#include <vector>
void write_file(std::ofstream &,
const std::vector<std::vector<std::uint16_t>> &);
/*
* A compressed file is laid out as follows:
* char_size : size of a character in bits (4B)
* nb_chunk : number of chunks (4B)
* chunks* : the chunks
*
* A chunk is laid out as follows:
* nb_char_chunk : number of characters in the chunk (4B)
* text* : characters of char_size bits each (ceil((char_size * nb_char_chunk) / 8) bytes)
*
* If the last character does not fill the last byte of the chunk, the
* remaining bits are set to zero
*/
void write_chunk(std::ofstream &, const std::vector<std::uint16_t> &);
/// \brief Écrit dans le fichier le texte compressé
void write_file(FILE *, std::vector<std::vector<std::uint32_t>> &);
#endif /* LZW_SRC_IO_H_ */

View File

@ -1,45 +1,68 @@
/**
* \file main.cc
* \brief Main file
*
*
*
*/
#ifdef Debug
constexpr bool debug_mode = true;
#else
constexpr bool debug_mode = false;
#endif
#include "compress.hh"
#include "uncompress.hh"
#include <cassert>
#include <getopt.h>
#include <tuple>
#include "getopt.h"
using std::printf;
using std::puts;
using std::string;
using std::tuple;
// custom types ///////////////////////////////////////////////////////////////
/*
Dictionary:
<
<
index of the previous string,
ASCII character
>
index of the current string
>
*/
using dic_t = std::map<std::pair<uint32_t, uint8_t>, uint32_t>;
using ustring = std::basic_string<uint8_t>; // chaine non encodée
using uvec = std::vector<uint32_t>; // chaine encodée
/**
 * \brief Prints the usage help with `puts`.
 *
 * NOTE(review): as it stands here the body holds two variants of the usage
 * text, a single multi-line literal followed by one `puts` call per line, so
 * the help is printed twice. This looks like both sides of a diff; confirm
 * which variant is meant to stay.
 */
void help() {
puts("Usage:\n\
lzw [-options] [-i path] [-o path]\n\n\
The default action is to compress the input file to a .lzw file\n\
in which the directory in which the software is executed.\n\
Options available:\n\
-h --help\n\
\tdisplay the current message\n\
-i --input\n\
\tpath to the input file (MANDATORY)\n\
-o --output\n\
\tpath to the output file (if the file already exists, it will be\n\n\
\toverwritten). Default: input path + \".lzw\\n\
-c --compress\n\
\tcompress the input file\n\
-u --uncompress\n\
\tuncompresses the input file to the output file. If no output path\n\
\thas not been entered and if the input file ends with \".lzw\",\n\
\tthe extension \".lzw\" will be removed; otherwise, the extension\n\
\t\"_uncompresed\" will be added");
// NOTE(review): the text below documents `-d` for decompression, whereas the
// option string handed to getopt_long in this file is "hi:o:cu" (i.e. `-u`).
puts("Usage:");
puts("lzw [-options] [-i path] [-o path]");
puts("\tThe default action is to compress the input file to a .lzw file");
puts("\tin which the directory in which the software is executed.");
puts("\tOptions available:");
puts("\t-i\tpath to the input file (mandatory)");
puts("\t-o\tpath to the output file (if the file already exists, it will");
puts("\t\tbe overwritten). Default: input path + \".lzw\"");
puts("\t-c\tcompress the input file");
puts("\t-d\tdecompresses the input file to the output file. If no output");
puts("\t\tpath has not been entered and if the input file ends with ");
puts("\t\t\".lzw\", the extension \".lzw\" will be removed; otherwise, the ");
puts("\t\textension \".uncompresed\" will be added");
}
[[nodiscard]] std::tuple<string, string, bool> process_args(int t_argc,
char *t_argv[]) {
auto ret = std::make_tuple(string{}, string{}, true);
int main(int argc, char *argv[]) {
if constexpr (debug_mode) {
for (int i = 0; i < argc; ++i)
printf("argv[%d] = %s\n", i, argv[i]);
}
std::string input_path{};
std::string output_path{};
bool compressing = true;
while (true) {
int option_index = 0;
static struct option long_options[] = {
@ -49,49 +72,97 @@ Options available:\n\
{"compress", no_argument, nullptr, 'c'},
{"uncompress", no_argument, nullptr, 'u'},
{nullptr, 0, nullptr, 0}};
int c = getopt_long(t_argc, t_argv, "hi:o:cu", long_options, &option_index);
int c = getopt_long(argc, argv, "hi:o:cu", long_options, &option_index);
if (c == -1)
break;
switch (c) {
case 0:
case 0: {
if constexpr (debug_mode) {
printf("\noption %s", long_options[option_index].name);
if (optarg) {
printf(" with arg %s\n", optarg);
}
}
break;
case 'h':
}
case 'h': {
if constexpr (debug_mode) {
printf("From main - option --help passed\n");
}
help();
exit(0);
case 'i':
std::get<0>(ret) = optarg;
return 0;
}
case 'i': {
if constexpr (debug_mode) {
printf("From main - option --input with value '%s'\n", optarg);
}
input_path = optarg;
break;
case 'o':
std::get<1>(ret) = optarg;
}
case 'o': {
if constexpr (debug_mode) {
printf("From main - option --output with value '%s'\n", optarg);
}
output_path = optarg;
break;
case 'c':
std::get<2>(ret) = true;
}
case 'c': {
if constexpr (debug_mode) {
printf("From main - option --compress\n");
}
compressing = true;
break;
case 'u':
std::get<2>(ret) = false;
}
case 'u': {
if constexpr (debug_mode) {
printf("From main - option --uncompress\n");
}
compressing = false;
break;
}
case '?':
[[fallthrough]];
default:
default: {
puts("Error: unknown parameter.");
if constexpr (debug_mode) {
printf("From main - option -?\n");
}
help();
exit(1);
return 1;
}
}
}
return ret;
}
int main(int argc, char *argv[]) {
const auto [input_path, output_path, compressing] = process_args(argc, argv);
if (input_path.empty()) {
help();
return 0;
puts("Error: no input file specified");
return 2;
}
if (compressing) {
compress(input_path, (output_path.empty()) ? nullptr : output_path.c_str());
/*
TODO:
- compress the input file piece by piece, 16 KiB at a time
- write the size of the compressed segment, then the compressed segment
- multithreading
- multiple compression passes: number of passes, then the compressed file
- bit-packing, limit the size of the dictionary to a given number of
bits.
*/
if constexpr (debug_mode) {
puts("Beginning compression");
}
if (output_path.empty()) {
compress(input_path, nullptr);
} else {
compress(input_path, output_path.c_str());
}
// compress(input_path, output_path.c_str());
} else {
uncompress(input_path,
(output_path.empty()) ? nullptr : output_path.c_str());
puts("Not yet implemented :(");
/*
Inversion des types du dictionnaire pour retrouver les chaînes plus
aisément
*/
}
return 0;
}

View File

@ -1,60 +0,0 @@
#include "uncompress.hh"
#include "bitpack.hh"
#include "common.hh"
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iterator>
using std::fclose;
using std::fopen;
using std::fseek;
using std::string;
using std::uint16_t;
using std::vector;
using ustring = std::basic_string<unsigned char>;
using vuint16 = vector<uint16_t>;
/**
 * \brief Decodes one chunk of LZW codes back into raw bytes.
 *
 * The first code is emitted as a single byte; every following code is
 * resolved through `dico_uncompress`, which receives the dictionary, the
 * current code and the previous one.
 *
 * \param[in] t_compressed Codes of the chunk; may be empty
 * \return The decoded bytes, empty when \p t_compressed is empty
 */
[[nodiscard]] ustring lzw_uncompress(vuint16 &&t_compressed) {
  ustring ret{};
  // Nothing to decode: reading element 0 of an empty vector would be
  // undefined behaviour.
  if (t_compressed.empty()) {
    return ret;
  }
  std::map<uint16_t, ustring> dict{};
  uint16_t old = t_compressed.front();
  ret.push_back(static_cast<unsigned char>(old));
  for (auto it = t_compressed.begin() + 1; it != t_compressed.end(); ++it) {
    const auto uncompressed{dico_uncompress(dict, *it, old)};
    ret.insert(ret.end(), uncompressed.begin(), uncompressed.end());
    old = *it;
  }
  return ret;
}
void uncompress(const string &t_input_name, const char *t_output_name) {
FILE *const input = std::fopen(t_input_name.c_str(), "rb");
assert(input);
std::ofstream output{(t_output_name != nullptr)
? t_output_name
: t_input_name + "_uncompressed",
std::ios::out | std::ios::binary};
assert(output.is_open());
uint16_t nb_chunks = 0;
std::fread(&nb_chunks, sizeof(nb_chunks), 1, input);
for (uint16_t i = 0; i < nb_chunks; ++i) {
uncompress_chunk(input, output);
}
output.close();
std::fclose(input);
}
/**
 * \brief Reads one chunk from \p t_input, decodes it and appends the result
 *        to \p t_output.
 *
 * A chunk is a `uint32_t` byte count followed by that many packed bytes. The
 * bytes go through `unpack`, then `lzw_uncompress`. If the count or the
 * payload cannot be read in full, or the count is zero, nothing is written.
 *
 * \param[in]  t_input  Compressed file, positioned at the start of a chunk
 * \param[out] t_output Stream receiving the decoded bytes
 */
void uncompress_chunk(FILE *const t_input, std::ofstream &t_output) {
  uint32_t size_chunk = 0;
  if (std::fread(&size_chunk, sizeof(size_chunk), 1, t_input) != 1) {
    std::fprintf(stderr, "Error: could not read the size of a chunk\n");
    return;
  }
  // An empty chunk carries no code to decode.
  if (size_chunk == 0) {
    return;
  }

  const auto chunk = std::make_unique<unsigned char[]>(size_chunk);
  if (std::fread(chunk.get(), sizeof(unsigned char), size_chunk, t_input) !=
      size_chunk) {
    // Decoding a partially filled buffer would emit garbage.
    std::fprintf(stderr, "Error: truncated chunk\n");
    return;
  }

  auto unpacked = unpack(ustring{chunk.get(), chunk.get() + size_chunk});
  const auto uncompressed_chunk = lzw_uncompress(std::move(unpacked));
  t_output.write(reinterpret_cast<const char *>(uncompressed_chunk.data()),
                 sizeof(uncompressed_chunk[0]) * uncompressed_chunk.size());
}

View File

@ -1,16 +0,0 @@
#ifndef LZW_SRC_UNCOMPRESS_H_
#define LZW_SRC_UNCOMPRESS_H_
#include <fstream>
#include <memory>
#include <string>
#include <vector>
[[nodiscard]] std::basic_string<unsigned char>
lzw_uncompress(std::vector<std::uint16_t> &&);
void uncompress(const std::string &, const char *);
void uncompress_chunk(FILE *, std::ofstream &);
#endif /* LZW_SRC_UNCOMPRESS_H_ */

52
src/utf8.cc Normal file
View File

@ -0,0 +1,52 @@
/**
* \file utf8.cc
* \brief Implementation for UTF-8 related functions
*/
#include "utf8.hh"
#include <string>
using FILE = std::FILE;
using uint8_t = std::uint8_t;
using uint32_t = std::uint32_t;
using ustring = std::basic_string<uint8_t>; // chaine non encodée
/**
 * \brief Writes the code point \p t_c to \p t_out encoded in UTF-8.
 *
 * Values below 128 are written as a single byte. Larger values use a lead
 * byte followed by one to five continuation bytes of six payload bits each
 * (the original 31-bit UTF-8 scheme, so values above U+10FFFF produce the
 * historical 5- and 6-byte forms).
 *
 * \param[out] t_out Output file
 * \param[in]  t_c   Code point to write to \p t_out
 */
void write_utf8(std::FILE* t_out, std::uint32_t t_c) {
  if (t_c < 128) {
    // Go through a real byte rather than the first byte of the 32-bit value
    // in memory, which is only the low byte on little-endian hosts.
    const auto ascii = static_cast<unsigned char>(t_c);
    std::fwrite(&ascii, sizeof(ascii), 1, t_out);
    return;
  }

  // `loops` is the number of continuation bytes, `header` the marker bits of
  // the lead byte.
  std::size_t loops = 0;
  unsigned char header = 0;
  if (t_c < 2048) {
    loops = 1;
    header = 0xC0;
  } else if (t_c < 65536) {
    loops = 2;
    header = 0xE0;
  } else if (t_c < 2097152) {
    loops = 3;
    header = 0xF0;
  } else if (t_c < 67108864) {
    loops = 4;
    header = 0xF8;
  } else {
    loops = 5;
    header = 0xFC;
  }

  unsigned char bytes[6] = {};
  for (std::size_t i = 0; i <= loops; ++i) {
    // Shift first, mask afterwards: masking before the shift would throw
    // away every bit above the low byte.
    const std::uint32_t shifted = t_c >> ((loops - i) * 6);
    if (i == 0) {
      // The lead byte keeps 7 - (loops + 1) payload bits under its marker.
      bytes[i] = static_cast<unsigned char>(
          header | (shifted & (0xFFu >> (loops + 2))));
    } else {
      bytes[i] = static_cast<unsigned char>(0x80u | (shifted & 0x3Fu));
    }
  }
  std::fwrite(bytes, sizeof(unsigned char), loops + 1, t_out);
}

26
src/utf8.hh Normal file
View File

@ -0,0 +1,26 @@
/**
* \file utf8.hh
* \brief Header for UTF-8 related functions
*/
#ifndef LZW_SRC_UTF8_H_
#define LZW_SRC_UTF8_H_
#include <cstdio>
#include <cstdint>
/*
Characters are encoded in UTF-8
char < 128 => "0xxxxxxx" 7bits
char < 2,048 => "110xxxxx 10xxxxxx" 11bits
char < 65,536 => "1110xxxx 10xxxxxx 10xxxxxx" 16bits
char < 2,097,152 => "11110xxx 10xxxxxx 10xxxxxx 10xxxxxx" 21bits
char < 67,108,864 => "111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx" 26bits
char < 2,147,483,648 => "1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx" 31bits
*/
/// \brief Écrit les caractères au format UTF-8
void write_utf8(std::FILE* t_out, std::uint32_t t_c);
#endif /* LZW_SRC_UTF8_H_ */