Compare commits

...

27 Commits

Author SHA1 Message Date
Phuntsok Drak-pa bbfb669781 Merge branch 'master' of labs.phundrak.fr:phundrak/lzw-assignment 2019-08-22 17:57:03 +02:00
Phuntsok Drak-pa e863923b0d Stripping binary from unneeded data 2019-08-22 17:56:58 +02:00
Phuntsok Drak-pa 412089e653 added nodiscard attributes 2019-08-19 16:40:13 +02:00
Phuntsok Drak-pa 766e9ceb41 better gitignore 2019-06-16 17:21:11 +02:00
Phuntsok Drak-pa f763f476fb reduced usage of puts 2018-11-21 10:28:13 +01:00
Phuntsok Drak-pa 946350c88b help message instead of error in case of no input path 2018-11-21 01:59:02 +01:00
Phuntsok Drak-pa fdcdc6519a better code quality 2018-11-21 01:49:30 +01:00
Phuntsok Drak-pa 988dbcaa87 woops, forgot these lines 2018-11-21 01:21:13 +01:00
Phuntsok Drak-pa 631bca6d8f fix building issue with travis I hope 2018-11-21 01:13:37 +01:00
Phuntsok Drak-pa db9aa784ef travis update 2018-11-21 01:05:46 +01:00
Phuntsok Drak-pa b3ea126e42 added cache for apt with travis 2018-11-21 00:59:02 +01:00
Phuntsok Drak-pa ba4706cec4 travis update 2018-11-21 00:56:35 +01:00
Phuntsok Drak-pa 4192e9789d Merge branch 'master' of github.com:Phundrak/lzw-assignment 2018-11-21 00:53:55 +01:00
Phuntsok Drak-pa 652146a93a testing cloudsonar with travis 2018-11-21 00:52:03 +01:00
Phuntsok Drak-pa 834504c886
Update README.md 2018-11-21 00:38:26 +01:00
Phuntsok Drak-pa 2b932f3e1a added gitlabCI config file 2018-11-02 14:41:43 +01:00
Phuntsok Drak-pa 6d4e29b030 cleaned a bit and made general Makefile 2018-06-24 18:59:17 +02:00
Phuntsok Drak-pa 8e23eb858e BUG FIXED, DOBBY IS FREEEEEE!!! 2018-06-24 18:34:43 +02:00
Phuntsok Drak-pa 3e9d94d865 did not mean for this line to be commited 2018-06-24 18:08:50 +02:00
Phuntsok Drak-pa 0523fe77f2 Bug identified, first char of new chunk not witten (see uncompress.cc:59) 2018-06-24 18:03:09 +02:00
Phuntsok Drak-pa e01334a566 removed Doxygen from release build 2018-06-24 18:02:39 +02:00
Phuntsok Drak-pa 4c212907c2 bugfixes 2018-06-21 17:38:51 +02:00
Phuntsok Drak-pa 94435ef305 updated help message 2018-06-18 18:04:07 +02:00
Phuntsok Drak-pa d3ea93d13e Updated README.md 2018-06-18 17:53:38 +02:00
Phuntsok Drak-pa 8b8032e533 hopefully will fix Travis-CI builds 2018-06-18 17:10:49 +02:00
Phuntsok Drak-pa 5830f4225c bugfix: fixed writing mask for charsize increase 2018-06-18 16:51:38 +02:00
Phuntsok Drak-pa 8777183821 better, but still some bugs 2018-06-17 06:38:57 +02:00
22 changed files with 326 additions and 508 deletions

View File

@ -5,5 +5,4 @@ jobs:
- image: purplekarrot/gcc-8
steps:
- checkout
- run: apt-get update && apt-get install -y doxygen doxygen-docs doxygen-latex doxygen-gui graphviz
- run: cd build && cmake -DCMAKE_BUILD_TYPE=Release .. && make -j

12
.gitignore vendored
View File

@ -1,15 +1,13 @@
*~
gmon\.out
/cmake-build-debug/Makefile
cmake-build-debug/
\.idea/
*.lzw
docs/Doxyfile
\.scannerwork/
docs/html/
bw-output/
docs/latex/
bin/
build/
debug/

7
.gitlab-ci.yml Normal file
View File

@ -0,0 +1,7 @@
# Container image used to run the job.
image: rikorose/gcc-cmake:latest
# The pipeline has a single stage, "build".
stages:
- build
build:
stage: build
script:
# Create the build and bin directories, configure a Release build with CMake
# from inside build/, then compile with parallel make.
- mkdir -p build bin && cd build && cmake -DCMAKE_BUILD_TYPE=Release .. && make -j

View File

@ -4,66 +4,48 @@ compiler: clang
os: linux
dist: trusty
addons:
sonarcloud:
organization: "phundrak-github"
token:
secure: ${SONAR_TOKEN}
apt:
config:
retries: true
sources:
- ubuntu-toolchain-r-test
- llvm-toolchain-trusty-5.0
packages:
- g++-7
- clang-5.0
- doxygen
- doxygen-doc
- doxygen-latex
- doxygen-gui
- graphviz
cache:
apt: true
matrix:
include:
# Linux Clang C++17
- env:
BUILD_TYPE=Debug
BUILD_TYPE=debug
BIN_DIR=debug
CC=clang-5.0
CXX=clang++-5.0
- env:
BUILD_TYPE=Release
BUILD_TYPE=release
BIN_DIR=bin
CC=clang-5.0
CXX=clang++-5.0
# Linux GCC C++17
- env:
- MATRIX_EVAL="BUILD_TYPE=Debug && BIN_DIR=debug && CC=gcc-7 && CXX=g++-7"
- MATRIX_EVAL="BUILD_TYPE=debug && BIN_DIR=debug && CC=gcc-7 && CXX=g++-7"
compiler: gcc
before_install:
- eval "${MATRIX_EVAL}"
- env:
- MATRIX_EVAL="BUILD_TYPE=Release && BIN_DIR=bin && CC=gcc-7 && CXX=g++-7"
- MATRIX_EVAL="BUILD_TYPE=release && BIN_DIR=bin && CC=gcc-7 && CXX=g++-7"
compiler: gcc
before_install:
- eval "${MATRIX_EVAL}"
# OSX Clang
# - os: osx
# osx_image: xcode9.3
# env:
# BUILD_TYPE=Release
# BIN_DIR=bin
# before_install:
# - brew update
# - brew install doxygen graphviz
# install:
# brew upgrade cmake
# - os: osx
# osx_image: xcode9.3
# env:
# BUILD_TYPE=Debug
# BIN_DIR=debug
# before_install:
# brew update
# install:
# brew upgrade cmake
script:
- cd build
- cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE ..
- make -j
- make $BUILD_TYPE
- build-wrapper-linux-x86-64 --out-dir bw-output make clean all
- sonar-scanner -Dsonar.projectKey=Phundrak_lzw-assignment -Dsonar.sources=./src/ -Dsonar.cfamily.build-wrapper-output=bw-output -Dsonar.host.url=https://sonarcloud.io

View File

@ -34,26 +34,6 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "../debug/")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_COVERAGE_COMPILE_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${CXX_COVERAGE_COMPILE_FLAGS}")
# indicate the doc build as an option, ON by default
if(CMAKE_BUILD_TYPE MATCHES "^[Rr]elease")
option(BUILD_DOC "Build documentation" ON)
find_package(Doxygen
REQUIRED dot)
if(DOXYGEN_FOUND)
set(DOXYGEN_IN ${CMAKE_CURRENT_SOURCE_DIR}/docs/Doxyfile.in)
set(DOXYGEN_OUT ${CMAKE_CURRENT_SOURCE_DIR}/docs/Doxyfile)
configure_file(${DOXYGEN_IN} ${DOXYGEN_OUT} @ONLY)
message("Doxygen build started")
add_custom_target(docs ALL
COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_OUT}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
COMMENT "Generating API documentation with Doxygen"
VERBATIM )
else()
message("Doxygen needs to be installed to generate the doxygen documentation.")
endif()
endif()
include_directories(includes)
file(GLOB SOURCES "src/*.cc")
add_executable(${TGT} ${SOURCES})

16
Makefile Normal file
View File

@ -0,0 +1,16 @@
# Convenience wrapper around the CMake build.
# None of these targets produce a file of the same name, and directories called
# build/, bin/ and debug/ exist in the tree, so every target must be phony or
# make would consider e.g. `debug` already up to date.
.PHONY: all release debug clean

# Default target: identical to `release`.
all: release

# Release build, then strip the resulting binary.
release:
	@mkdir -p build
	@cd build && pwd && cmake -DCMAKE_BUILD_TYPE=Release .. && make
	@strip bin/projet_lzw

# Debug build; the binary is left unstripped.
debug:
	@mkdir -p build
	@cd build && pwd && cmake -DCMAKE_BUILD_TYPE=Debug .. && make

# Remove everything inside bin/, build/ and debug/.
clean:
	rm -rf bin/* build/* debug/*

View File

@ -2,6 +2,7 @@
[![Travis Badge](https://travis-ci.org/Phundrak/lzw-assignment.svg?branch=master)](https://travis-ci.org/Phundrak/lzw-assignment)
[![Codacy Badge](https://api.codacy.com/project/badge/Grade/80cf9a0514554f368effaf78d8e4ae15)](https://www.codacy.com/app/Phundrak/lzw-assignment?utm_source=github.com&utm_medium=referral&utm_content=Phundrak/lzw-assignment&utm_campaign=Badge_Grade)
[![CodeFactor](https://www.codefactor.io/repository/github/phundrak/lzw-assignment/badge)](https://www.codefactor.io/repository/github/phundrak/lzw-assignment)
[![SonarCloud Badge](https://sonarcloud.io/api/project_badges/measure?project=Phundrak_lzw-assignment&metric=alert_status)](https://sonarcloud.io/dashboard?id=Phundrak_lzw-assignment)
[![Spacemacs Badge](https://cdn.rawgit.com/syl20bnr/spacemacs/442d025779da2f62fc86c2082703697714db6514/assets/spacemacs-badge.svg)](http://spacemacs.org)
# LZW Compressing tool
@ -9,3 +10,16 @@
This is a university assignment for which I aim to create an LZW algorithm implementation to create a small tool similar to `gzip` and `gunzip` that can compress and uncompress files in a lossless fashion.
This project is written in C++17, compiled with clang under a UNIX environment. Other compilers and environments will not be tested.
## How to use it
Currently, five different options are available to the user:
- `-h` or `--help` will show how to use `projet_lzw`
- `-c` or `--compress` tells `projet_lzw` to compress the input file
- `-u` or `--uncompress` tells `projet_lzw` to uncompress the input file
- `-i <file>` or `--input <file path>` specifies the input file to be compressed or uncompressed (**MANDATORY**)
- `-o <file>` or `--output <file path>` specifies the name of the output file. If not used, the default output name for compression is `output.lzw`, and the default name for uncompressing is `<filename>_uncompressed`.
By default, `projet_lzw` will uncompress the (mandatory) input file.
A sixth option, `-p` or `--passes`, is planned for the future; it will allow the input file to be compressed multiple times.

2
bin/.gitignore vendored
View File

@ -1,2 +0,0 @@
*
!.gitignore

2
build/.gitignore vendored
View File

@ -1,2 +0,0 @@
*
!.gitignore

2
debug/.gitignore vendored
View File

@ -1,2 +0,0 @@
*
!.gitignore

View File

@ -1,33 +0,0 @@
OUTPUT_DIRECTORY = @CMAKE_CURRENT_SOURCE_DIR@/docs/
INPUT = @CMAKE_CURRENT_SOURCE_DIR@/src/ @CMAKE_CURRENT_SOURCE_DIR@/docs
DOXYFILE ENCODING = UTF-8
PROJECT_NAME = "Compression LZW"
PROJECT_NUMBER = 0.3
PROJECT_BRIEF = "Utilitaire de compression/décompression de fichiers via lalgorithme LZW"
ALLOW_UNICODE_NAMES = YES
OUTPUT_LANGUAGE = French
FULL_PATH_NAMES = NO
TAB_SIZE = 2
EXTRACT_ALL = YES
CASE_SENSE_NAMES = YES
INPUT_ENCODING = UTF-8
FILE_PATTERNS = *.cc \
*.hh
RECURSIVE = YES
EXAMPLE_PATTERNS = *
SOURCE_BROWSER = YES
INLINE_SOURCES = YES
REFERENCED_BY_RELATION = YES
REFERENCES_RELATION = YES
USE_HTAGS = NO
HTML_TIMESTAMP = YES
GENERATE_DOCSET = NO
GENERATE_HTMLHELP = NO
LATEX_SOURCE_CODE = YES
LATEX_TIMESTAMP = YES
ENABLE_PREPROCESSING = NO
HAVE_DOT = YES
UML_LOOK = YES
CALL_GRAPH = YES
CALLER_GRAPH = YES
INTERACTIVE_SVG = YES

View File

@ -10,110 +10,9 @@ using vuint16 = vector<uint16_t>;
using vuchar = vector<uchar>;
using ustring = std::basic_string<unsigned char>;
int max(const int t_n) {
return ipow(2, t_n) - 1;
}
[[nodiscard]] int max(const int t_n) { return ipow(2, t_n) - 1; }
///////////////////////////////////////////////////////////////////////////////
// packing //
///////////////////////////////////////////////////////////////////////////////
vuchar pack(const vuint16 &t_input) {
return pack_n(t_input.begin(), t_input.end(), 9);
}
/**
* Packs \p t_input into unsigned char, assuming the max value of t_input
* only takes \p t_n bits
*
* \param t_input_begin pointer to the beginning of the vector of values to be packed
* \param t_input_end pointer to the end of the input vector
* \param t_n maximum size of an input value in bits
* \return Returns a vector of unsigned char containing the packed values from t_input
*/
vuchar pack_n(const vuint16::const_iterator t_input_begin,
const vuint16::const_iterator t_input_end, const int t_n) {
if (t_n == 16) {
return pack_16(t_input_begin, t_input_end);
}
const int max_value = max(t_n); // max value held within t_n bits
#ifdef Debug
std::printf("%d bits! %ld chars remaining\n", t_n,
std::distance(t_input_begin, t_input_end));
std::printf("max: %d\n", max_value);
#endif
int step = t_n / 8;
int left_shift = 0;
int middle_shift = 0;
int right_shift = 0;
uchar current_char = 0;
bool char_touched = false;
vuchar ret{};
for (auto it = t_input_begin; it != t_input_end; ++it) {
if ((left_shift += step) >= t_n) {
left_shift = (left_shift - t_n) + step;
}
ret.push_back((current_char | (*it >> left_shift)) & 0xFFu);
// current_char = 0;
bool zero_right_shift = (right_shift == 0);
right_shift -= step;
if (right_shift < 0) {
middle_shift = std::abs(right_shift);
right_shift = 8 - std::abs(right_shift);
if (!zero_right_shift) {
current_char = (*it >> middle_shift) & 0xFFu;
ret.push_back(current_char);
}
}
if (right_shift == 0) {
current_char = *it & 0xffu;
ret.push_back(current_char);
current_char = 0;
char_touched = false;
} else {
current_char = (*it << right_shift) & 0xFFu;
char_touched = true;
}
// il faut écrire la valeur pour la décompression
if (*it >= max_value) {
if (char_touched) {
ret.push_back(current_char);
}
const auto next_vec = pack_n(it, t_input_end, t_n + 1);
ret.insert(ret.end(), next_vec.begin(), next_vec.end());
return ret;
}
}
if (char_touched) {
ret.push_back(current_char);
}
return ret;
}
vuchar pack_16(const vuint16::const_iterator t_input_begin,
const vuint16::const_iterator t_input_end) {
#ifdef Debug
std::printf("16 bits! %ld chars remaining\n",
std::distance(t_input_begin, t_input_end));
#endif
vuchar ret{};
std::for_each(t_input_begin, t_input_end, [&](const auto value) {
ret.push_back((value >> 8) & 0xFFu);
ret.push_back(value & 0xFFu);
});
return ret;
}
///////////////////////////////////////////////////////////////////////////////
// unpacking //
///////////////////////////////////////////////////////////////////////////////
constexpr uint16_t mask_n(int t_nb_bits) {
[[nodiscard]] constexpr uint16_t mask_n(int t_nb_bits) {
if (t_nb_bits == 0) {
return 0;
}
@ -128,60 +27,151 @@ constexpr uint16_t masks[17] = {
mask_n(6), mask_n(7), mask_n(8), mask_n(9), mask_n(10), mask_n(11),
mask_n(12), mask_n(13), mask_n(14), mask_n(15), mask_n(16)};
vuint16 unpack(ustring &&t_input) {
return unpack_n(t_input.begin(), t_input.end(), 9);
///////////////////////////////////////////////////////////////////////////////
// packing //
///////////////////////////////////////////////////////////////////////////////
/// Bit-packs \p t_input into a freshly created byte vector, starting the
/// packing with a width of 9 bits.
[[nodiscard]] vuchar pack(const vuint16 &t_input) {
  vuchar packed{};
  return pack_n(t_input.cbegin(), t_input.cend(), packed, 9);
}
vuint16 unpack_n(const ustring::const_iterator t_begin,
const ustring::const_iterator t_end, const int t_n) {
#ifdef Debug
std::printf("Chunk! %d bits, %ld compressed chars\n", t_n,
std::distance(t_begin, t_end));
#endif
[[nodiscard]] vuchar pack_n(const vuint16::const_iterator t_input_begin,
const vuint16::const_iterator t_input_end,
vuchar &t_res, int t_n) {
if (t_n == 16) {
return unpack_16(t_begin, t_end);
return pack_16(t_input_begin, t_input_end, t_res);
}
int step = t_n / 8;
const int max_value = max(t_n); // max value held within t_n bits
int step = t_n % 8;
int left_shift = 0;
int right_shift = 0;
uchar current_char = 0;
bool char_touched = false;
// pour chaque élément
for (auto it = t_input_begin; it != t_input_end; ++it) {
// si on a atteint ou dépassé la valeur maximale, on change de nombre de
// bits
if (*it >= max_value) {
// écriture du masque pour notifier à la décompression du changement de
// bits
if ((left_shift += step) >= t_n) {
left_shift = (left_shift - t_n) + step;
}
const auto mask = masks[t_n] >> left_shift;
t_res.push_back(static_cast<uchar>(current_char | mask));
bool zero_rs = (right_shift == 0);
right_shift -= step;
if (right_shift < 0 && !zero_rs) {
// si right_shift est inférieur à zéro
// si right_shift était différent de zéro, alors extra octet
current_char = static_cast<uchar>(masks[t_n] >> (-right_shift) & 0xFFU);
t_res.push_back(current_char);
}
t_res.push_back(static_cast<uchar>(masks[t_n]));
return pack_n(it, t_input_end, t_res, t_n + 1);
}
// écriture normale
if ((left_shift += step) >= t_n) {
left_shift = (left_shift - t_n) + step;
}
t_res.push_back(
static_cast<uchar>(current_char | (*it >> left_shift & 0xFFU)));
bool zero_rs = (right_shift == 0);
right_shift -= step;
if (right_shift < 0) {
if (!zero_rs) {
current_char = static_cast<uchar>(*it >> (-right_shift) & 0xFFU);
t_res.push_back(current_char);
}
right_shift = 8 + right_shift;
}
if (right_shift == 0) {
current_char = static_cast<uchar>(*it & 0xFFU);
t_res.push_back(current_char);
current_char = 0;
char_touched = false;
} else {
current_char = static_cast<uchar>(*it << right_shift & 0xFFU);
char_touched = true;
}
}
if (char_touched) {
t_res.push_back(current_char);
}
return t_res;
}
/// Appends every value of [\p t_input_begin, \p t_input_end) to \p t_res as
/// two bytes, most significant byte first.
///
/// \return a copy of \p t_res after the bytes have been appended
[[nodiscard]] vuchar pack_16(const vuint16::const_iterator t_input_begin,
                             const vuint16::const_iterator t_input_end,
                             vuchar &t_res) {
  for (auto cursor = t_input_begin; cursor != t_input_end; ++cursor) {
    const auto word = *cursor;
    const auto high_byte = static_cast<uchar>(word >> 8 & 0xFFU);
    const auto low_byte = static_cast<uchar>(word & 0xFFU);
    t_res.push_back(high_byte);
    t_res.push_back(low_byte);
  }
  return t_res;
}
///////////////////////////////////////////////////////////////////////////////
// unpacking //
///////////////////////////////////////////////////////////////////////////////
/// Unpacks the bytes of \p t_input into a freshly created vector of 16-bit
/// codes, starting the unpacking with a width of 9 bits.
[[nodiscard]] vuint16 unpack(ustring &&t_input) {
  vuint16 codes{};
  return unpack_n(t_input.cbegin(), t_input.cend(), codes, 9);
}
[[nodiscard]] vuint16 unpack_n(const ustring::const_iterator t_begin,
const ustring::const_iterator t_end,
vuint16 &t_res, int t_n) {
if (t_n == 16) {
return unpack_16(t_begin, t_end, t_res);
}
int step = t_n % 8;
int left_shift = 0;
int right_shift = 0;
vuint16 ret{};
const int max_value = max(t_n);
for (auto it = t_begin; it < t_end - 1; /* nope */) {
uint16_t current_char = 0;
// left bits
left_shift =
((left_shift += step) >= t_n) ? (left_shift - t_n) + step : left_shift;
current_char = static_cast<uint16_t>(*it << left_shift);
if ((left_shift += step) >= t_n) {
left_shift = (left_shift - t_n) + step;
}
current_char = static_cast<uint16_t>(*it << left_shift) & masks[t_n];
// right bits
bool zero_rs = right_shift;
bool zero_rs = (right_shift == 0);
right_shift -= step;
if (right_shift < 0) {
// optional middle bits before right bits
if (zero_rs) {
current_char |= *++it << (-right_shift);
// if previous right shift was negative and not zero
if (!zero_rs) {
current_char |= *++it << (-right_shift) & masks[16 + right_shift];
}
right_shift = 8 + right_shift;
}
current_char |= *(++it) >> right_shift;
current_char |= *++it >> right_shift & masks[8 - right_shift];
// char made!
ret.push_back(current_char &= masks[t_n]);
if (current_char >= max_value) {
const auto next_vec = unpack_n(it + 1, t_end, t_n + 1);
ret.insert(ret.end(), next_vec.begin(), next_vec.end());
return ret;
if (current_char >= max_value) { // if it is the mask
return unpack_n(it + 1, t_end, t_res, t_n + 1);
}
current_char &= masks[t_n];
t_res.push_back(current_char);
if (right_shift == 0) {
++it;
}
}
return ret;
return t_res;
}
vuint16 unpack_16(const ustring::const_iterator t_begin,
const ustring::const_iterator t_end) {
vuint16 ret{};
[[nodiscard]] vuint16 unpack_16(const ustring::const_iterator t_begin,
const ustring::const_iterator t_end,
vuint16 &t_res) {
for (auto it = t_begin; it < t_end; ++it) {
ret.push_back(static_cast<uint16_t>((*it << 8) | *(++it)));
t_res.push_back(static_cast<uint16_t>(*it << 8 | *++it));
}
return ret;
return t_res;
}

View File

@ -1,8 +1,3 @@
/**
* \file bitpack.hh
* \brief Header for bit-packing functions
*/
#ifndef LZW_SRC_BITPACK_H_
#define LZW_SRC_BITPACK_H_
@ -10,27 +5,30 @@
#include <string>
#include <vector>
/// \brief Bat-packs the input dynamically
std::vector<unsigned char> pack(const std::vector<std::uint16_t> &);
[[nodiscard]] std::vector<unsigned char>
pack(const std::vector<std::uint16_t> &);
/// \brief Packs std::uint16_t of n bits into unsigned char
std::vector<unsigned char>
pack_n(const std::vector<std::uint16_t>::const_iterator,
const std::vector<std::uint16_t>::const_iterator, const int);
[[nodiscard]] std::vector<unsigned char>
pack_n(std::vector<std::uint16_t>::const_iterator,
std::vector<std::uint16_t>::const_iterator, std::vector<unsigned char> &,
int);
/// \brief Specialization of \ref pack_n for 16bits
std::vector<unsigned char>
pack_16(const std::vector<std::uint16_t>::const_iterator,
const std::vector<std::uint16_t>::const_iterator);
[[nodiscard]] std::vector<unsigned char>
pack_16(std::vector<std::uint16_t>::const_iterator,
std::vector<std::uint16_t>::const_iterator,
std::vector<unsigned char> &);
std::vector<std::uint16_t> unpack(std::basic_string<unsigned char> &&);
[[nodiscard]] std::vector<std::uint16_t>
unpack(std::basic_string<unsigned char> &&);
std::vector<std::uint16_t>
unpack_n(const std::basic_string<unsigned char>::const_iterator,
const std::basic_string<unsigned char>::const_iterator, const int t_n);
[[nodiscard]] std::vector<std::uint16_t>
unpack_n(std::basic_string<unsigned char>::const_iterator t_begin,
std::basic_string<unsigned char>::const_iterator t_end,
std::vector<std::uint16_t> &, int t_n);
std::vector<std::uint16_t>
unpack_16(const std::basic_string<unsigned char>::const_iterator,
const std::basic_string<unsigned char>::const_iterator);
[[nodiscard]] std::vector<std::uint16_t>
unpack_16(std::basic_string<unsigned char>::const_iterator,
std::basic_string<unsigned char>::const_iterator,
std::vector<std::uint16_t> &);
#endif /* LZW_SRC_BITPACK_H_ */

View File

@ -1,8 +1,3 @@
/**
* \file common.cc
* \brief Implementation for functions in common
*/
#include "common.hh"
using std::uint16_t;
@ -11,7 +6,7 @@ using dic_comp_t = std::map<std::pair<uint16_t, uint8_t>, uint16_t>;
using ustring = std::basic_string<unsigned char>;
using p_ustring = std::shared_ptr<ustring>;
int ipow(int base, int exp) {
[[nodiscard]] int ipow(int base, int exp) {
int result = 1;
for (;;) {
if (exp & 1) {
@ -26,43 +21,21 @@ int ipow(int base, int exp) {
return result;
}
/**
* Cette fonction a pour double usage la recherche d'une chaîne de caractères
* dans le dictionnaire, ou bien l'ajout d'une nouvelle chaîne si celle-ci
* n'est pas déjà présente. Une chaîne de caractères est représentée par un
* couple numéro de chaîne / caractère, le numéro de chaîne renvoyant au
* caractère précédent (soit son code ASCII, soit son indice dans le
* dictionnaire) et le caractère se référant au dernier caractère de la chaîne
* courante. Si le numéro de chaîne est -1, alors il s'agit du premier caractère
* de la chaîne, et la valeur renvoyée sera la valeur ASCII du caractère. La
* fonction renvoie une paire bool/uint16_t, la valeur booléenne indiquant si une
* nouvelle chaîne fut ajoutée dans le dictionnaire ou non, et le uint16_t
* indiquant la valeur numérique de la chaîne dans le dictionnaire.
*
* \param t_dictionary Dictionnaire
* \param t_nr_chaine Numéro de la chaine précédant le caractères \p t_c dans \p t_dictionary
* \param t_c Caractère suivant la chaine de caractères \p t_nr_chaine
* \return const std::pair<bool, uint16_t>
*/
std::pair<bool, uint16_t> dico(dic_comp_t &t_dictionary,
const uint16_t t_nr_chaine, const uint8_t t_c) {
[[nodiscard]] std::pair<bool, uint16_t>
dico(dic_comp_t &t_dictionary, const uint16_t t_nr_chaine, const uint8_t t_c) {
if (t_nr_chaine == 0xFFFF) {
return std::make_pair(true, t_c);
}
auto &e = t_dictionary[std::make_pair(t_nr_chaine, t_c)];
return (e != 0) ? std::make_pair(true, e)
: std::make_pair(false, (e = static_cast<uint16_t>(
t_dictionary.size() + 255)));
if (e != 0)
return std::make_pair(true, e);
e = static_cast<uint16_t>(t_dictionary.size() + 255);
return std::make_pair(false, e);
}
/**
* Detailed description
*
* \param t_dict Dictionnaire
* \return Retourne une chaîne de caractères non signés
*/
ustring dico_uncompress(std::map<uint16_t, ustring> &t_dict,
const uint16_t t_code, const uint16_t t_old) {
[[nodiscard]] ustring dico_uncompress(std::map<uint16_t, ustring> &t_dict,
const uint16_t t_code,
const uint16_t t_old) {
// le code existe dans le dictionnaire sil est < 256
if (t_code < 256) {
ustring e{static_cast<unsigned char>(t_code)};

View File

@ -1,8 +1,3 @@
/**
* \file common.hh
* \brief Header for functions in common
*/
#ifndef LZW_SRC_COMMON_H_
#define LZW_SRC_COMMON_H_
@ -10,14 +5,13 @@
#include <map>
#include <memory>
int ipow(int, int);
[[nodiscard]] int ipow(int, int);
/// \brief Recherche ou ajout de chaine dans le dictionnaire
std::pair<bool, std::uint16_t>
[[nodiscard]] std::pair<bool, std::uint16_t>
dico(std::map<std::pair<std::uint16_t, std::uint8_t>, std::uint16_t> &,
const std::uint16_t, const std::uint8_t);
std::basic_string<unsigned char>
[[nodiscard]] std::basic_string<unsigned char>
dico_uncompress(std::map<std::uint16_t, std::basic_string<unsigned char>> &,
const std::uint16_t, const std::uint16_t);

View File

@ -1,14 +1,11 @@
/**
* \file compress.cc
* \brief Implementation of compression
*/
#include "compress.hh"
#include "io.hh"
#include "common.hh"
#include "io.hh"
#include <cassert>
#include <cstdlib>
#include <fstream>
#include <iterator>
using std::ios;
using std::string;
using std::uint16_t;
using std::uint8_t;
@ -20,33 +17,39 @@ using ustring = std::basic_string<unsigned char>;
using dict_t = std::map<std::pair<uint16_t, uint8_t>, uint16_t>;
using std::printf;
const size_t DICT_MAX = static_cast<size_t>(ipow(2, 17) - 256); /* 16 bits */
/// Reads the whole content of the file \p filename as raw bytes.
///
/// The file is opened in binary mode positioned at its end so that its size
/// can be queried, then all bytes are read with a single call. Failures are
/// handled with a runtime check rather than `assert`, because `assert` is
/// compiled out when NDEBUG is defined and a failed open would then feed the
/// -1 reported by `tellg()` into the buffer allocation.
///
/// \param filename path of the file to read
/// \return the bytes of the file, in order; empty for an empty file
[[nodiscard]] ustring read_file(const string &filename) {
  std::ifstream file{filename, ios::binary | ios::ate};
  if (!file) {
    std::cerr << "Error: could not open input file \"" << filename
              << "\". Aborting...\n";
    std::exit(1);
  }
  // tellg() reports -1 when the position cannot be determined.
  const std::streamoff file_size = file.tellg();
  if (file_size < 0) {
    std::cerr << "Error: could not determine the size of input file \""
              << filename << "\". Aborting...\n";
    std::exit(1);
  }
  file.seekg(0, ios::beg);
  ustring res{};
  res.resize(static_cast<std::size_t>(file_size));
  // istream::read works on char; the buffer holds unsigned char of the same
  // size, so the bytes are stored unchanged.
  file.read(reinterpret_cast<char *>(res.data()), file_size);
  // Keep only what was actually read in case the read came up short.
  res.resize(static_cast<std::size_t>(file.gcount()));
  return res;
}
/**
* La chaîne de caractères \p t_text est lue caractère par caractère, et est
* selon la valeur de retour de la fonction \ref dico (permettant dans le même
* temps la création du dictionnaire), on rajoute ou non un nouveau caractère
* encodé sur 12bits dans le chunk courant. Dès que le dictionnaire est plein
* (2^12 caractères), le chunk est sauvegardé et vidé, et le dictionnaire est
* réinitialisé.
*
* \param t_text Chaîne de caractères uint8_t représentant le fichier d'entrée
* \return Vecteur de chunks (vecteurs de uint16_t)
*/
vvuint16 lzw_compress(ustring &&t_text) {
std::puts("Compressing...");
[[nodiscard]] vvuint16 lzw_compress(ustring &&t_text) {
vvuint16 res{};
const auto DICT_MAX = static_cast<size_t>(ipow(2, 14) - 256); /* 16 bits */
uint16_t w = 0xFFFF;
vuint16 chunk{};
vvuint16 res{};
dict_t dict{};
for (const auto c : t_text) {
if (dict.size() >= DICT_MAX) {
res.push_back(std::move(chunk));
chunk = vuint16{};
dict = dict_t{};
if (w != 0xFFFF) {
chunk.push_back(w);
}
res.push_back(chunk);
w = 0xFFFF;
chunk.clear();
dict.clear();
}
if (const auto &[yes, pos] = dico(dict, w, static_cast<uint8_t>(c)); yes) {
if (const auto &[exists, pos] = dico(dict, w, static_cast<uint8_t>(c));
exists) {
w = pos;
} else {
chunk.push_back(w);
@ -60,35 +63,15 @@ vvuint16 lzw_compress(ustring &&t_text) {
return res;
}
/**
* Wrapper de la fonction \ref lzw_compress gérant l'ouverture, la lecture,
* l'écriture et la fermeture des fichiers d'entrée et de sortie. Si \p
* t_out_file est nul (chemin non spécifié), il prendra alors la valeur de
* \p t_in_file à laquelle sera annexée l'extension `.lzw`.
*
* \param[in] t_in_file Chemin vers le fichier d'entrée
* \param[in] t_out_file Chemin vers le fichier de sortie
*/
void compress(const std::string &t_in_file, const char *t_out_file) {
FILE *const input_file = fopen(t_in_file.c_str(), "rb");
assert(input_file);
FILE *const out = (t_out_file != nullptr) ? fopen(t_out_file, "wb")
: fopen("output.lzw", "wb");
if (out == nullptr) {
std::ofstream out{(t_out_file != nullptr) ? t_out_file : "output.lzw",
ios::out | ios::binary};
if (!out.is_open()) {
std::cerr << "Error at " << __FILE__ << ":" << __LINE__ - 4
<< ": could not open output file. Aborting...\n";
std::fclose(input_file);
exit(1);
}
std::fseek(input_file, 0L, SEEK_END);
const auto file_size = static_cast<size_t>(ftell(input_file));
std::rewind(input_file);
auto raw_text = std::make_unique<unsigned char[]>(file_size);
std::fread(raw_text.get(), sizeof(unsigned char), file_size, input_file);
const auto compressed_text(lzw_compress(ustring{raw_text.get(), &raw_text[file_size]}));
const auto compressed_text(lzw_compress(read_file(t_in_file)));
write_file(out, compressed_text);
fclose(out);
fclose(input_file);
out.close();
}

View File

@ -1,21 +1,14 @@
/**
* \file compress.hh
* \brief Header for compression functions
*/
#ifndef LZW_SRC_COMPRESS_H_
#define LZW_SRC_COMPRESS_H_
#include "common.hh"
#include <vector>
#include <iostream>
#include <thread>
#include <vector>
/// \brief Compression d'une chaine de caractères
std::vector<std::vector<std::uint16_t>>
[[nodiscard]] std::vector<std::vector<std::uint16_t>>
lzw_compress(std::basic_string<unsigned char> &&);
/// \brief Wrapper de \ref lzw_compress
void compress(const std::string &, const char *);
#endif /* LZW_SRC_COMPRESS_H_ */

View File

@ -1,65 +1,27 @@
/**
* \file io.cc
* \brief Body for file reading and writing
*/
#include "io.hh"
#include "bitpack.hh"
#include <array>
#include <algorithm>
#ifdef Debug
constexpr bool debug_mode = true;
#include <algorithm>
#else
constexpr bool debug_mode = false;
#endif
using std::vector;
using std::uint16_t;
using std::vector;
using vuint16 = vector<uint16_t>;
using vvuint16 = vector<vuint16>;
/**
* Écrit dans le fichier \p t_out les chunks passés en paramètre. Le fichier de
* sortie est composé des éléments suivants :\n
* - Sur deux octets est écrit un `uint16_t` déterminant le nombre de chunks
* composant le fichier\n
* - Sont ensuite écrits les chunks sur un nombre variable d'octets suivant la
* taille des chunks\n
* \n
* Un chunk est composé de la manière qui suit :\n
* - Sur quatre octets est écrit un `uint32_t` déterminant le nombre d'octets
* composant le chunk\n
* - Sur le nombre d'octets précisé par le header du chunk se trouvent les
* données compressées par l'algorithme lzw puis via bit-packing.\n
*
* \param[out] t_out Fichier de sortie
* \param[in] t_text Collection ordonnée des chunks à écrire dans \p t_out
*/
void write_file(FILE *const t_out, const vvuint16 &t_text) {
const auto size = static_cast<uint16_t>(t_text.size());
if constexpr (debug_mode) {
std::printf("Number of chunks: %u\n", size);
}
fwrite(&size, sizeof(size), 1, t_out);
for (const auto &chunk : t_text) {
if constexpr (debug_mode)
std::printf("Chunk!\n");
void write_file(std::ofstream &t_out, const vvuint16 &t_chunks) {
const auto nr_chunks = static_cast<uint16_t>(t_chunks.size());
#ifdef Debug
std::printf("Number of chunks: %u\n", nr_chunks);
#endif
t_out.write(reinterpret_cast<const char *>(&nr_chunks), sizeof(nr_chunks));
for (const auto &chunk : t_chunks) {
write_chunk(t_out, chunk);
}
}
/**
* Écrit dans le fichier \p t_out le chunk unique \p t_chunk. Se référer à la
* documentation de \ref write_file pour plus de détails.
*
* \param t_out Output file
* \param t_chunk Chunk to be written to \p t_out
*/
void write_chunk(FILE *const t_out, const vuint16 &t_chunk) {
void write_chunk(std::ofstream &t_out, const vuint16 &t_chunk) {
const auto output = pack(t_chunk);
const auto chunk_size = static_cast<uint32_t>(output.size());
fwrite(&chunk_size, sizeof(chunk_size), 1, t_out);
fwrite(output.data(), sizeof(output[0]), output.size(), t_out);
t_out.write(reinterpret_cast<const char *>(&chunk_size), sizeof(chunk_size));
t_out.write(reinterpret_cast<const char *>(output.data()),
sizeof(output[0]) * output.size());
}

View File

@ -1,35 +1,15 @@
/**
* \file io.hh
* \brief Header for file reading and writing
*/
#ifndef LZW_SRC_IO_H_
#define LZW_SRC_IO_H_
#include <cstdio>
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <vector>
/*
* Un fichier compressé se compose ainsi :
* char_size : taille d'un caractère en bits (1B)
* nb_chunk : nombre de chunks (4B)
* chunks* : chunks
*
* Un chunk se compose ainsi :
* nb_char_chunk : nombre de caractères du chunk (2B)
* text* : caractères de taille char_size (ceil((char_size * nb_char_chunk) / 8))
*
* Si le dernier caractère ne termine pas le dernier octet du chunk, les
* derniers bits sont mit à zéro
*/
void write_file(std::ofstream &,
const std::vector<std::vector<std::uint16_t>> &);
/// \brief Écrit dans le fichier le texte compressé
void write_file(FILE *const, const std::vector<std::vector<std::uint16_t>> &);
/// \brief Écrit un chunk dans le fichier de sortie
void write_chunk(FILE *const, const std::vector<std::uint16_t> &);
void write_chunk(std::ofstream &, const std::vector<std::uint16_t> &);
#endif /* LZW_SRC_IO_H_ */

View File

@ -1,16 +1,8 @@
/**
* \file main.cc
* \brief Main file
*
*
*
*/
#include <getopt.h>
#include <cassert>
#include <tuple>
#include "compress.hh"
#include "uncompress.hh"
#include <cassert>
#include <getopt.h>
#include <tuple>
using std::printf;
using std::puts;
@ -19,41 +11,35 @@ using std::tuple;
// custom types ///////////////////////////////////////////////////////////////
/*
Dictionnaire :
<
<
numéro chaine précédente,
caractère ASCII
>
numéro chaine courante
>
*/
using dic_t = std::map<std::pair<uint32_t, uint8_t>, uint32_t>;
using ustring = std::basic_string<uint8_t>; // chaine non encodée
using uvec = std::vector<uint32_t>; // chaine encodée
/**
 * \brief Prints the command-line usage message to standard output.
 *
 * The whole message is emitted by a single puts call, which appends the
 * final newline itself.
 */
void help() {
  // Adjacent string literals, one per output line, so every newline and
  // escaped quote is visible in place (a backslash-continued literal made it
  // easy to lose the closing quote after ".lzw").
  std::puts("Usage:\n"
            "lzw [-options] [-i path] [-o path]\n\n"
            "The default action is to compress the input file to a .lzw file\n"
            "in which the directory in which the software is executed.\n"
            "Options available:\n"
            "-h --help\n"
            "\tdisplay the current message\n"
            "-i --input\n"
            "\tpath to the input file (MANDATORY)\n"
            "-o --output\n"
            "\tpath to the output file (if the file already exists, it will be\n"
            "\toverwritten). Default: input path + \".lzw\"\n"
            "-c --compress\n"
            "\tcompress the input file\n"
            "-u --uncompress\n"
            "\tuncompresses the input file to the output file. If no output path\n"
            "\thas been entered and if the input file ends with \".lzw\",\n"
            "\tthe extension \".lzw\" will be removed; otherwise, the extension\n"
            "\t\"_uncompressed\" will be added");
}
std::tuple<string, string, bool> process_args(int t_argc, char *t_argv[]) {
auto ret = std::make_tuple(string{}, string{}, false);
[[nodiscard]] std::tuple<string, string, bool> process_args(int t_argc,
char *t_argv[]) {
auto ret = std::make_tuple(string{}, string{}, true);
while (true) {
int option_index = 0;
static struct option long_options[] = {
@ -64,7 +50,8 @@ std::tuple<string, string, bool> process_args(int t_argc, char *t_argv[]) {
{"uncompress", no_argument, nullptr, 'u'},
{nullptr, 0, nullptr, 0}};
int c = getopt_long(t_argc, t_argv, "hi:o:cu", long_options, &option_index);
if (c == -1) break;
if (c == -1)
break;
switch (c) {
case 0:
break;
@ -94,10 +81,12 @@ std::tuple<string, string, bool> process_args(int t_argc, char *t_argv[]) {
return ret;
}
/* TODO: compression multiple : nombre de compressions puis fichier compressé */
int main(int argc, char *argv[]) {
const auto [input_path, output_path, compressing] = process_args(argc, argv);
assert(!input_path.empty());
if (input_path.empty()) {
help();
return 0;
}
if (compressing) {
compress(input_path, (output_path.empty()) ? nullptr : output_path.c_str());
} else {

View File

@ -4,6 +4,8 @@
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iterator>
using std::fclose;
using std::fopen;
@ -14,18 +16,16 @@ using std::vector;
using ustring = std::basic_string<unsigned char>;
using vuint16 = vector<uint16_t>;
ustring lzw_uncompress(vuint16 &&t_compressed) {
[[nodiscard]] ustring lzw_uncompress(vuint16 &&t_compressed) {
ustring ret{};
uint16_t old = 0;
std::map<uint16_t, ustring> dict{};
uint16_t v = t_compressed[0];
ret.append({static_cast<unsigned char>(v)});
old = v;
ret.append({static_cast<unsigned char>(t_compressed[0])});
old = t_compressed[0];
for (auto it = t_compressed.begin() + 1; it != t_compressed.end(); ++it) {
v = *it;
const auto uncompressed{dico_uncompress(dict, v, old)};
const auto uncompressed{dico_uncompress(dict, *it, old)};
ret.insert(ret.end(), uncompressed.begin(), uncompressed.end());
old = v;
old = *it;
}
return ret;
@ -34,31 +34,27 @@ ustring lzw_uncompress(vuint16 &&t_compressed) {
/**
 * \brief Uncompresses an LZW-compressed file into an output file.
 *
 * Reads the 16-bit chunk count stored at the start of the input file, then
 * calls uncompress_chunk() once per chunk to decode it and append the result
 * to the output file.
 *
 * \param t_input_name Path to the compressed input file.
 * \param t_output_name Path to the output file; when null, the output path is
 *        the input path followed by "_uncompressed".
 */
void uncompress(const std::string &t_input_name, const char *t_output_name) {
  FILE *const input = std::fopen(t_input_name.c_str(), "rb");
  // Checked explicitly: an assert is compiled out under NDEBUG, which would
  // let a null stream reach fread.
  if (input == nullptr) {
    std::fprintf(stderr, "Could not open input file %s\n",
                 t_input_name.c_str());
    return;
  }

  const std::string output_name = (t_output_name != nullptr)
                                      ? std::string{t_output_name}
                                      : t_input_name + "_uncompressed";
  std::ofstream output{output_name, std::ios::out | std::ios::binary};
  if (!output.is_open()) {
    std::fprintf(stderr, "Could not open output file %s\n",
                 output_name.c_str());
    std::fclose(input);
    return;
  }

  // The stream is already positioned at the start of the file after fopen.
  uint16_t nb_chunks = 0;
  if (std::fread(&nb_chunks, sizeof(nb_chunks), 1, input) == 1) {
    for (uint16_t i = 0; i < nb_chunks; ++i) {
      uncompress_chunk(input, output);
    }
  }

  output.close();
  std::fclose(input);
}
/**
 * \brief Decodes one chunk from the compressed stream and appends it to the
 *        output stream.
 *
 * A chunk is read as a 32-bit byte count followed by that many bytes. The
 * bytes are passed through unpack(), then lzw_uncompress(), and the decoded
 * data is written to t_output. Nothing is written when the chunk cannot be
 * read completely.
 *
 * \param t_input Compressed input stream, positioned at the start of a chunk.
 * \param t_output Output stream receiving the decoded bytes.
 */
void uncompress_chunk(FILE *const t_input, std::ofstream &t_output) {
  uint32_t size_chunk = 0;
  // A missing size field or an empty chunk leaves nothing to decode;
  // lzw_uncompress() reads its first element unconditionally, so bail out
  // before reaching it.
  if (std::fread(&size_chunk, sizeof(size_chunk), 1, t_input) != 1 ||
      size_chunk == 0) {
    return;
  }

  auto chunk = std::make_unique<unsigned char[]>(size_chunk);
  // A short read means the file is truncated: do not decode a buffer that is
  // only partially filled.
  if (std::fread(chunk.get(), sizeof(unsigned char), size_chunk, t_input) !=
      size_chunk) {
    return;
  }

  auto unpacked = unpack(ustring{chunk.get(), chunk.get() + size_chunk});
  const auto uncompressed_chunk = lzw_uncompress(std::move(unpacked));
  t_output.write(reinterpret_cast<const char *>(uncompressed_chunk.data()),
                 sizeof(uncompressed_chunk[0]) * uncompressed_chunk.size());
}

View File

@ -1,13 +1,16 @@
#ifndef LZW_SRC_UNCOMPRESS_H_
#define LZW_SRC_UNCOMPRESS_H_
#include <fstream>
#include <memory>
#include <string>
#include <vector>
std::basic_string<unsigned char>
[[nodiscard]] std::basic_string<unsigned char>
lzw_uncompress(std::vector<std::uint16_t> &&);
void uncompress(const std::string &, const char*);
void uncompress(const std::string &, const char *);
void uncompress_chunk(FILE *, std::ofstream &);
#endif /* LZW_SRC_UNCOMPRESS_H_ */