Merge branch 'master' of labs.phundrak.fr:phundrak/lzw-assignment

Stripping binary from unneeded data
added nodiscard attributes
2019-08-22 17:57:03 +02:00 · 2019-08-22 17:56:58 +02:00 · 2019-08-19 16:40:13 +02:00 · 2019-06-16 17:21:11 +02:00 · 2018-11-21 10:28:13 +01:00 · 2018-11-21 01:59:02 +01:00
28 changed files with 557 additions and 3287 deletions
--- a/.bettercodehub.yml
+++ b/.bettercodehub.yml
@ -1,6 +0,0 @@
-component_depth: 2
-languages:
-    - cpp
-exclude:
-    - includes/*
-    - .*/*
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -5,4 +5,4 @@ jobs:
      - image: purplekarrot/gcc-8
    steps:
      - checkout
-      - run: cd build && cmake -DCMAKE_BUILD_TYPE=Debug .. && make -j
+      - run: cd build && cmake -DCMAKE_BUILD_TYPE=Release .. && make -j
--- a/.gitignore
+++ b/.gitignore
@ -1,9 +1,13 @@
 *~

 gmon\.out
-/cmake-build-debug/Makefile

-cmake-build-debug/
-
-\.idea/
 *.lzw
+
+\.scannerwork/
+
+bw-output/
+
+bin/
+build/
+debug/
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@ -0,0 +1,7 @@
+image: rikorose/gcc-cmake:latest
+stages:
+  - build
+build:
+  stage: build
+  script:
+    - mkdir -p build bin && cd build && cmake -DCMAKE_BUILD_TYPE=Release .. && make -j
--- a/.travis.yml
+++ b/.travis.yml
@ -4,60 +4,48 @@ compiler: clang
 os: linux
 dist: trusty
 addons:
+    sonarcloud:
+        organization: "phundrak-github"
+        token:
+            secure: ${SONAR_TOKEN}
    apt:
+        config:
+            retries: true
        sources:
            - ubuntu-toolchain-r-test
            - llvm-toolchain-trusty-5.0
        packages:
            - g++-7
            - clang-5.0
-
+cache:
+    apt: true
 matrix:
  include:
    # Linux Clang C++17
    - env:
-        BUILD_TYPE=Debug
+        BUILD_TYPE=debug
        BIN_DIR=debug
        CC=clang-5.0
        CXX=clang++-5.0
    - env:
-        BUILD_TYPE=Release
+        BUILD_TYPE=release
        BIN_DIR=bin
        CC=clang-5.0
        CXX=clang++-5.0

    # Linux GCC C++17
    - env:
-        - MATRIX_EVAL="BUILD_TYPE=Debug && BIN_DIR=debug && CC=gcc-7 && CXX=g++-7"
+        - MATRIX_EVAL="BUILD_TYPE=debug && BIN_DIR=debug && CC=gcc-7 && CXX=g++-7"
      compiler: gcc
      before_install:
        - eval "${MATRIX_EVAL}"
    - env:
-        - MATRIX_EVAL="BUILD_TYPE=Release && BIN_DIR=bin && CC=gcc-7 && CXX=g++-7"
+        - MATRIX_EVAL="BUILD_TYPE=release && BIN_DIR=bin && CC=gcc-7 && CXX=g++-7"
      compiler: gcc
      before_install:
        - eval "${MATRIX_EVAL}"

-    # OSX Clang
-    - os: osx
-      osx_image: xcode9.3
-      env:
-        BUILD_TYPE=Release
-        BIN_DIR=bin
-      before_install:
-        brew update
-      install:
-        brew upgrade cmake
-    - os: osx
-      osx_image: xcode9.3
-      env:
-        BUILD_TYPE=Debug
-        BIN_DIR=debug
-      before_install:
-        brew update
-      install:
-        brew upgrade cmake
 script:
-    - cd build
-    - cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE ..
-    - make -j
+    - make $BUILD_TYPE
+    - build-wrapper-linux-x86-64 --out-dir bw-output make clean all
+    - sonar-scanner -Dsonar.projectKey=Phundrak_lzw-assignment -Dsonar.sources=./src/ -Dsonar.cfamily.build-wrapper-output=bw-output -Dsonar.host.url=https://sonarcloud.io
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,6 +1,5 @@
 cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
 set(CMAKE_LEGACY_CYGWIN_WIN32 0)
-set(CMAKE_BUILD_TYPE Debug)

 project("projet_lzw")

@ -26,8 +25,6 @@ else()
        message( FATAL_ERROR "C++17 not supported, CMake will exit." )
    endif()
 endif()
-
-# set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED YES)
 set(CMAKE_CXX_EXTENSIONS OFF)

--- a/16
+++ b/16
@ -0,0 +1,16 @@
+# Default target: configure and build in Release mode, then strip the binary.
+all:
+	@mkdir -p build
+	@cd build && pwd && cmake -DCMAKE_BUILD_TYPE=Release .. && make
+	@strip bin/projet_lzw
+
+release:
+	@mkdir -p build
+	@cd build && pwd && cmake -DCMAKE_BUILD_TYPE=Release .. && make
+	@strip bin/projet_lzw
+
+debug:
+	@mkdir -p build
+	@cd build && pwd && cmake -DCMAKE_BUILD_TYPE=Debug .. && make
+
+clean:
+	rm -rf bin/* build/* debug/*
--- a/README.md
+++ b/README.md
@ -1,11 +1,25 @@
+[![CircleCI](https://circleci.com/gh/Phundrak/lzw-assignment/tree/master.svg?style=svg)](https://circleci.com/gh/Phundrak/lzw-assignment/tree/master)
 [![Travis Badge](https://travis-ci.org/Phundrak/lzw-assignment.svg?branch=master)](https://travis-ci.org/Phundrak/lzw-assignment)
 [![Codacy Badge](https://api.codacy.com/project/badge/Grade/80cf9a0514554f368effaf78d8e4ae15)](https://www.codacy.com/app/Phundrak/lzw-assignment?utm_source=github.com&amp;utm_medium=referral&amp;utm_content=Phundrak/lzw-assignment&amp;utm_campaign=Badge_Grade)
 [![CodeFactor](https://www.codefactor.io/repository/github/phundrak/lzw-assignment/badge)](https://www.codefactor.io/repository/github/phundrak/lzw-assignment)
+[![SonarCloud Badge](https://sonarcloud.io/api/project_badges/measure?project=Phundrak_lzw-assignment&metric=alert_status)](https://sonarcloud.io/dashboard?id=Phundrak_lzw-assignment)
 [![Spacemacs Badge](https://cdn.rawgit.com/syl20bnr/spacemacs/442d025779da2f62fc86c2082703697714db6514/assets/spacemacs-badge.svg)](http://spacemacs.org)
-[![BCH compliance](https://bettercodehub.com/edge/badge/Phundrak/lzw-assignment?branch=master)](https://bettercodehub.com/)

 # LZW Compressing tool

 This is a university assignment for which I aim to create an LZW algorithm implementation to create a small tool similar to `gzip` and `gunzip` that can compress and uncompress files in a lossless fashion.

 This project is written in C++17, compiled with clang under a UNIX environment. Other compilers and environments will not be tested.
+
+## How to use it
+
+Currently, five different options are available to the user:
+- `-h` or `--help` will show how to use `projet_lzw`
+- `-c` or `--compress` tells `projet_lzw` to compress the input file
+- `-u` or `--uncompress` tells `projet_lzw` to uncompress the input file
+- `-i <file>` or `--input <file path>` specifies the input file to be compressed or uncompressed (**MANDATORY**)
+- `-o <file>` or `--output <file path>` specifies the name of the output file. If not used, the default output name for compression is `output.lzw`, and the default name for uncompressing is `<filename>_uncompressed`.
+
+By default, `projet_lzw` will uncompress the (mandatory) input file.
+
+A sixth option, `-p` or `--passes`, is planned for the future; it will allow the input file to be compressed multiple times.
--- a/bin/.gitignore
+++ b/bin/.gitignore
@ -1,2 +0,0 @@
-*
-!.gitignore
--- a/build/.gitignore
+++ b/build/.gitignore
@ -1,2 +0,0 @@
-*
-!.gitignore
--- a/debug/.gitignore
+++ b/debug/.gitignore
@ -1,2 +0,0 @@
-*
-!.gitignore
--- a/doc/.gitignore
+++ b/doc/.gitignore
@ -1,3 +0,0 @@
-*
-!.gitignore
-!Doxyfile
--- a/doc/Doxyfile
+++ b/doc/Doxyfile
--- a/includes/getopt.c
+++ b/includes/getopt.c
@ -1,244 +0,0 @@
-/*
-  Copyright 2005-2014 Rich Felker, et al.
-
-  Permission is hereby granted, free of charge, to any person obtaining
-  a copy of this software and associated documentation files (the
-  "Software"), to deal in the Software without restriction, including
-  without limitation the rights to use, copy, modify, merge, publish,
-  distribute, sublicense, and/or sell copies of the Software, and to
-  permit persons to whom the Software is furnished to do so, subject to
-  the following conditions:
-
-  The above copyright notice and this permission notice shall be
-  included in all copies or substantial portions of the Software.
-
-  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-#include "getopt.h"
-#include <stddef.h>
-#include <stdio.h>
-#include <string.h>
-
-char *optarg;
-int optind = 1, opterr = 1, optopt, __optpos, optreset = 0;
-
-#define optpos __optpos
-
-static void __getopt_msg(const char *a, const char *b, const char *c,
-                         size_t l) {
-  FILE *f = stderr;
-  flockfile(f);
-  fputs(a, f) >= 0 && fwrite(b, strlen(b), 1, f) &&
-      fwrite(c, 1, l, f) == l &&putc('\n', f);
-  funlockfile(f);
-}
-
-int getopt(int argc, char *const argv[], const char *optstring) {
-  int i, c, d;
-  int k, l;
-  char *optchar;
-
-  if (!optind || optreset) {
-    optreset = 0;
-    __optpos = 0;
-    optind = 1;
-  }
-
-  if (optind >= argc || !argv[optind])
-    return -1;
-
-  if (argv[optind][0] != '-') {
-    if (optstring[0] == '-') {
-      optarg = argv[optind++];
-      return 1;
-    }
-    return -1;
-  }
-
-  if (!argv[optind][1])
-    return -1;
-
-  if (argv[optind][1] == '-' && !argv[optind][2])
-    return optind++, -1;
-
-  if (!optpos)
-    optpos++;
-  c = argv[optind][optpos], k = 1;
-  optchar = argv[optind] + optpos;
-  optopt = c;
-  optpos += k;
-
-  if (!argv[optind][optpos]) {
-    optind++;
-    optpos = 0;
-  }
-
-  if (optstring[0] == '-' || optstring[0] == '+')
-    optstring++;
-
-  i = 0;
-  d = 0;
-  do {
-    d = optstring[i], l = 1;
-    if (l > 0)
-      i += l;
-    else
-      i++;
-  } while (l && d != c);
-
-  if (d != c) {
-    if (optstring[0] != ':' && opterr)
-      __getopt_msg(argv[0], ": unrecognized option: ", optchar, k);
-    return '?';
-  }
-  if (optstring[i] == ':') {
-    if (optstring[i + 1] == ':')
-      optarg = 0;
-    else if (optind >= argc) {
-      if (optstring[0] == ':')
-        return ':';
-      if (opterr)
-        __getopt_msg(argv[0], ": option requires an argument: ", optchar, k);
-      return '?';
-    }
-    if (optstring[i + 1] != ':' || optpos) {
-      optarg = argv[optind++] + optpos;
-      optpos = 0;
-    }
-  }
-  return c;
-}
-
-static void permute(char *const *argv, int dest, int src) {
-  char **av = (char **)argv;
-  char *tmp = av[src];
-  int i;
-  for (i = src; i > dest; i--)
-    av[i] = av[i - 1];
-  av[dest] = tmp;
-}
-
-static int __getopt_long_core(int argc, char *const *argv,
-                              const char *optstring,
-                              const struct option *longopts, int *idx,
-                              int longonly) {
-  optarg = 0;
-  if (longopts && argv[optind][0] == '-' &&
-      ((longonly && argv[optind][1] && argv[optind][1] != '-') ||
-       (argv[optind][1] == '-' && argv[optind][2]))) {
-    int colon = optstring[optstring[0] == '+' || optstring[0] == '-'] == ':';
-    int i, cnt, match;
-    char *opt;
-    for (cnt = i = 0; longopts[i].name; i++) {
-      const char *name = longopts[i].name;
-      opt = argv[optind] + 1;
-      if (*opt == '-')
-        opt++;
-      for (; *name && *name == *opt; name++, opt++)
-        ;
-      if (*opt && *opt != '=')
-        continue;
-      match = i;
-      if (!*name) {
-        cnt = 1;
-        break;
-      }
-      cnt++;
-    }
-    if (cnt == 1) {
-      i = match;
-      optind++;
-      optopt = longopts[i].val;
-      if (*opt == '=') {
-        if (!longopts[i].has_arg) {
-          if (colon || !opterr)
-            return '?';
-          __getopt_msg(argv[0],
-                       ": option does not take an argument: ", longopts[i].name,
-                       strlen(longopts[i].name));
-          return '?';
-        }
-        optarg = opt + 1;
-      } else if (longopts[i].has_arg == required_argument) {
-        if (!(optarg = argv[optind])) {
-          if (colon)
-            return ':';
-          if (!opterr)
-            return '?';
-          __getopt_msg(argv[0],
-                       ": option requires an argument: ", longopts[i].name,
-                       strlen(longopts[i].name));
-          return '?';
-        }
-        optind++;
-      }
-      if (idx)
-        *idx = i;
-      if (longopts[i].flag) {
-        *longopts[i].flag = longopts[i].val;
-        return 0;
-      }
-      return longopts[i].val;
-    }
-    if (argv[optind][1] == '-') {
-      if (!colon && opterr)
-        __getopt_msg(argv[0],
-                     cnt ? ": option is ambiguous: "
-                         : ": unrecognized option: ",
-                     argv[optind] + 2, strlen(argv[optind] + 2));
-      optind++;
-      return '?';
-    }
-  }
-  return getopt(argc, argv, optstring);
-}
-
-static int __getopt_long(int argc, char *const *argv, const char *optstring,
-                         const struct option *longopts, int *idx,
-                         int longonly) {
-  int ret, skipped, resumed;
-  if (!optind || optreset) {
-    optreset = 0;
-    __optpos = 0;
-    optind = 1;
-  }
-  if (optind >= argc || !argv[optind])
-    return -1;
-  skipped = optind;
-  if (optstring[0] != '+' && optstring[0] != '-') {
-    int i;
-    for (i = optind;; i++) {
-      if (i >= argc || !argv[i])
-        return -1;
-      if (argv[i][0] == '-' && argv[i][1])
-        break;
-    }
-    optind = i;
-  }
-  resumed = optind;
-  ret = __getopt_long_core(argc, argv, optstring, longopts, idx, longonly);
-  if (resumed > skipped) {
-    int i, cnt = optind - resumed;
-    for (i = 0; i < cnt; i++)
-      permute(argv, skipped, optind - 1);
-    optind = skipped + cnt;
-  }
-  return ret;
-}
-
-int getopt_long(int argc, char *const *argv, const char *optstring,
-                const struct option *longopts, int *idx) {
-  return __getopt_long(argc, argv, optstring, longopts, idx, 0);
-}
-
-int getopt_long_only(int argc, char *const *argv, const char *optstring,
-                     const struct option *longopts, int *idx) {
-  return __getopt_long(argc, argv, optstring, longopts, idx, 1);
-}
--- a/includes/getopt.h
+++ b/includes/getopt.h
@ -1,54 +0,0 @@
-/*
-  Copyright 2005-2014 Rich Felker, et al.
-
-  Permission is hereby granted, free of charge, to any person obtaining
-  a copy of this software and associated documentation files (the
-  "Software"), to deal in the Software without restriction, including
-  without limitation the rights to use, copy, modify, merge, publish,
-  distribute, sublicense, and/or sell copies of the Software, and to
-  permit persons to whom the Software is furnished to do so, subject to
-  the following conditions:
-
-  The above copyright notice and this permission notice shall be
-  included in all copies or substantial portions of the Software.
-
-  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-#ifndef _GETOPT_H
-#define _GETOPT_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-int getopt(int, char *const[], const char *);
-extern char *optarg;
-extern int optind, opterr, optopt, optreset;
-
-struct option {
-  const char *name;
-  int has_arg;
-  int *flag;
-  int val;
-};
-
-int getopt_long(int, char *const *, const char *, const struct option *, int *);
-int getopt_long_only(int, char *const *, const char *, const struct option *,
-                     int *);
-
-#define no_argument 0
-#define required_argument 1
-#define optional_argument 2
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
--- a/src/bitpack.cc
+++ b/src/bitpack.cc
@ -0,0 +1,177 @@
+#include "bitpack.hh"
+#include "common.hh"
+#include <algorithm>
+#include <cmath>
+
+using std::uint16_t;
+using std::vector;
+using uchar = unsigned char;
+using vuint16 = vector<uint16_t>;
+using vuchar = vector<uchar>;
+using ustring = std::basic_string<unsigned char>;
+
+[[nodiscard]] int max(const int t_n) { return ipow(2, t_n) - 1; }
+
+[[nodiscard]] constexpr uint16_t mask_n(int t_nb_bits) {
+  if (t_nb_bits == 0) {
+    return 0;
+  }
+  uint16_t mask = mask_n(t_nb_bits - 1);
+  mask = static_cast<uint16_t>(mask << 1);
+  mask |= 0x1;
+  return mask;
+}
+
+constexpr uint16_t masks[17] = {
+    mask_n(0),  mask_n(1),  mask_n(2),  mask_n(3),  mask_n(4),  mask_n(5),
+    mask_n(6),  mask_n(7),  mask_n(8),  mask_n(9),  mask_n(10), mask_n(11),
+    mask_n(12), mask_n(13), mask_n(14), mask_n(15), mask_n(16)};
+
+///////////////////////////////////////////////////////////////////////////////
+//                                  packing                                  //
+///////////////////////////////////////////////////////////////////////////////
+
+[[nodiscard]] vuchar pack(const vuint16 &t_input) {
+  vuchar vec{};
+  return pack_n(t_input.begin(), t_input.end(), vec, 9);
+}
+
+// Packs the 16-bit codes in [t_input_begin, t_input_end) into bytes, using
+// t_n bits per code, and appends those bytes to t_res. When a code is greater
+// than or equal to max(t_n), an all-ones marker (masks[t_n]) is written and
+// packing restarts from that same code with t_n + 1 bits. Once t_n reaches 16
+// the work is delegated to pack_16. Returns a copy of t_res.
+[[nodiscard]] vuchar pack_n(const vuint16::const_iterator t_input_begin,
+                            const vuint16::const_iterator t_input_end,
+                            vuchar &t_res, int t_n) {
+  if (t_n == 16) {
+    return pack_16(t_input_begin, t_input_end, t_res);
+  }
+  const int max_value = max(t_n); // max value held within t_n bits
+
+  int step = t_n % 8;
+  int left_shift = 0;
+  int right_shift = 0;
+  uchar current_char = 0;
+  bool char_touched = false;
+
+  // for each element
+  for (auto it = t_input_begin; it != t_input_end; ++it) {
+    // if the maximum value has been reached or exceeded, switch to a new
+    // number of bits
+    if (*it >= max_value) {
+      // write the mask to notify the decompressor of the change in bit
+      // width
+      if ((left_shift += step) >= t_n) {
+        left_shift = (left_shift - t_n) + step;
+      }
+      const auto mask = masks[t_n] >> left_shift;
+      t_res.push_back(static_cast<uchar>(current_char | mask));
+      bool zero_rs = (right_shift == 0);
+
+      right_shift -= step;
+      if (right_shift < 0 && !zero_rs) {
+        // right_shift is now below zero
+        // and it was non-zero before the subtraction: an extra byte is needed
+        current_char = static_cast<uchar>(masks[t_n] >> (-right_shift) & 0xFFU);
+        t_res.push_back(current_char);
+      }
+      t_res.push_back(static_cast<uchar>(masks[t_n]));
+      return pack_n(it, t_input_end, t_res, t_n + 1);
+    }
+
+    // normal write
+    if ((left_shift += step) >= t_n) {
+      left_shift = (left_shift - t_n) + step;
+    }
+    t_res.push_back(
+        static_cast<uchar>(current_char | (*it >> left_shift & 0xFFU)));
+
+    bool zero_rs = (right_shift == 0);
+    right_shift -= step;
+    if (right_shift < 0) {
+      if (!zero_rs) {
+        current_char = static_cast<uchar>(*it >> (-right_shift) & 0xFFU);
+        t_res.push_back(current_char);
+      }
+      right_shift = 8 + right_shift;
+    }
+    if (right_shift == 0) {
+      current_char = static_cast<uchar>(*it & 0xFFU);
+      t_res.push_back(current_char);
+      current_char = 0;
+      char_touched = false;
+    } else {
+      current_char = static_cast<uchar>(*it << right_shift & 0xFFU);
+      char_touched = true;
+    }
+  }
+  // flush the partially filled trailing byte, if any
+  if (char_touched) {
+    t_res.push_back(current_char);
+  }
+  return t_res;
+}
+
+[[nodiscard]] vuchar pack_16(const vuint16::const_iterator t_input_begin,
+                             const vuint16::const_iterator t_input_end,
+                             vuchar &t_res) {
+  std::for_each(t_input_begin, t_input_end, [&t_res](const auto value) {
+    t_res.push_back(static_cast<uchar>(value >> 8 & 0xFFU));
+    t_res.push_back(static_cast<uchar>(value & 0xFFU));
+  });
+  return t_res;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//                                 unpacking                                 //
+///////////////////////////////////////////////////////////////////////////////
+
+[[nodiscard]] vuint16 unpack(ustring &&t_input) {
+  vuint16 vec{};
+  return unpack_n(t_input.begin(), t_input.end(), vec, 9);
+}
+
+[[nodiscard]] vuint16 unpack_n(const ustring::const_iterator t_begin,
+                               const ustring::const_iterator t_end,
+                               vuint16 &t_res, int t_n) {
+  if (t_n == 16) {
+    return unpack_16(t_begin, t_end, t_res);
+  }
+  int step = t_n % 8;
+  int left_shift = 0;
+  int right_shift = 0;
+  const int max_value = max(t_n);
+  for (auto it = t_begin; it < t_end - 1; /* nope */) {
+    uint16_t current_char = 0;
+    // left bits
+    if ((left_shift += step) >= t_n) {
+      left_shift = (left_shift - t_n) + step;
+    }
+    current_char = static_cast<uint16_t>(*it << left_shift) & masks[t_n];
+    // right bits
+    bool zero_rs = (right_shift == 0);
+    right_shift -= step;
+    if (right_shift < 0) {
+      // if previous right shift was negative and not zero
+      if (!zero_rs) {
+        current_char |= *++it << (-right_shift) & masks[16 + right_shift];
+      }
+      right_shift = 8 + right_shift;
+    }
+    current_char |= *++it >> right_shift & masks[8 - right_shift];
+    // char made!
+    if (current_char >= max_value) { // if it is the mask
+      return unpack_n(it + 1, t_end, t_res, t_n + 1);
+    }
+    current_char &= masks[t_n];
+    t_res.push_back(current_char);
+    if (right_shift == 0) {
+      ++it;
+    }
+  }
+  return t_res;
+}
+
+// Rebuilds 16-bit codes from consecutive byte pairs (high byte first, the
+// layout written by pack_16) and appends them to t_res. Returns a copy of
+// t_res.
+//
+// The two bytes are read in separate statements: the former expression
+// `*it << 8 | *++it` read and advanced the same iterator without any
+// guaranteed evaluation order. A dangling last byte (odd-length input) is
+// ignored instead of dereferencing the end iterator.
+[[nodiscard]] vuint16 unpack_16(const ustring::const_iterator t_begin,
+                                const ustring::const_iterator t_end,
+                                vuint16 &t_res) {
+  auto it = t_begin;
+  while (it != t_end) {
+    const unsigned int high = *it;
+    ++it;
+    if (it == t_end) {
+      break; // truncated pair: no low byte available
+    }
+    const unsigned int low = *it;
+    ++it;
+    t_res.push_back(static_cast<uint16_t>(high << 8 | low));
+  }
+  return t_res;
+}
--- a/src/bitpack.hh
+++ b/src/bitpack.hh
@ -0,0 +1,34 @@
+#ifndef LZW_SRC_BITPACK_H_
+#define LZW_SRC_BITPACK_H_
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+[[nodiscard]] std::vector<unsigned char>
+pack(const std::vector<std::uint16_t> &);
+
+[[nodiscard]] std::vector<unsigned char>
+pack_n(std::vector<std::uint16_t>::const_iterator,
+       std::vector<std::uint16_t>::const_iterator, std::vector<unsigned char> &,
+       int);
+
+[[nodiscard]] std::vector<unsigned char>
+pack_16(std::vector<std::uint16_t>::const_iterator,
+        std::vector<std::uint16_t>::const_iterator,
+        std::vector<unsigned char> &);
+
+[[nodiscard]] std::vector<std::uint16_t>
+unpack(std::basic_string<unsigned char> &&);
+
+[[nodiscard]] std::vector<std::uint16_t>
+unpack_n(std::basic_string<unsigned char>::const_iterator t_begin,
+         std::basic_string<unsigned char>::const_iterator t_end,
+         std::vector<std::uint16_t> &, int t_n);
+
+[[nodiscard]] std::vector<std::uint16_t>
+unpack_16(std::basic_string<unsigned char>::const_iterator,
+          std::basic_string<unsigned char>::const_iterator,
+          std::vector<std::uint16_t> &);
+
+#endif /* LZW_SRC_BITPACK_H_ */
--- a/src/common.cc
+++ b/src/common.cc
@ -1,44 +1,69 @@
-/**
- *   \file common.cc
- *   \brief Implementation for functions in common
- */
-
 #include "common.hh"

-using uint8_t = std::uint8_t;
-using uint32_t = std::uint32_t;
-using dic_t = std::map<std::pair<uint32_t, uint8_t>, uint32_t>;
+using std::uint16_t;
+using std::uint8_t;
+using dic_comp_t = std::map<std::pair<uint16_t, uint8_t>, uint16_t>;
+using ustring = std::basic_string<unsigned char>;
+using p_ustring = std::shared_ptr<ustring>;

-/**
- *  Cette fonction a pour double usage la recherche d’une chaine de caractères
- *  dans le dictionnaire, ou bien l’ajout d’une nouvelle chaîne si celle-ci n’est
- *  pas déjà présente. Une chaine de caractères est représentée par un couple
- *  numéro de chaine / caractère, le numéro de chaine renvoyant au caractère
- *  précédent (soit son code ASCII, soit son indice dans le dictionnaire) et le
- *  caractère se référant au dernier caractère de la chaine courante. Si le
- *  numéro de chaine est -1, alors il s’agit du premier caractère de la chaine,
- *  et la valeur renvoyée sera la valeur ASCII du caractère. La fonction renvoie
- *  une paire bool/uint32_t, la valeur booléene indiquant si une nouvelle fut
- *  ajoutée dans le dictionnaire ou non, et le uint32_t indiquant la valeur
- *  numérique de la chaîne dans le dictionnaire.
- *
- *  \param t_dictionary Dictionnaire
- *  \param t_nr_chaine Numéro de la chaine précédant le caractères \p t_c dans \p t_dictionary
- *  \param t_c Caractère suivant la chaine de caractères \p t_nr_chaine
- *  \return const std::pair<bool, uint32_t>
- */
-const std::pair<bool, uint32_t>
-dico(std::map<std::pair<uint32_t, uint8_t>, uint32_t> &t_dictionary,
-     uint32_t t_nr_chaine, uint8_t t_c) {
+// Integer power: returns base raised to exp, computed by exponentiation by
+// squaring. The loop only ends once exp has been shifted down to 0, so exp is
+// expected to be non-negative. No overflow check is made on the int
+// arithmetic.
+[[nodiscard]] int ipow(int base, int exp) {
+  int result = 1;
+  for (;;) {
+    // multiply in the current power of base when the lowest bit of exp is set
+    if (exp & 1) {
+      result *= base;
+    }
+    exp >>= 1;
+    if (exp == 0) {
+      break;
+    }
+    base *= base;
+  }
+  return result;
+}
+
+[[nodiscard]] std::pair<bool, uint16_t>
+dico(dic_comp_t &t_dictionary, const uint16_t t_nr_chaine, const uint8_t t_c) {
  if (t_nr_chaine == 0xFFFF) {
    return std::make_pair(true, t_c);
  }
  auto &e = t_dictionary[std::make_pair(t_nr_chaine, t_c)];
-  return (e != 0) ? std::make_pair(true, e)
-             : std::make_pair(
-                   false,
-                   (e = static_cast<
-                            typename std::remove_reference<decltype(e)>::type>(
-                            t_dictionary.size()) +
-                        255));
+  if (e != 0)
+    return std::make_pair(true, e);
+  e = static_cast<uint16_t>(t_dictionary.size() + 255);
+  return std::make_pair(false, e);
+}
+
+// Returns the byte string designated by t_code and records a new entry in
+// t_dict built from the string of the previous code t_old. Codes below 256
+// stand for a single byte and are never stored in t_dict; stored entries are
+// keyed starting at 256.
+[[nodiscard]] ustring dico_uncompress(std::map<uint16_t, ustring> &t_dict,
+                                      const uint16_t t_code,
+                                      const uint16_t t_old) {
+  // the code exists in the dictionary if it is < 256
+  if (t_code < 256) {
+    ustring e{static_cast<unsigned char>(t_code)};
+    // 256 because no attempt to insert a new character has been made yet
+    if (t_old < 256) {
+      t_dict[static_cast<uint16_t>(t_dict.size() + 256)] =
+          static_cast<unsigned char>(t_old) + e;
+    } else {
+      t_dict[static_cast<uint16_t>(t_dict.size() + 256)] = t_dict[t_old] + e;
+    }
+    return e;
+  }
+
+  // operator[] creates an empty entry for t_code when it is not present yet
+  auto &e = t_dict[t_code];
+  auto str = (t_old < 256) ? ustring{static_cast<unsigned char>(t_old)}
+                           : t_dict[t_old];
+
+  // the code exists in the dictionary
+  if (!e.empty()) {
+    str += e[0];
+    const auto index = static_cast<uint16_t>(t_dict.size() + 256);
+    t_dict[index] = str;
+    return e;
+  }
+
+  // the code does not exist in the dictionary yet
+  str += str[0];
+  e = str;
+  t_dict[t_code] = e;
+  return e;
+}
--- a/src/common.hh
+++ b/src/common.hh
@ -1,17 +1,18 @@
-/**
- *   \file common.hh
- *   \brief Header for functions in common
- */
-
 #ifndef LZW_SRC_COMMON_H_
 #define LZW_SRC_COMMON_H_

 #include <cstdint>
 #include <map>
+#include <memory>

-/// \brief Recherche ou ajout de chaine dans le dictionnaire
-const std::pair<bool, std::uint32_t>
-dico(std::map<std::pair<std::uint32_t, std::uint8_t>, std::uint32_t> &,
-     uint32_t, uint8_t);
+[[nodiscard]] int ipow(int, int);
+
+[[nodiscard]] std::pair<bool, std::uint16_t>
+dico(std::map<std::pair<std::uint16_t, std::uint8_t>, std::uint16_t> &,
+     const std::uint16_t, const std::uint8_t);
+
+[[nodiscard]] std::basic_string<unsigned char>
+dico_uncompress(std::map<std::uint16_t, std::basic_string<unsigned char>> &,
+                const std::uint16_t, const std::uint16_t);

 #endif /* LZW_SRC_COMMON_H_ */
--- a/src/compress.cc
+++ b/src/compress.cc
@ -1,149 +1,77 @@
-/**
- *   \file compress.cc
- *   \brief Implementation of compression
- */
-
 #include "compress.hh"
-#include "utf8.hh"
+#include "common.hh"
+#include "io.hh"
 #include <cassert>
 #include <cstdlib>
 #include <fstream>
-#include <iostream>
-#include "io.hh"
+#include <iterator>
+using std::ios;
+using std::string;
+using std::uint16_t;
+using std::uint8_t;
+using std::vector;
+using vuint16 = vector<uint16_t>;
+using vvuint16 = vector<vuint16>;
+using ustring = std::basic_string<unsigned char>;

-using dict_t = std::map<std::pair<uint32_t, uint8_t>, uint32_t>;
-using ustring = std::basic_string<uint8_t>; // chaîne non encodée
-using uvec = std::vector<std::uint32_t>;         // chaîne encodée
+using dict_t = std::map<std::pair<uint16_t, uint8_t>, uint16_t>;
 using std::printf;

-constexpr size_t CHUNK_SIZE = 32768;
-
-/**
- *
- *  Reçoit une liste de paires std::thread/vecteurs, le premier étant le
- *  processus dont sa sortie est stockée dans le second. La sortie, une liste
- *  de caractères uint32_t, est écrite dans le fichier de sortie \p out.
- *
- *  \param[in] t_threads
- *  \param[out] t_out
- */
-void join_and_write(
-    std::vector<std::pair<std::unique_ptr<std::thread>, uvec>> &t_threads,
-    std::vector<std::vector<std::uint32_t>> &compressed_text) {
-  for (auto &elem : t_threads) {
-    (*elem.first).join();
-  }
-  for (auto &elem : t_threads) {
-    compressed_text.push_back(std::move(elem.second));
-  }
-  t_threads.clear();
+// Reads the whole file `filename` in binary mode and returns its content as
+// a string of unsigned bytes. If the file cannot be opened, prints an error
+// on stderr and exits with status 1.
+[[nodiscard]] ustring read_file(const string &filename) {
+  std::ifstream file{filename, ios::binary};
+  // assert() is compiled out when NDEBUG is defined (Release builds), so the
+  // failure is reported explicitly, in the same way compress() reports an
+  // unusable output file.
+  if (!file) {
+    std::cerr << "Error: could not open input file \"" << filename
+              << "\". Aborting...\n";
+    exit(1);
+  }
+  // keep whitespace bytes: formatted extraction would otherwise skip them
+  file.unsetf(ios::skipws);
+  file.seekg(0, ios::end);
+  const auto file_size = static_cast<std::streamoff>(file.tellg());
+  file.seekg(0, ios::beg);
+  ustring res{};
+  // tellg() yields -1 on failure; only reserve for a meaningful size
+  if (file_size > 0) {
+    res.reserve(static_cast<size_t>(file_size));
+  }
+  res.insert(res.begin(), std::istream_iterator<unsigned char>(file),
+             std::istream_iterator<unsigned char>());
+  return res;
+}

-/**
- *  La chaîne de caractère \p t_text est lue caractère par caractère, et est et
- *  selon la valeur de retour de la fonction \ref dico (permettant dans le même
- *  temps la création du dictionnaire), on rajoute un mot ou pas dans le vecteur
- *  de caractères UTF-8 représentant des mots de chars compressés. La fonction
- *  renvoie ledit vecteur de uint32_t via le paramètre \p t_res.
- *
- *  \param[in] t_text Chaîne de caractères uint8_t représentant le fichier d'entrée
- *  \param[out] t_res Chaîne de caractères de sortie
- */
-void lzw_compress(const std::vector<char> &t_text, uvec &t_res) {
-  dict_t dictionary{};
-  std::puts("Compressing...");
-  uint32_t w = 0xFFFF;
-
-  constexpr size_t DICT_MAX = 7936; /* 12 bits */
-
-  for (const auto &c : t_text) {
-    if (dictionary.size() >= DICT_MAX) {
-      t_res.push_back(static_cast<uint32_t>(w));
-      w = static_cast<uint32_t>(c);
-    } else if (const auto &[exists, pos] =
-                   dico(dictionary, w, static_cast<std::uint8_t>(c));
-               exists) {
+[[nodiscard]] vvuint16 lzw_compress(ustring &&t_text) {
+  vvuint16 res{};
+  const auto DICT_MAX = static_cast<size_t>(ipow(2, 14) - 256); /* 16 bits */
+  uint16_t w = 0xFFFF;
+  vuint16 chunk{};
+  dict_t dict{};
+  for (const auto c : t_text) {
+    if (dict.size() >= DICT_MAX) {
+      if (w != 0xFFFF) {
+        chunk.push_back(w);
+      }
+      res.push_back(chunk);
+      w = 0xFFFF;
+      chunk.clear();
+      dict.clear();
+    }
+    if (const auto &[exists, pos] = dico(dict, w, static_cast<uint8_t>(c));
+        exists) {
      w = pos;
    } else {
-      t_res.push_back(static_cast<uint32_t>(w));
-      w = static_cast<std::uint8_t>(c);
+      chunk.push_back(w);
+      w = static_cast<uint16_t>(c);
    }
  }
+  if (w != 0xFFFF) {
+    chunk.push_back(w);
+    res.push_back(std::move(chunk));
+  }
+  return res;
 }

-/**
- *  Wrapper de la fonction \ref lzw_compress gérant l'ouverture, la lecture,
- *  l'écriture et la fermeture des fichiers d’entrée et de sortie. Si \p
- *  t_out_file est nul (chemin non spécifié), il prendra alors la valeur de
- *  \p t_in_file à laquelle sera annexé l’extension `.lzw`.
- *
- *  \param[in] t_in_file Chemin vers le fichier d’entrée
- *  \param[in] t_out_file Chemin vers le fichier de sortie
- */
 void compress(const std::string &t_in_file, const char *t_out_file) {
-  // Fichier d’entrée
-  std::ifstream input_file{t_in_file};
-  if (!input_file.is_open()) {
-    std::cerr << "Error at " << __FILE__ << ":" << __LINE__ - 2
-              << ": could not open output file \"" << t_in_file
-              << "\". Aborting...\n";
-    exit(1);
-  }
-
-  // Fichier de sortie
-  FILE *out =
-      (t_out_file != nullptr) ? fopen(t_out_file, "wb") : fopen("output.lzw", "wb");
-  if (out == nullptr) {
+  std::ofstream out{(t_out_file != nullptr) ? t_out_file : "output.lzw",
+                    ios::out | ios::binary};
+  if (!out.is_open()) {
    std::cerr << "Error at " << __FILE__ << ":" << __LINE__ - 4
              << ": could not open output file. Aborting...\n";
-    input_file.close();
    exit(1);
  }
-
-  // collection of chunks
-  std::vector<std::vector<std::uint32_t>> compressed_text{};
-
-  // thread pool
-  std::vector<std::pair<std::unique_ptr<std::thread>, uvec>> threads{};
-
-  // chunk chars
-  std::vector<char> chunk(CHUNK_SIZE, 0);
-  while (input_file.read(chunk.data(),
-                         static_cast<std::streamsize>(chunk.size()))) {
-    threads.emplace_back(nullptr, uvec{});
-    threads.back().second.reserve(CHUNK_SIZE);
-    threads.back().first = std::make_unique<std::thread>(
-        std::thread{lzw_compress, chunk, ref(threads.back().second)});
-    assert(threads.back().first);
-    if (threads.size() >= 8) {
-      join_and_write(threads, compressed_text);
-    }
-  }
-
-  if (!threads.empty()) {
-    join_and_write(threads, compressed_text);
-  }
-
-  if (input_file.tellg() != std::ios::end) {
-    std::puts("Leftovers, compressing...");
-    {
-      const auto prev_pos = input_file.tellg();
-      input_file.seekg(0, std::ios::end);
-      chunk.reserve(static_cast<size_t>(input_file.tellg() - prev_pos));
-      input_file.seekg(prev_pos, std::ios::beg);
-      std::istreambuf_iterator<char> itr(input_file);
-      for (std::streamoff i = 0; i < prev_pos; ++i, ++itr){
-        ;
-      }
-      chunk.assign((itr), std::istreambuf_iterator<char>());
-    }
-    uvec ret{};
-    lzw_compress(chunk, ret);
-    compressed_text.push_back(std::move(ret));
-  }
-
+  const auto compressed_text(lzw_compress(read_file(t_in_file)));
  write_file(out, compressed_text);
-
-  fclose(out);
-  input_file.close();
+  out.close();
 }
--- a/src/compress.hh
+++ b/src/compress.hh
@ -1,25 +1,14 @@
-/**
- *   \file compress.hh
- *   \brief Header for compression functions
- */
-
 #ifndef LZW_SRC_COMPRESS_H_
 #define LZW_SRC_COMPRESS_H_

 #include "common.hh"
-#include <vector>
 #include <iostream>
 #include <thread>
+#include <vector>

-/// \brief Exécution des threads et écriture de leur résultat dans le fichier de sortie
-void join_and_write(std::vector<std::pair<std::unique_ptr<std::thread>,
-                                          std::vector<std::uint32_t>>> &,
-                    std::vector<std::vector<std::uint32_t>> &);
+[[nodiscard]] std::vector<std::vector<std::uint16_t>>
+lzw_compress(std::basic_string<unsigned char> &&);

-/// \brief Compression d'une chaine de caractères
-void lzw_compress(const std::vector<char> &, std::vector<std::uint32_t> &);
-
-/// \brief Wrapper de \ref lzw_compress
 void compress(const std::string &, const char *);

 #endif /* LZW_SRC_COMPRESS_H_ */
--- a/src/io.cc
+++ b/src/io.cc
@ -1,95 +1,27 @@
-/**
- *   \file io.cc
- *   \brief Body for file reading and writing
- */
-
 #include "io.hh"
+#include "bitpack.hh"
+#include <array>

+using std::uint16_t;
+using std::vector;
+using vuint16 = vector<uint16_t>;
+using vvuint16 = vector<vuint16>;
+
+void write_file(std::ofstream &t_out, const vvuint16 &t_chunks) {
+  const auto nr_chunks = static_cast<uint16_t>(t_chunks.size());
 #ifdef Debug
-constexpr bool debug_mode = true;
-#else
-constexpr bool debug_mode = false;
+  std::printf("Number of chunks: %u\n", nr_chunks);
 #endif
-
-/**
- *  Écrit dans le fichier \p t_out les chunks passés en paramètre. Le fichier de
- *  sortie est composé des éléments suivants :\n
- *  - Sur quatre octets sont écrit un `uint32_t` déterminant la taille d'un
- *    caractère\n
- *  - Sur quatre octets sont écrit un `uint32_t` déterminant le nombre de chunk
- *    composant le fichier\n
- *  - Sont ensuite écrits les chunks sur des nombres d’octets variable suivant
- *    la taille d’un caractère et le nombre de caractères\n
- *  \n
- *  Un chunk est composé de la manière qui suit :\n
- *  - Sur quatre octets sont écrit un `uint32_t` déterminant le nombre de
- *    caractères composant le chunk\n
- *  - Les caractères composant le chunk, accolés les uns au autres. Si le
- *    dernier caractère ne remplis pas le dernier octet du chunk, alors ces
- *    derniers bits seront initialisés à 0.\n
- *  La taille d’un chunk est donc la taille d’un caractère multiplié par le
- *  nombre de caractères du chunk, le tout divisé par 8. Si le résultat n’est
- *  pas un entier, alors il est nivelé vers le haut pour avoir un nombre entier
- *  d’octets (e.g. si le résultat est 103.4, alors 104 octets seront utilisés).
- *
- *  \param[out] t_out Fichier de sortie
- *  \param[in] t_text Collection ordonnée des chunks à écrire dans \p t_out
- */
-void write_file(FILE *t_out, std::vector<std::vector<std::uint32_t>> &t_text) {
-  {
-    uint32_t char_size = 12;
-    if constexpr (debug_mode) {
-      std::printf("Char size: %u\n", char_size);
-    }
-    fwrite(&char_size, sizeof(uint32_t), 1, t_out);
-    auto size = static_cast<uint32_t>(t_text.size());
-    if constexpr (debug_mode) {
-      std::printf("Number of chunks: %u\n", size);
-    }
-    fwrite(&size, sizeof(uint32_t), 1, t_out);
-  }
-  for(const auto &chunk : t_text) {
-    // write size of chunk in uint32_t
-    {
-      auto size = static_cast<uint32_t>(chunk.size());
-      if constexpr (debug_mode) {
-        std::printf("Size of chunk: %u\n", size);
-      }
-      fwrite(&size, sizeof(uint32_t), 1, t_out);
-    }
-    uint8_t remainder = 0x00;
-    for(size_t i = 0; i < chunk.size(); ++i) {
-      if(i % 2 == 0) {
-        // char = xxxx xxxx xxxx
-        //        ^^^^^^^^^ ^^^^
-        //          write   keep in remainder as xxxx0000
-        auto temp = static_cast<unsigned char>(chunk[i] >> 4);
-        fwrite(&temp, sizeof(temp), 1, t_out);
-        if constexpr (debug_mode) {
-          std::printf("writing: %x\t\t", temp);
-        }
-        remainder = static_cast<uint8_t>(chunk[i] << 4);
-      } else {
-        // already have `remainder = yyyy0000`
-        //          char = xxxx xxxx xxxx
-        //                 ^^^^ ^^^^^^^^^
-        // remainder = yyyyxxxx   write after remainder
-        // remainder = 00000000
-        remainder &= static_cast<unsigned char>(chunk[i]) >> 8 & 0xF0;
-        fwrite(&remainder, sizeof(remainder), 1, t_out);
-        if constexpr (debug_mode) {
-          std::printf("writing remainder: %x\t\t", remainder);
-        }
-        auto temp = static_cast<unsigned char>(chunk[i]);
-        fwrite(&temp, sizeof(temp), 1, t_out);
-        if constexpr (debug_mode) {
-          std::printf("writing: %x\n", temp);
-        }
-        remainder = 0x00;
-      }
-    }
-    if(remainder != 0) {
-      fwrite(&remainder, sizeof(remainder), 1, t_out);
-    }
+  t_out.write(reinterpret_cast<const char *>(&nr_chunks), sizeof(nr_chunks));
+  for (const auto &chunk : t_chunks) {
+    write_chunk(t_out, chunk);
  }
 }
+
+// Serialise one compressed chunk to `t_out`: the codes are first run through
+// pack(), then the packed length is written as a raw 32-bit value, followed by
+// the packed bytes themselves.
+void write_chunk(std::ofstream &t_out, const vuint16 &t_chunk) {
+  const auto packed = pack(t_chunk);
+  const auto packed_len = static_cast<uint32_t>(packed.size());
+  const auto *const len_bytes = reinterpret_cast<const char *>(&packed_len);
+  const auto *const payload = reinterpret_cast<const char *>(packed.data());
+  // Length header first so the reader knows how many bytes to fetch.
+  t_out.write(len_bytes, sizeof(packed_len));
+  t_out.write(payload, sizeof(packed[0]) * packed.size());
+}
--- a/src/io.hh
+++ b/src/io.hh
@ -1,31 +1,15 @@
-/**
- *   \file io.h
- *   \brief Header for file reading and writing
- */
-
 #ifndef LZW_SRC_IO_H_
 #define LZW_SRC_IO_H_

-#include <cstdio>
 #include <cstdint>
+#include <cstdio>
+#include <fstream>
+#include <iostream>
 #include <vector>

-/*
- * Un fichier compressé se compose ainsi :
- * char_size : taille d'un caractère en bits (1B)
- * nb_chunk : nombre de chunks (4B)
- * chunks* : chunks
- *
- * Un chunk se compose ainsi :
- * nb_char_chunk : nombre de caractères du chunk (2B)
- * text* : caractères de taille char_size (ceil((char_size * nb_char_chunk) / 8))
- *
- * Si le dernier caractère ne termine pas le dernier octet du chunk, les
- * derniers bits sont mit à zéro
- */
+void write_file(std::ofstream &,
+                const std::vector<std::vector<std::uint16_t>> &);

-
-/// \brief Écrit dans le fichier le texte compressé
-void write_file(FILE *, std::vector<std::vector<std::uint32_t>> &);
+void write_chunk(std::ofstream &, const std::vector<std::uint16_t> &);

 #endif /* LZW_SRC_IO_H_ */
--- a/src/main.cc
+++ b/src/main.cc
@ -1,68 +1,45 @@
-/**
- *   \file main.cc
- *   \brief Main file
- *
- *
- *
- */
-
-#ifdef Debug
-constexpr bool debug_mode = true;
-#else
-constexpr bool debug_mode = false;
-#endif
-
 #include "compress.hh"
-#include "getopt.h"
+#include "uncompress.hh"
+#include <cassert>
+#include <getopt.h>
+#include <tuple>

 using std::printf;
 using std::puts;
+using std::string;
+using std::tuple;

 // custom types ///////////////////////////////////////////////////////////////

-/*
-  Dictionnaire :
-  <
-      <
-          numéro chaine précédente,
-          caractère ASCII
-      >
-      numéro chaine courante
-  >
- */
 using dic_t = std::map<std::pair<uint32_t, uint8_t>, uint32_t>;
 using ustring = std::basic_string<uint8_t>; // chaine non encodée
 using uvec = std::vector<uint32_t>;         // chaine encodée

-/**
- *  \brief Affichage d’aide
- */
 void help() {
-  puts("Usage:");
-  puts("lzw  [-options] [-i path] [-o path]");
-  puts("\tThe default action is to compress the input file to a .lzw file");
-  puts("\tin which the directory in which the software is executed.");
-  puts("\tOptions available:");
-  puts("\t-i\tpath to the input file (mandatory)");
-  puts("\t-o\tpath to the output file (if the file already exists, it will");
-  puts("\t\tbe overwritten). Default: input path + \".lzw\"");
-  puts("\t-c\tcompress the input file");
-  puts("\t-d\tdecompresses the input file to the output file. If no output");
-  puts("\t\tpath has not been entered and if the input file ends with ");
-  puts("\t\t\".lzw\", the extension \".lzw\" will be removed; otherwise, the ");
-  puts("\t\textension \".uncompresed\" will be added");
+  // Print the usage text on stdout with a single puts() call. Adjacent string
+  // literals are concatenated by the compiler, which avoids the fragile
+  // backslash-newline continuations (a mistyped escape there used to print a
+  // literal "\n" and drop the closing quote after ".lzw").
+  // NOTE(review): the default output names described below do not obviously
+  // match the code: uncompress() appends "_uncompressed" to the input path and
+  // never strips ".lzw", and the compression path appears to default to
+  // "output.lzw" -- confirm which behaviour is intended.
+  puts("Usage:\n"
+       "lzw  [-options] [-i path] [-o path]\n\n"
+       "The default action is to compress the input file to a .lzw file\n"
+       "in the directory in which the software is executed.\n"
+       "Options available:\n"
+       "-h --help\n"
+       "\tdisplay the current message\n"
+       "-i --input\n"
+       "\tpath to the input file (MANDATORY)\n"
+       "-o --output\n"
+       "\tpath to the output file (if the file already exists, it will be\n"
+       "\toverwritten). Default: input path + \".lzw\"\n"
+       "-c --compress\n"
+       "\tcompress the input file\n"
+       "-u --uncompress\n"
+       "\tuncompresses the input file to the output file. If no output path\n"
+       "\thas been entered and if the input file ends with \".lzw\",\n"
+       "\tthe extension \".lzw\" will be removed; otherwise, the extension\n"
+       "\t\"_uncompressed\" will be added");
 }

-int main(int argc, char *argv[]) {
-  if constexpr (debug_mode) {
-    for (int i = 0; i < argc; ++i)
-      printf("argv[%d] = %s\n", i, argv[i]);
-  }
-
-  std::string input_path{};
-  std::string output_path{};
-  bool compressing = true;
-
+[[nodiscard]] std::tuple<string, string, bool> process_args(int t_argc,
+                                                            char *t_argv[]) {
+  auto ret = std::make_tuple(string{}, string{}, true);
  while (true) {
    int option_index = 0;
    static struct option long_options[] = {
@ -72,97 +49,49 @@ int main(int argc, char *argv[]) {
        {"compress", no_argument, nullptr, 'c'},
        {"uncompress", no_argument, nullptr, 'u'},
        {nullptr, 0, nullptr, 0}};
-    int c = getopt_long(argc, argv, "hi:o:cu", long_options, &option_index);
+    int c = getopt_long(t_argc, t_argv, "hi:o:cu", long_options, &option_index);
    if (c == -1)
      break;
    switch (c) {
-    case 0: {
-      if constexpr (debug_mode) {
-        printf("\noption %s", long_options[option_index].name);
-        if (optarg) {
-          printf(" with arg %s\n", optarg);
-        }
-      }
+    case 0:
      break;
-    }
-    case 'h': {
-      if constexpr (debug_mode) {
-        printf("From main - option --help passed\n");
-      }
+    case 'h':
      help();
-      return 0;
-    }
-    case 'i': {
-      if constexpr (debug_mode) {
-        printf("From main - option --input with value '%s'\n", optarg);
-      }
-      input_path = optarg;
+      exit(0);
+    case 'i':
+      std::get<0>(ret) = optarg;
      break;
-    }
-    case 'o': {
-      if constexpr (debug_mode) {
-        printf("From main - option --output with value '%s'\n", optarg);
-      }
-      output_path = optarg;
+    case 'o':
+      std::get<1>(ret) = optarg;
      break;
-    }
-    case 'c': {
-      if constexpr (debug_mode) {
-        printf("From main - option --compress\n");
-      }
-      compressing = true;
+    case 'c':
+      std::get<2>(ret) = true;
      break;
-    }
-    case 'u': {
-      if constexpr (debug_mode) {
-        printf("From main - option --uncompress\n");
-      }
-      compressing = false;
+    case 'u':
+      std::get<2>(ret) = false;
      break;
-    }
    case '?':
-    default: {
+      [[fallthrough]];
+    default:
      puts("Error: unknown parameter.");
-      if constexpr (debug_mode) {
-        printf("From main - option -?\n");
-      }
      help();
-      return 1;
-    }
+      exit(1);
    }
  }
+  return ret;
+}

+int main(int argc, char *argv[]) {
+  const auto [input_path, output_path, compressing] = process_args(argc, argv);
  if (input_path.empty()) {
-    puts("Error: no input file specified");
-    return 2;
+    help();
+    return 0;
  }
-
  if (compressing) {
-    /*
-      TODO:
-      - compresser le fichier d’entrée morceaux par morceaux, 16Ko à la fois
-      - écrire la taille du segment compressé, puis le segment compressé
-      - multithreading
-      - compression multiple : nombre de compressions puis fichier compressé
-      - bit-packing, limiter la taille du dictionnaire pour un certain nombre de
-        bits.
-     */
-    if constexpr (debug_mode) {
-      puts("Beginning compression");
-    }
-    if (output_path.empty()) {
-      compress(input_path, nullptr);
-    } else {
-      compress(input_path, output_path.c_str());
-    }
-    // compress(input_path, output_path.c_str());
+    compress(input_path, (output_path.empty()) ? nullptr : output_path.c_str());
  } else {
-    puts("Not yet implemented :(");
-    /*
-      Inversion des types du dictionnaire pour retrouver les chaînes plus
-      aisément
-     */
+    uncompress(input_path,
+               (output_path.empty()) ? nullptr : output_path.c_str());
  }
-
  return 0;
 }
--- a/src/uncompress.cc
+++ b/src/uncompress.cc
@ -0,0 +1,60 @@
+#include "uncompress.hh"
+#include "bitpack.hh"
+#include "common.hh"
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iterator>
+
+using std::fclose;
+using std::fopen;
+using std::fseek;
+using std::string;
+using std::uint16_t;
+using std::vector;
+using ustring = std::basic_string<unsigned char>;
+using vuint16 = vector<uint16_t>;
+
+// Decode one chunk of LZW codes back into bytes.
+//
+// The first code is emitted directly as a single byte; every following code is
+// resolved through dico_uncompress(), which receives the dictionary, the
+// current code and the previous code (presumably it also grows the dictionary
+// -- confirm against its definition).
+//
+// An empty input yields an empty string instead of reading t_compressed[0]
+// out of bounds.
+[[nodiscard]] ustring lzw_uncompress(vuint16 &&t_compressed) {
+  ustring ret{};
+  if (t_compressed.empty()) {
+    return ret;
+  }
+  std::map<uint16_t, ustring> dict{};
+  uint16_t old = t_compressed.front();
+  ret.push_back(static_cast<unsigned char>(old));
+  for (auto it = t_compressed.begin() + 1; it != t_compressed.end(); ++it) {
+    const auto uncompressed{dico_uncompress(dict, *it, old)};
+    ret.insert(ret.end(), uncompressed.begin(), uncompressed.end());
+    old = *it;
+  }
+
+  return ret;
+}
+
+// Decompress the archive `t_input_name`.
+//
+// The input starts with a raw 16-bit chunk count, followed by that many
+// chunks, each of which is decoded and written by uncompress_chunk().
+//
+// t_input_name:  path of the compressed input file.
+// t_output_name: path of the output file; when nullptr the output is written
+//                to `t_input_name` + "_uncompressed".
+//
+// On any I/O failure an error is printed on stderr and the process exits with
+// status 1. These checks are done at run time because assert() is compiled out
+// when NDEBUG is defined, which would let a null FILE* reach fread().
+void uncompress(const string &t_input_name, const char *t_output_name) {
+  FILE *const input = std::fopen(t_input_name.c_str(), "rb");
+  if (input == nullptr) {
+    std::fprintf(stderr, "Error: could not open input file %s. Aborting...\n",
+                 t_input_name.c_str());
+    std::exit(1);
+  }
+  const string output_name{(t_output_name != nullptr)
+                               ? t_output_name
+                               : t_input_name + "_uncompressed"};
+  std::ofstream output{output_name, std::ios::out | std::ios::binary};
+  if (!output.is_open()) {
+    std::fprintf(stderr, "Error: could not open output file %s. Aborting...\n",
+                 output_name.c_str());
+    std::fclose(input);
+    std::exit(1);
+  }
+  uint16_t nb_chunks = 0;
+  // fread() returns the number of complete objects read; anything other than 1
+  // means the header is missing.
+  if (std::fread(&nb_chunks, sizeof(nb_chunks), 1, input) != 1) {
+    std::fprintf(stderr,
+                 "Error: could not read the number of chunks from %s. "
+                 "Aborting...\n",
+                 t_input_name.c_str());
+    output.close();
+    std::fclose(input);
+    std::exit(1);
+  }
+  for (uint16_t i = 0; i < nb_chunks; ++i) {
+    uncompress_chunk(input, output);
+  }
+  output.close();
+  std::fclose(input);
+}
+
+// Read one chunk from `t_input`, decode it and append the result to
+// `t_output`.
+//
+// A chunk is a raw 32-bit byte count followed by that many packed bytes. The
+// bytes go through unpack() and then lzw_uncompress(), and the decoded bytes
+// are written to `t_output`.
+//
+// If the size header or the payload cannot be read in full (truncated or
+// corrupted input), an error is printed on stderr and the process exits with
+// status 1 rather than decoding a partially filled buffer.
+void uncompress_chunk(FILE *const t_input, std::ofstream &t_output) {
+  uint32_t size_chunk = 0;
+  if (std::fread(&size_chunk, sizeof(size_chunk), 1, t_input) != 1) {
+    std::fprintf(stderr, "Error: could not read chunk size. Aborting...\n");
+    std::exit(1);
+  }
+  // Read straight into the string handed to unpack(); no intermediate buffer.
+  ustring chunk(size_chunk, 0);
+  if (std::fread(chunk.data(), sizeof(unsigned char), size_chunk, t_input) !=
+      size_chunk) {
+    std::fprintf(stderr, "Error: chunk is shorter than announced. Aborting...\n");
+    std::exit(1);
+  }
+  auto unpacked = unpack(std::move(chunk));
+  auto uncompressed_chunk = lzw_uncompress(std::move(unpacked));
+  t_output.write(reinterpret_cast<const char *>(uncompressed_chunk.data()),
+                 sizeof(uncompressed_chunk[0]) * uncompressed_chunk.size());
+}
--- a/src/uncompress.hh
+++ b/src/uncompress.hh
@ -0,0 +1,16 @@
+#ifndef LZW_SRC_UNCOMPRESS_H_
+#define LZW_SRC_UNCOMPRESS_H_
+
+#include <fstream>
+#include <memory>
+#include <string>
+#include <vector>
+
+[[nodiscard]] std::basic_string<unsigned char>
+lzw_uncompress(std::vector<std::uint16_t> &&);
+
+void uncompress(const std::string &, const char *);
+
+void uncompress_chunk(FILE *, std::ofstream &);
+
+#endif /* LZW_SRC_UNCOMPRESS_H_ */
--- a/src/utf8.cc
+++ b/src/utf8.cc
@ -1,52 +0,0 @@
-/**
- *   \file utf8.cc
- *   \brief Implementation for UTF-8 related functions
- */
-
-#include "utf8.hh"
-#include <string>
-
-using FILE = std::FILE;
-using uint8_t = std::uint8_t;
-using uint32_t = std::uint32_t;
-using ustring = std::basic_string<uint8_t>; // chaine non encodée
-
-/**
- *  Les caractères \c passés en argument sont écrit dans le fichier de sortie au
- *  format UTF-8
- *
- *  \param[in] out Fichier de sortie
- *  \param[in] c Caractères à écrire dans \p out
- */
-void write_utf8(FILE* t_out, uint32_t t_c) {
-  if(t_c < 128) {
-    fwrite(&t_c, sizeof(unsigned char), 1, t_out);
-    return;
-  }
-  size_t loops = 0;
-  unsigned char header = 0;
-  if (t_c < 2048) {
-    loops = 1;
-    header = 0xC0;
-  } else if (t_c < 65536) {
-    loops = 2;
-    header = 0xE0;
-  } else if (t_c < 2097152) {
-    loops = 3;
-    header = 0xF0;
-  } else if (t_c < 67108864) {
-    loops = 4;
-    header = 0xF8;
-  } else {
-    loops = 5;
-    header = 0xFC;
-  }
-
-  ustring str(loops + 1, 0);
-  for (size_t i = 0; i <= loops; ++i) {
-    str[i] = static_cast<unsigned char>(
-        ((t_c & ((i == loops) ? 0x3F : 0xFF)) >> ((loops - i) * 6)) +
-        ((i == 0) ? header : 0x80));
-  }
-  fwrite(str.data(), sizeof(unsigned char), str.size(), t_out);
-}
--- a/src/utf8.hh
+++ b/src/utf8.hh
@ -1,26 +0,0 @@
-/**
- *   \file utf8.hh
- *   \brief Header for UTF-8 related functions
- */
-
-#ifndef LZW_SRC_UTF8_H_
-#define LZW_SRC_UTF8_H_
-
-#include <cstdio>
-#include <cstdint>
-
-/*
-  L’encodage des caractères se fait en UTF-8
-  char < 128 => "0xxxxxxx" 7bits
-  char < 2,048 => "110xxxxx 10xxxxxx" 11bits
-  char < 65,536 => "1110xxxx 10xxxxxx 10xxxxxx" 16bits
-  char < 2,097,152 => "11110xxx 10xxxxxx 10xxxxxx 10xxxxxx" 21bits
-  char < 67,108,864 => "111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx" 26bits
-  char < 2,147,483,648 => "1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx" 31bits
-*/
-
-
-/// \brief Écrit les caractères au format UTF-8
-void write_utf8(std::FILE* t_out, std::uint32_t t_c);
-
-#endif /* LZW_SRC_UTF8_H_ */
Author	SHA1	Message	Date
Phuntsok Drak-pa	bbfb669781	Merge branch 'master' of labs.phundrak.fr:phundrak/lzw-assignment	2019-08-22 17:57:03 +02:00
Phuntsok Drak-pa	e863923b0d	Stripping binary from unneeded data	2019-08-22 17:56:58 +02:00
Phuntsok Drak-pa	412089e653	added nodiscard attributes	2019-08-19 16:40:13 +02:00
Phuntsok Drak-pa	766e9ceb41	better gitignore	2019-06-16 17:21:11 +02:00
Phuntsok Drak-pa	f763f476fb	reduced usage of puts	2018-11-21 10:28:13 +01:00
Phuntsok Drak-pa	946350c88b	help message instead of error in case of no input path	2018-11-21 01:59:02 +01:00
Phuntsok Drak-pa	fdcdc6519a	better code quality	2018-11-21 01:49:30 +01:00
Phuntsok Drak-pa	988dbcaa87	woops, forgot these lines	2018-11-21 01:21:13 +01:00
Phuntsok Drak-pa	631bca6d8f	fix building issue with travis I hope	2018-11-21 01:13:37 +01:00
Phuntsok Drak-pa	db9aa784ef	travis update	2018-11-21 01:05:46 +01:00
Phuntsok Drak-pa	b3ea126e42	added cache for apt with travis	2018-11-21 00:59:02 +01:00
Phuntsok Drak-pa	ba4706cec4	travis update	2018-11-21 00:56:35 +01:00
Phuntsok Drak-pa	4192e9789d	Merge branch 'master' of github.com:Phundrak/lzw-assignment	2018-11-21 00:53:55 +01:00
Phuntsok Drak-pa	652146a93a	testing cloudsonar with travis	2018-11-21 00:52:03 +01:00
Phuntsok Drak-pa	834504c886	Update README.md	2018-11-21 00:38:26 +01:00
Phuntsok Drak-pa	2b932f3e1a	added gitlabCI config file	2018-11-02 14:41:43 +01:00
Phuntsok Drak-pa	6d4e29b030	cleaned a bit and made general Makefile	2018-06-24 18:59:17 +02:00
Phuntsok Drak-pa	8e23eb858e	BUG FIXED, DOBBY IS FREEEEEE!!!	2018-06-24 18:34:43 +02:00
Phuntsok Drak-pa	3e9d94d865	did not mean for this line to be commited	2018-06-24 18:08:50 +02:00
Phuntsok Drak-pa	0523fe77f2	Bug identified, first char of new chunk not witten (see uncompress.cc:59)	2018-06-24 18:03:09 +02:00
Phuntsok Drak-pa	e01334a566	removed Doxygen from release build	2018-06-24 18:02:39 +02:00
Phuntsok Drak-pa	4c212907c2	bugfixes	2018-06-21 17:38:51 +02:00
Phuntsok Drak-pa	94435ef305	updated help message	2018-06-18 18:04:07 +02:00
Phuntsok Drak-pa	d3ea93d13e	Updated README.md	2018-06-18 17:53:38 +02:00
Phuntsok Drak-pa	8b8032e533	hopefully will fix Travis-CI builds	2018-06-18 17:10:49 +02:00
Phuntsok Drak-pa	5830f4225c	bugfix: fixed writing mask for charsize increase	2018-06-18 16:51:38 +02:00
Phuntsok Drak-pa	8777183821	better, but still some bugs	2018-06-17 06:38:57 +02:00
Phuntsok Drak-pa	bae6d01bc8	fixed new bitpack size detection	2018-06-15 23:08:17 +02:00
Phuntsok Drak-pa	79770384c1	Deleted unused files	2018-06-15 19:58:25 +02:00
Phuntsok Drak-pa	9b892e12ee	OSX deactivated for now	2018-06-15 19:54:47 +02:00
Phuntsok Drak-pa	2b3e5e3f2b	Fixed bit-packing bug	2018-06-15 19:54:00 +02:00
Phuntsok Drak-pa	5b9f3ccd6a	forgot to change this line	2018-06-11 22:11:58 +02:00
Phuntsok Drak-pa	6c3fb8b6a7	removed [[nodiscard]] and changed chunk binary format	2018-06-11 21:26:05 +02:00
Phuntsok Drak-pa	67a88aaf91	bug fixed as input stream was char and not unsigned char	2018-06-11 21:02:59 +02:00
Phuntsok Drak-pa	ec85a4b978	Some fixes, dictionary still broken	2018-06-11 16:34:35 +02:00
Phuntsok Drak-pa	e54e5fa07d	Bit-unpacking FIXEDDDDDDDDDDDD	2018-06-11 00:58:01 +02:00
Phuntsok Drak-pa	5eb33fb04f	I think I fixed the lzw algo, but somehow uncompression still broken	2018-06-10 23:44:10 +02:00
Phuntsok Drak-pa	fcfe944c5d	Added (broken) uncompressing algo + fixed bit-unpacking	2018-06-10 21:21:06 +02:00
Phuntsok Drak-pa	9f70b01886	Moved ipow function	2018-06-09 23:11:27 +02:00
Phuntsok Drak-pa	f8b493de2b	added bit-unpacking algorithm	2018-06-09 22:59:11 +02:00
Phuntsok Drak-pa	1909b52c32	bugfix	2018-06-09 11:00:34 +02:00
Phuntsok Drak-pa	8e04f74bba	bugfix	2018-06-09 03:30:37 +02:00
Phuntsok Drak-pa	6d779d8606	Changed file writing method to dynamic bit-packing	2018-06-09 03:18:28 +02:00
Phuntsok Drak-pa	d901cd60f2	removed bettercode	2018-06-09 02:50:48 +02:00
Phuntsok Drak-pa	cbd6d89234	Added function for bit-packing compression-side	2018-06-09 02:44:03 +02:00
Phuntsok Drak-pa	b807ee259e	changed some types	2018-06-05 11:38:27 +02:00
Phuntsok Drak-pa	816b5cf1a7	update CircleCI	2018-05-25 13:39:55 +02:00
Phuntsok Drak-pa	3413bc9372	Added doxygen and graphviz to release builds on TravisCI and CircleCI	2018-05-25 13:36:21 +02:00
Phuntsok Drak-pa	74804a1ead	Bugfixes and additional changes	2018-05-25 13:28:37 +02:00
Phuntsok Drak-pa	3bd2e15c76	removed unused prototype and header	2018-05-25 13:22:51 +02:00
Lucien Cartier-Tilet	c3ec86de87	Added documentation generation as CMake target for Release	2018-05-25 13:20:46 +02:00
Phuntsok Drak-pa	5ee86709ee	I'm so stupid omb...	2018-05-25 12:13:21 +02:00
Phuntsok Drak-pa	44434df096	Code cleanup	2018-05-25 12:13:12 +02:00
Phuntsok Drak-pa	72c71c306f	Changed I/O, compression behavior, removed threads	2018-05-25 12:00:58 +02:00
Phuntsok Drak-pa	2cfb560153	For some reason it worked before but not anymore	2018-05-25 12:00:30 +02:00
Phuntsok Drak-pa	c2f3621815	updated bettercodehub settings	2018-05-23 22:54:41 +02:00
Phuntsok Drak-pa	2366693069	updated bettercodehub settings	2018-05-23 22:51:27 +02:00
Phuntsok Drak-pa	e5c6ce7afc	Added CircleCI badge	2018-05-23 18:11:41 +02:00
Phuntsok Drak-pa	440b38bf22	Merge pull request #4 from Phundrak/circle-ci Circle ci	2018-05-23 18:03:00 +02:00