Compare commits
	
		
			No commits in common. "master" and "circle-ci" have entirely different histories.
		
	
	
		
	
		
							
								
								
									
										6
									
								
								.bettercodehub.yml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								.bettercodehub.yml
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,6 @@
 | 
			
		||||
component_depth: 2
 | 
			
		||||
languages:
 | 
			
		||||
    - cpp
 | 
			
		||||
exclude:
 | 
			
		||||
    - includes/*
 | 
			
		||||
    - .*/*
 | 
			
		||||
@ -5,4 +5,4 @@ jobs:
 | 
			
		||||
      - image: purplekarrot/gcc-8
 | 
			
		||||
    steps:
 | 
			
		||||
      - checkout
 | 
			
		||||
      - run: cd build && cmake -DCMAKE_BUILD_TYPE=Release .. && make -j
 | 
			
		||||
      - run: cd build && cmake -DCMAKE_BUILD_TYPE=Debug .. && make -j
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										12
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										12
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@ -1,13 +1,9 @@
 | 
			
		||||
*~
 | 
			
		||||
 | 
			
		||||
gmon\.out
 | 
			
		||||
/cmake-build-debug/Makefile
 | 
			
		||||
 | 
			
		||||
cmake-build-debug/
 | 
			
		||||
 | 
			
		||||
\.idea/
 | 
			
		||||
*.lzw
 | 
			
		||||
 | 
			
		||||
\.scannerwork/
 | 
			
		||||
 | 
			
		||||
bw-output/
 | 
			
		||||
 | 
			
		||||
bin/
 | 
			
		||||
build/
 | 
			
		||||
debug/
 | 
			
		||||
 | 
			
		||||
@ -1,7 +0,0 @@
 | 
			
		||||
image: rikorose/gcc-cmake:latest
 | 
			
		||||
stages:
 | 
			
		||||
  - build
 | 
			
		||||
build:
 | 
			
		||||
  stage: build
 | 
			
		||||
  script:
 | 
			
		||||
    - mkdir -p build bin && cd build && cmake -DCMAKE_BUILD_TYPE=Release .. && make -j
 | 
			
		||||
							
								
								
									
										42
									
								
								.travis.yml
									
									
									
									
									
								
							
							
						
						
									
										42
									
								
								.travis.yml
									
									
									
									
									
								
							@ -4,48 +4,60 @@ compiler: clang
 | 
			
		||||
os: linux
 | 
			
		||||
dist: trusty
 | 
			
		||||
addons:
 | 
			
		||||
    sonarcloud:
 | 
			
		||||
        organization: "phundrak-github"
 | 
			
		||||
        token:
 | 
			
		||||
            secure: ${SONAR_TOKEN}
 | 
			
		||||
    apt:
 | 
			
		||||
        config:
 | 
			
		||||
            retries: true
 | 
			
		||||
        sources:
 | 
			
		||||
            - ubuntu-toolchain-r-test
 | 
			
		||||
            - llvm-toolchain-trusty-5.0
 | 
			
		||||
        packages:
 | 
			
		||||
            - g++-7
 | 
			
		||||
            - clang-5.0
 | 
			
		||||
cache:
 | 
			
		||||
    apt: true
 | 
			
		||||
 | 
			
		||||
matrix:
 | 
			
		||||
  include:
 | 
			
		||||
    # Linux Clang C++17
 | 
			
		||||
    - env:
 | 
			
		||||
        BUILD_TYPE=debug
 | 
			
		||||
        BUILD_TYPE=Debug
 | 
			
		||||
        BIN_DIR=debug
 | 
			
		||||
        CC=clang-5.0
 | 
			
		||||
        CXX=clang++-5.0
 | 
			
		||||
    - env:
 | 
			
		||||
        BUILD_TYPE=release
 | 
			
		||||
        BUILD_TYPE=Release
 | 
			
		||||
        BIN_DIR=bin
 | 
			
		||||
        CC=clang-5.0
 | 
			
		||||
        CXX=clang++-5.0
 | 
			
		||||
 | 
			
		||||
    # Linux GCC C++17
 | 
			
		||||
    - env:
 | 
			
		||||
        - MATRIX_EVAL="BUILD_TYPE=debug && BIN_DIR=debug && CC=gcc-7 && CXX=g++-7"
 | 
			
		||||
        - MATRIX_EVAL="BUILD_TYPE=Debug && BIN_DIR=debug && CC=gcc-7 && CXX=g++-7"
 | 
			
		||||
      compiler: gcc
 | 
			
		||||
      before_install:
 | 
			
		||||
        - eval "${MATRIX_EVAL}"
 | 
			
		||||
    - env:
 | 
			
		||||
        - MATRIX_EVAL="BUILD_TYPE=release && BIN_DIR=bin && CC=gcc-7 && CXX=g++-7"
 | 
			
		||||
        - MATRIX_EVAL="BUILD_TYPE=Release && BIN_DIR=bin && CC=gcc-7 && CXX=g++-7"
 | 
			
		||||
      compiler: gcc
 | 
			
		||||
      before_install:
 | 
			
		||||
        - eval "${MATRIX_EVAL}"
 | 
			
		||||
 | 
			
		||||
    # OSX Clang
 | 
			
		||||
    - os: osx
 | 
			
		||||
      osx_image: xcode9.3
 | 
			
		||||
      env:
 | 
			
		||||
        BUILD_TYPE=Release
 | 
			
		||||
        BIN_DIR=bin
 | 
			
		||||
      before_install:
 | 
			
		||||
        brew update
 | 
			
		||||
      install:
 | 
			
		||||
        brew upgrade cmake
 | 
			
		||||
    - os: osx
 | 
			
		||||
      osx_image: xcode9.3
 | 
			
		||||
      env:
 | 
			
		||||
        BUILD_TYPE=Debug
 | 
			
		||||
        BIN_DIR=debug
 | 
			
		||||
      before_install:
 | 
			
		||||
        brew update
 | 
			
		||||
      install:
 | 
			
		||||
        brew upgrade cmake
 | 
			
		||||
script:
 | 
			
		||||
    - make $BUILD_TYPE
 | 
			
		||||
    - build-wrapper-linux-x86-64 --out-dir bw-output make clean all
 | 
			
		||||
    - sonar-scanner -Dsonar.projectKey=Phundrak_lzw-assignment -Dsonar.sources=./src/ -Dsonar.cfamily.build-wrapper-output=bw-output -Dsonar.host.url=https://sonarcloud.io
 | 
			
		||||
    - cd build
 | 
			
		||||
    - cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE ..
 | 
			
		||||
    - make -j
 | 
			
		||||
 | 
			
		||||
@ -1,5 +1,6 @@
 | 
			
		||||
cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
 | 
			
		||||
set(CMAKE_LEGACY_CYGWIN_WIN32 0)
 | 
			
		||||
set(CMAKE_BUILD_TYPE Debug)
 | 
			
		||||
 | 
			
		||||
project("projet_lzw")
 | 
			
		||||
 | 
			
		||||
@ -25,6 +26,8 @@ else()
 | 
			
		||||
        message( FATAL_ERROR "C++17 not supported, CMake will exit." )
 | 
			
		||||
    endif()
 | 
			
		||||
endif()
 | 
			
		||||
 | 
			
		||||
# set(CMAKE_CXX_STANDARD 17)
 | 
			
		||||
set(CMAKE_CXX_STANDARD_REQUIRED YES)
 | 
			
		||||
set(CMAKE_CXX_EXTENSIONS OFF)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										16
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								Makefile
									
									
									
									
									
								
							@ -1,16 +0,0 @@
 | 
			
		||||
# Convenience wrapper around the CMake build that lives in build/.
#
# None of these targets produces a file named after itself, so they are
# declared phony: otherwise an existing file or directory called `debug`,
# `release`, `all` or `clean` would make the target look up to date and its
# recipe would be skipped.
.PHONY: all release debug clean

# Default target: identical to the optimised `release` build.
all: release

# Optimised build, then strip symbols from the produced binary.
release:
	@mkdir -p build
	@cd build && pwd && cmake -DCMAKE_BUILD_TYPE=Release .. && make
	@strip bin/projet_lzw

# Debug build; the binary is left unstripped.
debug:
	@mkdir -p build
	@cd build && pwd && cmake -DCMAKE_BUILD_TYPE=Debug .. && make

# Remove build products; the globs do not match dotfiles such as .gitignore.
clean:
	rm -rf bin/* build/* debug/*
 | 
			
		||||
							
								
								
									
										16
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								README.md
									
									
									
									
									
								
							@ -1,25 +1,11 @@
 | 
			
		||||
[CircleCI build status](https://circleci.com/gh/Phundrak/lzw-assignment/tree/master)
 | 
			
		||||
[Travis CI build status](https://travis-ci.org/Phundrak/lzw-assignment)
 | 
			
		||||
[Codacy code quality](https://www.codacy.com/app/Phundrak/lzw-assignment?utm_source=github.com&utm_medium=referral&utm_content=Phundrak/lzw-assignment&utm_campaign=Badge_Grade)
 | 
			
		||||
[CodeFactor grade](https://www.codefactor.io/repository/github/phundrak/lzw-assignment)
 | 
			
		||||
[SonarCloud dashboard](https://sonarcloud.io/dashboard?id=Phundrak_lzw-assignment)
 | 
			
		||||
[Built with Spacemacs](http://spacemacs.org)
 | 
			
		||||
[Better Code Hub](https://bettercodehub.com/)
 | 
			
		||||
 | 
			
		||||
# LZW Compressing tool
 | 
			
		||||
 | 
			
		||||
This is a university assignment in which I implement the LZW algorithm as a small tool, similar to `gzip` and `gunzip`, that can compress and uncompress files in a lossless fashion.
 | 
			
		||||
 | 
			
		||||
This project is written in C++17 and compiled with clang in a UNIX environment. Other compilers and environments will not be tested.
 | 
			
		||||
 | 
			
		||||
## How to use it
 | 
			
		||||
 | 
			
		||||
Currently, five different options are available to the user:
 | 
			
		||||
- `-h` or `--help` will show how to use `projet_lzw`
 | 
			
		||||
- `-c` or `--compress` tells `projet_lzw` to compress the input file
 | 
			
		||||
- `-u` or `--uncompress` tells `projet_lzw` to uncompress the input file
 | 
			
		||||
- `-i <file>` or `--input <file path>` specifies the input file to be compressed or uncompressed (**MANDATORY**)
 | 
			
		||||
- `-o <file>` or `--output <file path>` specifies the name of the output file. If not used, the default output name for compression is `output.lzw`, and the default name for uncompressing is `<filename>_uncompressed`.
 | 
			
		||||
 | 
			
		||||
By default, `projet_lzw` will uncompress the (mandatory) input file.
 | 
			
		||||
 | 
			
		||||
A sixth option, `-p` or `--passes`, is planned for the future; it will allow the input file to be compressed multiple times.
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										2
									
								
								bin/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								bin/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@ -0,0 +1,2 @@
 | 
			
		||||
*
 | 
			
		||||
!.gitignore
 | 
			
		||||
							
								
								
									
										2
									
								
								build/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								build/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@ -0,0 +1,2 @@
 | 
			
		||||
*
 | 
			
		||||
!.gitignore
 | 
			
		||||
							
								
								
									
										2
									
								
								debug/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								debug/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@ -0,0 +1,2 @@
 | 
			
		||||
*
 | 
			
		||||
!.gitignore
 | 
			
		||||
							
								
								
									
										3
									
								
								doc/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								doc/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@ -0,0 +1,3 @@
 | 
			
		||||
*
 | 
			
		||||
!.gitignore
 | 
			
		||||
!Doxyfile
 | 
			
		||||
							
								
								
									
										2440
									
								
								doc/Doxyfile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2440
									
								
								doc/Doxyfile
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										244
									
								
								includes/getopt.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										244
									
								
								includes/getopt.c
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,244 @@
 | 
			
		||||
/*
 | 
			
		||||
  Copyright 2005-2014 Rich Felker, et al.
 | 
			
		||||
 | 
			
		||||
  Permission is hereby granted, free of charge, to any person obtaining
 | 
			
		||||
  a copy of this software and associated documentation files (the
 | 
			
		||||
  "Software"), to deal in the Software without restriction, including
 | 
			
		||||
  without limitation the rights to use, copy, modify, merge, publish,
 | 
			
		||||
  distribute, sublicense, and/or sell copies of the Software, and to
 | 
			
		||||
  permit persons to whom the Software is furnished to do so, subject to
 | 
			
		||||
  the following conditions:
 | 
			
		||||
 | 
			
		||||
  The above copyright notice and this permission notice shall be
 | 
			
		||||
  included in all copies or substantial portions of the Software.
 | 
			
		||||
 | 
			
		||||
  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 | 
			
		||||
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 | 
			
		||||
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 | 
			
		||||
  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 | 
			
		||||
  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 | 
			
		||||
  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 | 
			
		||||
  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
#include "getopt.h"
 | 
			
		||||
#include <stddef.h>
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
 | 
			
		||||
char *optarg;
 | 
			
		||||
int optind = 1, opterr = 1, optopt, __optpos, optreset = 0;
 | 
			
		||||
 | 
			
		||||
#define optpos __optpos
 | 
			
		||||
 | 
			
		||||
/*
 * Emits one diagnostic line on stderr: string a, string b, the first l bytes
 * of c, then a newline. The stream is locked for the whole sequence, and
 * output stops at the first piece that fails to be written.
 */
static void __getopt_msg(const char *a, const char *b, const char *c,
                         size_t l) {
  FILE *const err = stderr;

  flockfile(err);
  if (fputs(a, err) >= 0 && fwrite(b, strlen(b), 1, err) != 0 &&
      fwrite(c, 1, l, err) == l) {
    putc('\n', err);
  }
  funlockfile(err);
}
 | 
			
		||||
 | 
			
		||||
int getopt(int argc, char *const argv[], const char *optstring) {
 | 
			
		||||
  int i, c, d;
 | 
			
		||||
  int k, l;
 | 
			
		||||
  char *optchar;
 | 
			
		||||
 | 
			
		||||
  if (!optind || optreset) {
 | 
			
		||||
    optreset = 0;
 | 
			
		||||
    __optpos = 0;
 | 
			
		||||
    optind = 1;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (optind >= argc || !argv[optind])
 | 
			
		||||
    return -1;
 | 
			
		||||
 | 
			
		||||
  if (argv[optind][0] != '-') {
 | 
			
		||||
    if (optstring[0] == '-') {
 | 
			
		||||
      optarg = argv[optind++];
 | 
			
		||||
      return 1;
 | 
			
		||||
    }
 | 
			
		||||
    return -1;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (!argv[optind][1])
 | 
			
		||||
    return -1;
 | 
			
		||||
 | 
			
		||||
  if (argv[optind][1] == '-' && !argv[optind][2])
 | 
			
		||||
    return optind++, -1;
 | 
			
		||||
 | 
			
		||||
  if (!optpos)
 | 
			
		||||
    optpos++;
 | 
			
		||||
  c = argv[optind][optpos], k = 1;
 | 
			
		||||
  optchar = argv[optind] + optpos;
 | 
			
		||||
  optopt = c;
 | 
			
		||||
  optpos += k;
 | 
			
		||||
 | 
			
		||||
  if (!argv[optind][optpos]) {
 | 
			
		||||
    optind++;
 | 
			
		||||
    optpos = 0;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (optstring[0] == '-' || optstring[0] == '+')
 | 
			
		||||
    optstring++;
 | 
			
		||||
 | 
			
		||||
  i = 0;
 | 
			
		||||
  d = 0;
 | 
			
		||||
  do {
 | 
			
		||||
    d = optstring[i], l = 1;
 | 
			
		||||
    if (l > 0)
 | 
			
		||||
      i += l;
 | 
			
		||||
    else
 | 
			
		||||
      i++;
 | 
			
		||||
  } while (l && d != c);
 | 
			
		||||
 | 
			
		||||
  if (d != c) {
 | 
			
		||||
    if (optstring[0] != ':' && opterr)
 | 
			
		||||
      __getopt_msg(argv[0], ": unrecognized option: ", optchar, k);
 | 
			
		||||
    return '?';
 | 
			
		||||
  }
 | 
			
		||||
  if (optstring[i] == ':') {
 | 
			
		||||
    if (optstring[i + 1] == ':')
 | 
			
		||||
      optarg = 0;
 | 
			
		||||
    else if (optind >= argc) {
 | 
			
		||||
      if (optstring[0] == ':')
 | 
			
		||||
        return ':';
 | 
			
		||||
      if (opterr)
 | 
			
		||||
        __getopt_msg(argv[0], ": option requires an argument: ", optchar, k);
 | 
			
		||||
      return '?';
 | 
			
		||||
    }
 | 
			
		||||
    if (optstring[i + 1] != ':' || optpos) {
 | 
			
		||||
      optarg = argv[optind++] + optpos;
 | 
			
		||||
      optpos = 0;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  return c;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Moves argv[src] to position dest and shifts the elements that were in
 * [dest, src) up by one slot. When src is not greater than dest nothing is
 * shifted and argv[dest] is simply overwritten with argv[src].
 */
static void permute(char *const *argv, int dest, int src) {
  char **slots = (char **)argv; /* the pointer array itself is writable */
  char *moved = slots[src];
  int pos = src;

  while (pos > dest) {
    slots[pos] = slots[pos - 1];
    --pos;
  }
  slots[dest] = moved;
}
 | 
			
		||||
 | 
			
		||||
static int __getopt_long_core(int argc, char *const *argv,
 | 
			
		||||
                              const char *optstring,
 | 
			
		||||
                              const struct option *longopts, int *idx,
 | 
			
		||||
                              int longonly) {
 | 
			
		||||
  optarg = 0;
 | 
			
		||||
  if (longopts && argv[optind][0] == '-' &&
 | 
			
		||||
      ((longonly && argv[optind][1] && argv[optind][1] != '-') ||
 | 
			
		||||
       (argv[optind][1] == '-' && argv[optind][2]))) {
 | 
			
		||||
    int colon = optstring[optstring[0] == '+' || optstring[0] == '-'] == ':';
 | 
			
		||||
    int i, cnt, match;
 | 
			
		||||
    char *opt;
 | 
			
		||||
    for (cnt = i = 0; longopts[i].name; i++) {
 | 
			
		||||
      const char *name = longopts[i].name;
 | 
			
		||||
      opt = argv[optind] + 1;
 | 
			
		||||
      if (*opt == '-')
 | 
			
		||||
        opt++;
 | 
			
		||||
      for (; *name && *name == *opt; name++, opt++)
 | 
			
		||||
        ;
 | 
			
		||||
      if (*opt && *opt != '=')
 | 
			
		||||
        continue;
 | 
			
		||||
      match = i;
 | 
			
		||||
      if (!*name) {
 | 
			
		||||
        cnt = 1;
 | 
			
		||||
        break;
 | 
			
		||||
      }
 | 
			
		||||
      cnt++;
 | 
			
		||||
    }
 | 
			
		||||
    if (cnt == 1) {
 | 
			
		||||
      i = match;
 | 
			
		||||
      optind++;
 | 
			
		||||
      optopt = longopts[i].val;
 | 
			
		||||
      if (*opt == '=') {
 | 
			
		||||
        if (!longopts[i].has_arg) {
 | 
			
		||||
          if (colon || !opterr)
 | 
			
		||||
            return '?';
 | 
			
		||||
          __getopt_msg(argv[0],
 | 
			
		||||
                       ": option does not take an argument: ", longopts[i].name,
 | 
			
		||||
                       strlen(longopts[i].name));
 | 
			
		||||
          return '?';
 | 
			
		||||
        }
 | 
			
		||||
        optarg = opt + 1;
 | 
			
		||||
      } else if (longopts[i].has_arg == required_argument) {
 | 
			
		||||
        if (!(optarg = argv[optind])) {
 | 
			
		||||
          if (colon)
 | 
			
		||||
            return ':';
 | 
			
		||||
          if (!opterr)
 | 
			
		||||
            return '?';
 | 
			
		||||
          __getopt_msg(argv[0],
 | 
			
		||||
                       ": option requires an argument: ", longopts[i].name,
 | 
			
		||||
                       strlen(longopts[i].name));
 | 
			
		||||
          return '?';
 | 
			
		||||
        }
 | 
			
		||||
        optind++;
 | 
			
		||||
      }
 | 
			
		||||
      if (idx)
 | 
			
		||||
        *idx = i;
 | 
			
		||||
      if (longopts[i].flag) {
 | 
			
		||||
        *longopts[i].flag = longopts[i].val;
 | 
			
		||||
        return 0;
 | 
			
		||||
      }
 | 
			
		||||
      return longopts[i].val;
 | 
			
		||||
    }
 | 
			
		||||
    if (argv[optind][1] == '-') {
 | 
			
		||||
      if (!colon && opterr)
 | 
			
		||||
        __getopt_msg(argv[0],
 | 
			
		||||
                     cnt ? ": option is ambiguous: "
 | 
			
		||||
                         : ": unrecognized option: ",
 | 
			
		||||
                     argv[optind] + 2, strlen(argv[optind] + 2));
 | 
			
		||||
      optind++;
 | 
			
		||||
      return '?';
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  return getopt(argc, argv, optstring);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Shared implementation of getopt_long() and getopt_long_only().
 *
 * Unless optstring starts with '+' or '-', non-option arguments standing in
 * front of the next option are skipped, the option is parsed by
 * __getopt_long_core(), and the argv elements it consumed are then rotated
 * in front of the skipped ones with permute(). Returns whatever the core
 * parser returned, or -1 when no further option exists.
 */
static int __getopt_long(int argc, char *const *argv, const char *optstring,
                         const struct option *longopts, int *idx,
                         int longonly) {
  int ret, skipped, resumed;
  /* optind == 0 or a non-zero optreset requests a fresh scan from argv[1]. */
  if (!optind || optreset) {
    optreset = 0;
    __optpos = 0;
    optind = 1;
  }
  if (optind >= argc || !argv[optind])
    return -1;
  /* Index of the first element that has not been processed yet. */
  skipped = optind;
  if (optstring[0] != '+' && optstring[0] != '-') {
    int i;
    /* Find the next element that starts with '-' and is longer than "-". */
    for (i = optind;; i++) {
      if (i >= argc || !argv[i])
        return -1;
      if (argv[i][0] == '-' && argv[i][1])
        break;
    }
    optind = i;
  }
  /* Index at which the core parser starts working. */
  resumed = optind;
  ret = __getopt_long_core(argc, argv, optstring, longopts, idx, longonly);
  if (resumed > skipped) {
    /* cnt = number of argv elements the core parser consumed. */
    int i, cnt = optind - resumed;
    /* Each call moves the last consumed element down to index `skipped`. */
    for (i = 0; i < cnt; i++)
      permute(argv, skipped, optind - 1);
    optind = skipped + cnt;
  }
  return ret;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Public entry point: forwards all arguments to __getopt_long() with
 * longonly = 0, so only arguments starting with "--" are tried as long
 * options.
 */
int getopt_long(int argc, char *const *argv, const char *optstring,
                const struct option *longopts, int *idx) {
  return __getopt_long(argc, argv, optstring, longopts, idx, 0);
}
 | 
			
		||||
 | 
			
		||||
/*
 * Public entry point: forwards all arguments to __getopt_long() with
 * longonly = 1, so arguments starting with a single '-' are also tried as
 * long options before falling back to short-option parsing.
 */
int getopt_long_only(int argc, char *const *argv, const char *optstring,
                     const struct option *longopts, int *idx) {
  return __getopt_long(argc, argv, optstring, longopts, idx, 1);
}
 | 
			
		||||
							
								
								
									
										54
									
								
								includes/getopt.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								includes/getopt.h
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,54 @@
 | 
			
		||||
/*
 | 
			
		||||
  Copyright 2005-2014 Rich Felker, et al.
 | 
			
		||||
 | 
			
		||||
  Permission is hereby granted, free of charge, to any person obtaining
 | 
			
		||||
  a copy of this software and associated documentation files (the
 | 
			
		||||
  "Software"), to deal in the Software without restriction, including
 | 
			
		||||
  without limitation the rights to use, copy, modify, merge, publish,
 | 
			
		||||
  distribute, sublicense, and/or sell copies of the Software, and to
 | 
			
		||||
  permit persons to whom the Software is furnished to do so, subject to
 | 
			
		||||
  the following conditions:
 | 
			
		||||
 | 
			
		||||
  The above copyright notice and this permission notice shall be
 | 
			
		||||
  included in all copies or substantial portions of the Software.
 | 
			
		||||
 | 
			
		||||
  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 | 
			
		||||
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 | 
			
		||||
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 | 
			
		||||
  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 | 
			
		||||
  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 | 
			
		||||
  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 | 
			
		||||
  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
#ifndef _GETOPT_H
 | 
			
		||||
#define _GETOPT_H
 | 
			
		||||
 | 
			
		||||
#ifdef __cplusplus
 | 
			
		||||
extern "C" {
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
int getopt(int, char *const[], const char *);
 | 
			
		||||
extern char *optarg;
 | 
			
		||||
extern int optind, opterr, optopt, optreset;
 | 
			
		||||
 | 
			
		||||
/* One entry of the long-option table passed to getopt_long() and
 * getopt_long_only(); the table ends with an entry whose name is null. */
struct option {
  const char *name; /* option name, without the leading dashes */
  int has_arg;      /* no_argument, required_argument or optional_argument */
  int *flag;        /* if non-null, receives val and the parser returns 0 */
  int val;          /* value returned, or stored in *flag, on a match */
};
 | 
			
		||||
 | 
			
		||||
int getopt_long(int, char *const *, const char *, const struct option *, int *);
 | 
			
		||||
int getopt_long_only(int, char *const *, const char *, const struct option *,
 | 
			
		||||
                     int *);
 | 
			
		||||
 | 
			
		||||
#define no_argument 0
 | 
			
		||||
#define required_argument 1
 | 
			
		||||
#define optional_argument 2
 | 
			
		||||
 | 
			
		||||
#ifdef __cplusplus
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										177
									
								
								src/bitpack.cc
									
									
									
									
									
								
							
							
						
						
									
										177
									
								
								src/bitpack.cc
									
									
									
									
									
								
							@ -1,177 +0,0 @@
 | 
			
		||||
#include "bitpack.hh"
 | 
			
		||||
#include "common.hh"
 | 
			
		||||
#include <algorithm>
 | 
			
		||||
#include <cmath>
 | 
			
		||||
 | 
			
		||||
using std::uint16_t;
 | 
			
		||||
using std::vector;
 | 
			
		||||
using uchar = unsigned char;
 | 
			
		||||
using vuint16 = vector<uint16_t>;
 | 
			
		||||
using vuchar = vector<uchar>;
 | 
			
		||||
using ustring = std::basic_string<unsigned char>;
 | 
			
		||||
 | 
			
		||||
/// Returns ipow(2, t_n) - 1. ipow is defined outside this file; presumably it
/// is an integer power, which makes the result the largest value that fits in
/// t_n bits (callers use it as "max value held within t_n bits").
[[nodiscard]] int max(const int t_n) { return ipow(2, t_n) - 1; }
 | 
			
		||||
 | 
			
		||||
/// Builds a 16-bit mask whose t_nb_bits least significant bits are set.
///
/// @param t_nb_bits Number of low bits to set.
/// @return 0 for t_nb_bits <= 0, 0xFFFF for t_nb_bits >= 16, and
///         2^t_nb_bits - 1 otherwise.
///
/// Computed directly rather than recursively, so a negative argument yields
/// an empty mask instead of recursing without bound.
[[nodiscard]] constexpr uint16_t mask_n(int t_nb_bits) {
  constexpr int width = 16; // number of bits in the returned mask
  if (t_nb_bits <= 0) {
    return 0;
  }
  if (t_nb_bits >= width) {
    return 0xFFFF;
  }
  return static_cast<uint16_t>((1U << t_nb_bits) - 1U);
}
 | 
			
		||||
 | 
			
		||||
/// Lookup table of low-bit masks: masks[n] has its n least significant bits
/// set, for n from 0 to 16.
constexpr uint16_t masks[17] = {
    0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F,
    0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF,
    0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF};
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
//                                  packing                                  //
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
/// Packs t_input into a new byte vector by calling pack_n over the whole
/// input with an initial width of 9 bits.
[[nodiscard]] vuchar pack(const vuint16 &t_input) {
  vuchar vec{};
  return pack_n(t_input.begin(), t_input.end(), vec, 9);
}
 | 
			
		||||
 | 
			
		||||
/// Packs the values in [t_input_begin, t_input_end) into t_res using t_n bits
/// per value.
///
/// When t_n is 16 the work is delegated to pack_16. Otherwise values are
/// appended to t_res byte by byte; as soon as a value is greater than or
/// equal to max(t_n), bytes derived from masks[t_n] are written and the
/// function calls itself with t_n + 1, starting again at that same value.
///
/// @param t_input_begin First value to pack.
/// @param t_input_end   One past the last value to pack.
/// @param t_res         Output buffer; bytes are appended to it.
/// @param t_n           Number of bits used per value.
/// @return A copy of t_res once the input has been consumed.
[[nodiscard]] vuchar pack_n(const vuint16::const_iterator t_input_begin,
                            const vuint16::const_iterator t_input_end,
                            vuchar &t_res, int t_n) {
  if (t_n == 16) {
    return pack_16(t_input_begin, t_input_end, t_res);
  }
  const int max_value = max(t_n); // max value held within t_n bits

  int step = t_n % 8;
  int left_shift = 0;
  int right_shift = 0;
  uchar current_char = 0;
  bool char_touched = false;

  // for each element
  for (auto it = t_input_begin; it != t_input_end; ++it) {
    // if the maximum value has been reached or exceeded, switch to another
    // number of bits
    if (*it >= max_value) {
      // write the mask so that decompression is notified of the change in
      // number of bits
      if ((left_shift += step) >= t_n) {
        left_shift = (left_shift - t_n) + step;
      }
      const auto mask = masks[t_n] >> left_shift;
      t_res.push_back(static_cast<uchar>(current_char | mask));
      bool zero_rs = (right_shift == 0);

      right_shift -= step;
      if (right_shift < 0 && !zero_rs) {
        // right_shift is now below zero and was non-zero before the
        // subtraction: an extra byte is written
        current_char = static_cast<uchar>(masks[t_n] >> (-right_shift) & 0xFFU);
        t_res.push_back(current_char);
      }
      t_res.push_back(static_cast<uchar>(masks[t_n]));
      return pack_n(it, t_input_end, t_res, t_n + 1);
    }

    // normal write
    if ((left_shift += step) >= t_n) {
      left_shift = (left_shift - t_n) + step;
    }
    t_res.push_back(
        static_cast<uchar>(current_char | (*it >> left_shift & 0xFFU)));

    bool zero_rs = (right_shift == 0);
    right_shift -= step;
    if (right_shift < 0) {
      if (!zero_rs) {
        current_char = static_cast<uchar>(*it >> (-right_shift) & 0xFFU);
        t_res.push_back(current_char);
      }
      right_shift = 8 + right_shift;
    }
    if (right_shift == 0) {
      current_char = static_cast<uchar>(*it & 0xFFU);
      t_res.push_back(current_char);
      current_char = 0;
      char_touched = false;
    } else {
      current_char = static_cast<uchar>(*it << right_shift & 0xFFU);
      char_touched = true;
    }
  }
  // flush the partially filled byte, if any
  if (char_touched) {
    t_res.push_back(current_char);
  }
  return t_res;
}
 | 
			
		||||
 | 
			
		||||
/// Appends every value of [t_input_begin, t_input_end) to t_res as two bytes,
/// high byte first, and returns a copy of t_res.
[[nodiscard]] vuchar pack_16(const vuint16::const_iterator t_input_begin,
                             const vuint16::const_iterator t_input_end,
                             vuchar &t_res) {
  for (auto cursor = t_input_begin; cursor != t_input_end; ++cursor) {
    const auto code = *cursor;
    const auto high_byte = static_cast<uchar>(code >> 8 & 0xFFU);
    const auto low_byte = static_cast<uchar>(code & 0xFFU);
    t_res.push_back(high_byte);
    t_res.push_back(low_byte);
  }
  return t_res;
}
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
//                                 unpacking                                 //
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
/**
 *  \brief Entry point of the unpacking: decodes a whole packed byte string.
 *
 *  Forwards the complete input to unpack_n with a fresh result vector and an
 *  initial \c t_n of 9.
 *
 *  \param[in] t_input Packed bytes to decode
 *  \return Whatever unpack_n returns for the whole input
 */
[[nodiscard]] vuint16 unpack(ustring &&t_input) {
  vuint16 codes;
  return unpack_n(t_input.cbegin(), t_input.cend(), codes, 9);
}
 | 
			
		||||
 | 
			
		||||
/**
 *  \brief Decodes codes of \p t_n bits from a packed byte range.
 *
 *  Values are rebuilt from consecutive bytes and appended to \p t_res. When a
 *  rebuilt value is greater than or equal to \c max(t_n), decoding of the rest
 *  of the input restarts with \p t_n + 1 bits. A width of 16 is delegated to
 *  unpack_16.
 *
 *  NOTE(review): \c masks and \c max are defined outside this block; judging
 *  from their use here they presumably hold bit masks indexed by bit count
 *  and the width-escape value for a given width -- confirm.
 *
 *  \param[in] t_begin Iterator to the first byte to decode
 *  \param[in] t_end Iterator one past the last byte to decode
 *  \param[in,out] t_res Vector the decoded codes are appended to
 *  \param[in] t_n Width, in bits, of the codes to decode
 *  \return A copy of \p t_res after decoding
 */
[[nodiscard]] vuint16 unpack_n(const ustring::const_iterator t_begin,
                               const ustring::const_iterator t_end,
                               vuint16 &t_res, int t_n) {
  if (t_n == 16) {
    return unpack_16(t_begin, t_end, t_res);
  }
  const int step = t_n % 8;
  int left_shift = 0;
  int right_shift = 0;
  const int max_value = max(t_n);
  // At least two bytes must remain to build a value. Comparing distances
  // instead of `it < t_end - 1` avoids forming an iterator before the start
  // of the buffer when the range is empty.
  for (auto it = t_begin; t_end - it > 1; /* advanced in the body */) {
    uint16_t current_char = 0;
    // left bits
    if ((left_shift += step) >= t_n) {
      left_shift = (left_shift - t_n) + step;
    }
    current_char = static_cast<uint16_t>(*it << left_shift) & masks[t_n];
    // right bits
    bool zero_rs = (right_shift == 0);
    right_shift -= step;
    if (right_shift < 0) {
      // the previous right shift was not zero: a middle byte contributes too
      if (!zero_rs) {
        current_char |= *++it << (-right_shift) & masks[16 + right_shift];
      }
      right_shift = 8 + right_shift;
    }
    current_char |= *++it >> right_shift & masks[8 - right_shift];
    // value rebuilt
    if (current_char >= max_value) { // escape value: switch to a wider code
      return unpack_n(it + 1, t_end, t_res, t_n + 1);
    }
    current_char &= masks[t_n];
    t_res.push_back(current_char);
    if (right_shift == 0) {
      // the current byte has been fully consumed
      ++it;
    }
  }
  return t_res;
}
 | 
			
		||||
 | 
			
		||||
/**
 *  \brief Rebuilds 16-bit codes from pairs of bytes.
 *
 *  Each pair of consecutive bytes in [\p t_begin, \p t_end) is combined into
 *  one value, the first byte being the most significant one. The two bytes
 *  are read through separate expressions: reading `*it` and `*++it` inside a
 *  single `|` expression leaves the order of the read and the increment
 *  unspecified. A trailing byte without a partner cannot form a code and is
 *  ignored instead of reading past \p t_end.
 *
 *  \param[in] t_begin Iterator to the first byte to decode
 *  \param[in] t_end Iterator one past the last byte to decode
 *  \param[in,out] t_res Vector the decoded codes are appended to
 *  \return A copy of \p t_res after decoding
 */
[[nodiscard]] std::vector<std::uint16_t>
unpack_16(const std::basic_string<unsigned char>::const_iterator t_begin,
          const std::basic_string<unsigned char>::const_iterator t_end,
          std::vector<std::uint16_t> &t_res) {
  for (auto it = t_begin; t_end - it >= 2; it += 2) {
    const unsigned high = *it;
    const unsigned low = *(it + 1);
    t_res.push_back(static_cast<std::uint16_t>(high << 8 | low));
  }
  return t_res;
}
 | 
			
		||||
@ -1,34 +0,0 @@
 | 
			
		||||
#ifndef LZW_SRC_BITPACK_H_
 | 
			
		||||
#define LZW_SRC_BITPACK_H_
 | 
			
		||||
 | 
			
		||||
#include <cstdint>
 | 
			
		||||
#include <string>
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
[[nodiscard]] std::vector<unsigned char>
 | 
			
		||||
pack(const std::vector<std::uint16_t> &);
 | 
			
		||||
 | 
			
		||||
[[nodiscard]] std::vector<unsigned char>
 | 
			
		||||
pack_n(std::vector<std::uint16_t>::const_iterator,
 | 
			
		||||
       std::vector<std::uint16_t>::const_iterator, std::vector<unsigned char> &,
 | 
			
		||||
       int);
 | 
			
		||||
 | 
			
		||||
[[nodiscard]] std::vector<unsigned char>
 | 
			
		||||
pack_16(std::vector<std::uint16_t>::const_iterator,
 | 
			
		||||
        std::vector<std::uint16_t>::const_iterator,
 | 
			
		||||
        std::vector<unsigned char> &);
 | 
			
		||||
 | 
			
		||||
[[nodiscard]] std::vector<std::uint16_t>
 | 
			
		||||
unpack(std::basic_string<unsigned char> &&);
 | 
			
		||||
 | 
			
		||||
[[nodiscard]] std::vector<std::uint16_t>
 | 
			
		||||
unpack_n(std::basic_string<unsigned char>::const_iterator t_begin,
 | 
			
		||||
         std::basic_string<unsigned char>::const_iterator t_end,
 | 
			
		||||
         std::vector<std::uint16_t> &, int t_n);
 | 
			
		||||
 | 
			
		||||
[[nodiscard]] std::vector<std::uint16_t>
 | 
			
		||||
unpack_16(std::basic_string<unsigned char>::const_iterator,
 | 
			
		||||
          std::basic_string<unsigned char>::const_iterator,
 | 
			
		||||
          std::vector<std::uint16_t> &);
 | 
			
		||||
 | 
			
		||||
#endif /* LZW_SRC_BITPACK_H_ */
 | 
			
		||||
@ -1,69 +1,44 @@
 | 
			
		||||
/**
 | 
			
		||||
 *   \file common.cc
 | 
			
		||||
 *   \brief Implementation for functions in common
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include "common.hh"
 | 
			
		||||
 | 
			
		||||
using std::uint16_t;
 | 
			
		||||
using std::uint8_t;
 | 
			
		||||
using dic_comp_t = std::map<std::pair<uint16_t, uint8_t>, uint16_t>;
 | 
			
		||||
using ustring = std::basic_string<unsigned char>;
 | 
			
		||||
using p_ustring = std::shared_ptr<ustring>;
 | 
			
		||||
using uint8_t = std::uint8_t;
 | 
			
		||||
using uint32_t = std::uint32_t;
 | 
			
		||||
using dic_t = std::map<std::pair<uint32_t, uint8_t>, uint32_t>;
 | 
			
		||||
 | 
			
		||||
/**
 *  \brief Integer power computed by repeated squaring.
 *
 *  For a non-negative \p exp the result is \p base raised to \p exp (with
 *  ipow(x, 0) == 1). A negative \p exp yields the integer-truncated value of
 *  the reciprocal power: 1 for a base of 1, +/-1 for a base of -1, and 0 for
 *  every other base (including 0, for which the power is undefined). Without
 *  that guard a negative exponent never reaches zero when shifted right, so
 *  the loop would not terminate.
 *
 *  \param[in] base Value to raise
 *  \param[in] exp Exponent
 *  \return \p base to the power \p exp, truncated to an int
 */
[[nodiscard]] int ipow(int base, int exp) {
  if (exp < 0) {
    if (base == 1) {
      return 1;
    }
    if (base == -1) {
      return (exp % 2 == 0) ? 1 : -1;
    }
    return 0;
  }
  int result = 1;
  while (exp != 0) {
    if ((exp & 1) != 0) {
      result *= base;
    }
    exp >>= 1;
    // square only while bits remain, so the last squaring cannot overflow
    // needlessly
    if (exp != 0) {
      base *= base;
    }
  }
  return result;
}
 | 
			
		||||
 | 
			
		||||
[[nodiscard]] std::pair<bool, uint16_t>
 | 
			
		||||
dico(dic_comp_t &t_dictionary, const uint16_t t_nr_chaine, const uint8_t t_c) {
 | 
			
		||||
/**
 | 
			
		||||
 *  Cette fonction a pour double usage la recherche d’une chaine de caractères
 | 
			
		||||
 *  dans le dictionnaire, ou bien l’ajout d’une nouvelle chaîne si celle-ci n’est
 | 
			
		||||
 *  pas déjà présente. Une chaine de caractères est représentée par un couple
 | 
			
		||||
 *  numéro de chaine / caractère, le numéro de chaine renvoyant au caractère
 | 
			
		||||
 *  précédent (soit son code ASCII, soit son indice dans le dictionnaire) et le
 | 
			
		||||
 *  caractère se référant au dernier caractère de la chaine courante. Si le
 | 
			
		||||
 *  numéro de chaine est -1, alors il s’agit du premier caractère de la chaine,
 | 
			
		||||
 *  et la valeur renvoyée sera la valeur ASCII du caractère. La fonction renvoie
 | 
			
		||||
 *  une paire bool/uint32_t, la valeur booléenne indiquant si une nouvelle chaîne fut
 | 
			
		||||
 *  ajoutée dans le dictionnaire ou non, et le uint32_t indiquant la valeur
 | 
			
		||||
 *  numérique de la chaîne dans le dictionnaire.
 | 
			
		||||
 *
 | 
			
		||||
 *  \param t_dictionary Dictionnaire
 | 
			
		||||
 *  \param t_nr_chaine Numéro de la chaine précédant le caractère \p t_c dans \p t_dictionary
 | 
			
		||||
 *  \param t_c Caractère suivant la chaine de caractères \p t_nr_chaine
 | 
			
		||||
 *  \return const std::pair<bool, uint32_t>
 | 
			
		||||
 */
 | 
			
		||||
const std::pair<bool, uint32_t>
 | 
			
		||||
dico(std::map<std::pair<uint32_t, uint8_t>, uint32_t> &t_dictionary,
 | 
			
		||||
     uint32_t t_nr_chaine, uint8_t t_c) {
 | 
			
		||||
  if (t_nr_chaine == 0xFFFF) {
 | 
			
		||||
    return std::make_pair(true, t_c);
 | 
			
		||||
  }
 | 
			
		||||
  auto &e = t_dictionary[std::make_pair(t_nr_chaine, t_c)];
 | 
			
		||||
  if (e != 0)
 | 
			
		||||
    return std::make_pair(true, e);
 | 
			
		||||
  e = static_cast<uint16_t>(t_dictionary.size() + 255);
 | 
			
		||||
  return std::make_pair(false, e);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
[[nodiscard]] ustring dico_uncompress(std::map<uint16_t, ustring> &t_dict,
 | 
			
		||||
                                      const uint16_t t_code,
 | 
			
		||||
                                      const uint16_t t_old) {
 | 
			
		||||
  // le code existe dans le dictionnaire s’il est < 256
 | 
			
		||||
  if (t_code < 256) {
 | 
			
		||||
    ustring e{static_cast<unsigned char>(t_code)};
 | 
			
		||||
    // 256 car on n'a pas encore tenté d'insérer de nouveau caractère
 | 
			
		||||
    if (t_old < 256) {
 | 
			
		||||
      t_dict[static_cast<uint16_t>(t_dict.size() + 256)] =
 | 
			
		||||
          static_cast<unsigned char>(t_old) + e;
 | 
			
		||||
    } else {
 | 
			
		||||
      t_dict[static_cast<uint16_t>(t_dict.size() + 256)] = t_dict[t_old] + e;
 | 
			
		||||
    }
 | 
			
		||||
    return e;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  auto &e = t_dict[t_code];
 | 
			
		||||
  auto str = (t_old < 256) ? ustring{static_cast<unsigned char>(t_old)}
 | 
			
		||||
                           : t_dict[t_old];
 | 
			
		||||
 | 
			
		||||
  // le code existe dans le dictionnaire
 | 
			
		||||
  if (!e.empty()) {
 | 
			
		||||
    str += e[0];
 | 
			
		||||
    const auto index = static_cast<uint16_t>(t_dict.size() + 256);
 | 
			
		||||
    t_dict[index] = str;
 | 
			
		||||
    return e;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // le code n'existe pas encore dans le dictionnaire
 | 
			
		||||
  str += str[0];
 | 
			
		||||
  e = str;
 | 
			
		||||
  t_dict[t_code] = e;
 | 
			
		||||
  return e;
 | 
			
		||||
  return (e != 0) ? std::make_pair(true, e)
 | 
			
		||||
             : std::make_pair(
 | 
			
		||||
                   false,
 | 
			
		||||
                   (e = static_cast<
 | 
			
		||||
                            typename std::remove_reference<decltype(e)>::type>(
 | 
			
		||||
                            t_dictionary.size()) +
 | 
			
		||||
                        255));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -1,18 +1,17 @@
 | 
			
		||||
/**
 | 
			
		||||
 *   \file common.hh
 | 
			
		||||
 *   \brief Header for functions in common
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#ifndef LZW_SRC_COMMON_H_
 | 
			
		||||
#define LZW_SRC_COMMON_H_
 | 
			
		||||
 | 
			
		||||
#include <cstdint>
 | 
			
		||||
#include <map>
 | 
			
		||||
#include <memory>
 | 
			
		||||
 | 
			
		||||
[[nodiscard]] int ipow(int, int);
 | 
			
		||||
 | 
			
		||||
[[nodiscard]] std::pair<bool, std::uint16_t>
 | 
			
		||||
dico(std::map<std::pair<std::uint16_t, std::uint8_t>, std::uint16_t> &,
 | 
			
		||||
     const std::uint16_t, const std::uint8_t);
 | 
			
		||||
 | 
			
		||||
[[nodiscard]] std::basic_string<unsigned char>
 | 
			
		||||
dico_uncompress(std::map<std::uint16_t, std::basic_string<unsigned char>> &,
 | 
			
		||||
                const std::uint16_t, const std::uint16_t);
 | 
			
		||||
/// \brief Recherche ou ajout de chaine dans le dictionnaire
 | 
			
		||||
const std::pair<bool, std::uint32_t>
 | 
			
		||||
dico(std::map<std::pair<std::uint32_t, std::uint8_t>, std::uint32_t> &,
 | 
			
		||||
     uint32_t, uint8_t);
 | 
			
		||||
 | 
			
		||||
#endif /* LZW_SRC_COMMON_H_ */
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										186
									
								
								src/compress.cc
									
									
									
									
									
								
							
							
						
						
									
										186
									
								
								src/compress.cc
									
									
									
									
									
								
							@ -1,77 +1,149 @@
 | 
			
		||||
/**
 | 
			
		||||
 *   \file compress.cc
 | 
			
		||||
 *   \brief Implementation of compression
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include "compress.hh"
 | 
			
		||||
#include "common.hh"
 | 
			
		||||
#include "io.hh"
 | 
			
		||||
#include "utf8.hh"
 | 
			
		||||
#include <cassert>
 | 
			
		||||
#include <cstdlib>
 | 
			
		||||
#include <fstream>
 | 
			
		||||
#include <iterator>
 | 
			
		||||
using std::ios;
 | 
			
		||||
using std::string;
 | 
			
		||||
using std::uint16_t;
 | 
			
		||||
using std::uint8_t;
 | 
			
		||||
using std::vector;
 | 
			
		||||
using vuint16 = vector<uint16_t>;
 | 
			
		||||
using vvuint16 = vector<vuint16>;
 | 
			
		||||
using ustring = std::basic_string<unsigned char>;
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include "io.hh"
 | 
			
		||||
 | 
			
		||||
using dict_t = std::map<std::pair<uint16_t, uint8_t>, uint16_t>;
 | 
			
		||||
using dict_t = std::map<std::pair<uint32_t, uint8_t>, uint32_t>;
 | 
			
		||||
using ustring = std::basic_string<uint8_t>; // chaîne non encodée
 | 
			
		||||
using uvec = std::vector<std::uint32_t>;         // chaîne encodée
 | 
			
		||||
using std::printf;
 | 
			
		||||
 | 
			
		||||
[[nodiscard]] ustring read_file(const string &filename) {
 | 
			
		||||
  std::ifstream file{filename, ios::binary};
 | 
			
		||||
  assert(file);
 | 
			
		||||
  file.unsetf(ios::skipws);
 | 
			
		||||
  file.seekg(0, ios::end);
 | 
			
		||||
  const auto file_size = file.tellg();
 | 
			
		||||
  file.seekg(0, ios::beg);
 | 
			
		||||
  ustring res{};
 | 
			
		||||
  res.reserve(file_size);
 | 
			
		||||
  res.insert(res.begin(), std::istream_iterator<unsigned char>(file),
 | 
			
		||||
             std::istream_iterator<unsigned char>());
 | 
			
		||||
  file.close();
 | 
			
		||||
  return res;
 | 
			
		||||
constexpr size_t CHUNK_SIZE = 32768;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 *
 | 
			
		||||
 *  Reçoit une liste de paires std::thread/vecteurs, le premier étant le
 | 
			
		||||
 *  processus dont sa sortie est stockée dans le second. La sortie, une liste
 | 
			
		||||
 *  de caractères uint32_t, est écrite dans le fichier de sortie \p out.
 | 
			
		||||
 *
 | 
			
		||||
 *  \param[in] t_threads
 | 
			
		||||
 *  \param[out] t_out
 | 
			
		||||
 */
 | 
			
		||||
void join_and_write(
 | 
			
		||||
    std::vector<std::pair<std::unique_ptr<std::thread>, uvec>> &t_threads,
 | 
			
		||||
    std::vector<std::vector<std::uint32_t>> &compressed_text) {
 | 
			
		||||
  for (auto &elem : t_threads) {
 | 
			
		||||
    (*elem.first).join();
 | 
			
		||||
  }
 | 
			
		||||
  for (auto &elem : t_threads) {
 | 
			
		||||
    compressed_text.push_back(std::move(elem.second));
 | 
			
		||||
  }
 | 
			
		||||
  t_threads.clear();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
[[nodiscard]] vvuint16 lzw_compress(ustring &&t_text) {
 | 
			
		||||
  vvuint16 res{};
 | 
			
		||||
  const auto DICT_MAX = static_cast<size_t>(ipow(2, 14) - 256); /* 16 bits */
 | 
			
		||||
  uint16_t w = 0xFFFF;
 | 
			
		||||
  vuint16 chunk{};
 | 
			
		||||
  dict_t dict{};
 | 
			
		||||
  for (const auto c : t_text) {
 | 
			
		||||
    if (dict.size() >= DICT_MAX) {
 | 
			
		||||
      if (w != 0xFFFF) {
 | 
			
		||||
        chunk.push_back(w);
 | 
			
		||||
      }
 | 
			
		||||
      res.push_back(chunk);
 | 
			
		||||
      w = 0xFFFF;
 | 
			
		||||
      chunk.clear();
 | 
			
		||||
      dict.clear();
 | 
			
		||||
    }
 | 
			
		||||
    if (const auto &[exists, pos] = dico(dict, w, static_cast<uint8_t>(c));
 | 
			
		||||
        exists) {
 | 
			
		||||
/**
 | 
			
		||||
 *  La chaîne de caractères \p t_text est lue caractère par caractère et,
 | 
			
		||||
 *  selon la valeur de retour de la fonction \ref dico (permettant dans le même
 | 
			
		||||
 *  temps la création du dictionnaire), on rajoute un mot ou pas dans le vecteur
 | 
			
		||||
 *  de caractères UTF-8 représentant des mots de chars compressés. La fonction
 | 
			
		||||
 *  renvoie ledit vecteur de uint32_t via le paramètre \p t_res.
 | 
			
		||||
 *
 | 
			
		||||
 *  \param[in] t_text Chaîne de caractères uint8_t représentant le fichier d'entrée
 | 
			
		||||
 *  \param[out] t_res Chaîne de caractères de sortie
 | 
			
		||||
 */
 | 
			
		||||
void lzw_compress(const std::vector<char> &t_text, uvec &t_res) {
 | 
			
		||||
  dict_t dictionary{};
 | 
			
		||||
  std::puts("Compressing...");
 | 
			
		||||
  uint32_t w = 0xFFFF;
 | 
			
		||||
 | 
			
		||||
  constexpr size_t DICT_MAX = 7936; /* 12 bits */
 | 
			
		||||
 | 
			
		||||
  for (const auto &c : t_text) {
 | 
			
		||||
    if (dictionary.size() >= DICT_MAX) {
 | 
			
		||||
      t_res.push_back(static_cast<uint32_t>(w));
 | 
			
		||||
      w = static_cast<uint32_t>(c);
 | 
			
		||||
    } else if (const auto &[exists, pos] =
 | 
			
		||||
                   dico(dictionary, w, static_cast<std::uint8_t>(c));
 | 
			
		||||
               exists) {
 | 
			
		||||
      w = pos;
 | 
			
		||||
    } else {
 | 
			
		||||
      chunk.push_back(w);
 | 
			
		||||
      w = static_cast<uint16_t>(c);
 | 
			
		||||
      t_res.push_back(static_cast<uint32_t>(w));
 | 
			
		||||
      w = static_cast<std::uint8_t>(c);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  if (w != 0xFFFF) {
 | 
			
		||||
    chunk.push_back(w);
 | 
			
		||||
    res.push_back(std::move(chunk));
 | 
			
		||||
  }
 | 
			
		||||
  return res;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 *  Wrapper de la fonction \ref lzw_compress gérant l'ouverture, la lecture,
 | 
			
		||||
 *  l'écriture et la fermeture des fichiers d’entrée et de sortie. Si \p
 | 
			
		||||
 *  t_out_file est nul (chemin non spécifié), il prendra alors la valeur de
 | 
			
		||||
 *  \p t_in_file à laquelle sera annexé l’extension `.lzw`.
 | 
			
		||||
 *
 | 
			
		||||
 *  \param[in] t_in_file Chemin vers le fichier d’entrée
 | 
			
		||||
 *  \param[in] t_out_file Chemin vers le fichier de sortie
 | 
			
		||||
 */
 | 
			
		||||
void compress(const std::string &t_in_file, const char *t_out_file) {
 | 
			
		||||
  std::ofstream out{(t_out_file != nullptr) ? t_out_file : "output.lzw",
 | 
			
		||||
                    ios::out | ios::binary};
 | 
			
		||||
  if (!out.is_open()) {
 | 
			
		||||
    std::cerr << "Error at " << __FILE__ << ":" << __LINE__ - 4
 | 
			
		||||
              << ": could not open output file. Aborting...\n";
 | 
			
		||||
  // Fichier d’entrée
 | 
			
		||||
  std::ifstream input_file{t_in_file};
 | 
			
		||||
  if (!input_file.is_open()) {
 | 
			
		||||
    std::cerr << "Error at " << __FILE__ << ":" << __LINE__ - 2
 | 
			
		||||
              << ": could not open output file \"" << t_in_file
 | 
			
		||||
              << "\". Aborting...\n";
 | 
			
		||||
    exit(1);
 | 
			
		||||
  }
 | 
			
		||||
  const auto compressed_text(lzw_compress(read_file(t_in_file)));
 | 
			
		||||
 | 
			
		||||
  // Fichier de sortie
 | 
			
		||||
  FILE *out =
 | 
			
		||||
      (t_out_file != nullptr) ? fopen(t_out_file, "wb") : fopen("output.lzw", "wb");
 | 
			
		||||
  if (out == nullptr) {
 | 
			
		||||
    std::cerr << "Error at " << __FILE__ << ":" << __LINE__ - 4
 | 
			
		||||
              << ": could not open output file. Aborting...\n";
 | 
			
		||||
    input_file.close();
 | 
			
		||||
    exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // collection of chunks
 | 
			
		||||
  std::vector<std::vector<std::uint32_t>> compressed_text{};
 | 
			
		||||
 | 
			
		||||
  // thread pool
 | 
			
		||||
  std::vector<std::pair<std::unique_ptr<std::thread>, uvec>> threads{};
 | 
			
		||||
 | 
			
		||||
  // chunk chars
 | 
			
		||||
  std::vector<char> chunk(CHUNK_SIZE, 0);
 | 
			
		||||
  while (input_file.read(chunk.data(),
 | 
			
		||||
                         static_cast<std::streamsize>(chunk.size()))) {
 | 
			
		||||
    threads.emplace_back(nullptr, uvec{});
 | 
			
		||||
    threads.back().second.reserve(CHUNK_SIZE);
 | 
			
		||||
    threads.back().first = std::make_unique<std::thread>(
 | 
			
		||||
        std::thread{lzw_compress, chunk, ref(threads.back().second)});
 | 
			
		||||
    assert(threads.back().first);
 | 
			
		||||
    if (threads.size() >= 8) {
 | 
			
		||||
      join_and_write(threads, compressed_text);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (!threads.empty()) {
 | 
			
		||||
    join_and_write(threads, compressed_text);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (input_file.tellg() != std::ios::end) {
 | 
			
		||||
    std::puts("Leftovers, compressing...");
 | 
			
		||||
    {
 | 
			
		||||
      const auto prev_pos = input_file.tellg();
 | 
			
		||||
      input_file.seekg(0, std::ios::end);
 | 
			
		||||
      chunk.reserve(static_cast<size_t>(input_file.tellg() - prev_pos));
 | 
			
		||||
      input_file.seekg(prev_pos, std::ios::beg);
 | 
			
		||||
      std::istreambuf_iterator<char> itr(input_file);
 | 
			
		||||
      for (std::streamoff i = 0; i < prev_pos; ++i, ++itr){
 | 
			
		||||
        ;
 | 
			
		||||
      }
 | 
			
		||||
      chunk.assign((itr), std::istreambuf_iterator<char>());
 | 
			
		||||
    }
 | 
			
		||||
    uvec ret{};
 | 
			
		||||
    lzw_compress(chunk, ret);
 | 
			
		||||
    compressed_text.push_back(std::move(ret));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  write_file(out, compressed_text);
 | 
			
		||||
  out.close();
 | 
			
		||||
 | 
			
		||||
  fclose(out);
 | 
			
		||||
  input_file.close();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -1,14 +1,25 @@
 | 
			
		||||
/**
 | 
			
		||||
 *   \file compress.hh
 | 
			
		||||
 *   \brief Header for compression functions
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#ifndef LZW_SRC_COMPRESS_H_
 | 
			
		||||
#define LZW_SRC_COMPRESS_H_
 | 
			
		||||
 | 
			
		||||
#include "common.hh"
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <thread>
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
[[nodiscard]] std::vector<std::vector<std::uint16_t>>
 | 
			
		||||
lzw_compress(std::basic_string<unsigned char> &&);
 | 
			
		||||
/// \brief Exécution des threads et écriture de leur résultat dans le fichier de sortie
 | 
			
		||||
void join_and_write(std::vector<std::pair<std::unique_ptr<std::thread>,
 | 
			
		||||
                                          std::vector<std::uint32_t>>> &,
 | 
			
		||||
                    std::vector<std::vector<std::uint32_t>> &);
 | 
			
		||||
 | 
			
		||||
/// \brief Compression d'une chaine de caractères
 | 
			
		||||
void lzw_compress(const std::vector<char> &, std::vector<std::uint32_t> &);
 | 
			
		||||
 | 
			
		||||
/// \brief Wrapper de \ref lzw_compress
 | 
			
		||||
void compress(const std::string &, const char *);
 | 
			
		||||
 | 
			
		||||
#endif /* LZW_SRC_COMPRESS_H_ */
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										110
									
								
								src/io.cc
									
									
									
									
									
								
							
							
						
						
									
										110
									
								
								src/io.cc
									
									
									
									
									
								
							@ -1,27 +1,95 @@
 | 
			
		||||
/**
 | 
			
		||||
 *   \file io.cc
 | 
			
		||||
 *   \brief Body for file reading and writing
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include "io.hh"
 | 
			
		||||
#include "bitpack.hh"
 | 
			
		||||
#include <array>
 | 
			
		||||
 | 
			
		||||
using std::uint16_t;
 | 
			
		||||
using std::vector;
 | 
			
		||||
using vuint16 = vector<uint16_t>;
 | 
			
		||||
using vvuint16 = vector<vuint16>;
 | 
			
		||||
 | 
			
		||||
void write_file(std::ofstream &t_out, const vvuint16 &t_chunks) {
 | 
			
		||||
  const auto nr_chunks = static_cast<uint16_t>(t_chunks.size());
 | 
			
		||||
#ifdef Debug
 | 
			
		||||
  std::printf("Number of chunks: %u\n", nr_chunks);
 | 
			
		||||
constexpr bool debug_mode = true;
 | 
			
		||||
#else
 | 
			
		||||
constexpr bool debug_mode = false;
 | 
			
		||||
#endif
 | 
			
		||||
  t_out.write(reinterpret_cast<const char *>(&nr_chunks), sizeof(nr_chunks));
 | 
			
		||||
  for (const auto &chunk : t_chunks) {
 | 
			
		||||
    write_chunk(t_out, chunk);
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 *  Écrit dans le fichier \p t_out les chunks passés en paramètre. Le fichier de
 | 
			
		||||
 *  sortie est composé des éléments suivants :\n
 | 
			
		||||
 *  - Sur quatre octets sont écrit un `uint32_t` déterminant la taille d'un
 | 
			
		||||
 *    caractère\n
 | 
			
		||||
 *  - Sur quatre octets sont écrit un `uint32_t` déterminant le nombre de chunk
 | 
			
		||||
 *    composant le fichier\n
 | 
			
		||||
 *  - Sont ensuite écrits les chunks sur des nombres d’octets variable suivant
 | 
			
		||||
 *    la taille d’un caractère et le nombre de caractères\n
 | 
			
		||||
 *  \n
 | 
			
		||||
 *  Un chunk est composé de la manière qui suit :\n
 | 
			
		||||
 *  - Sur quatre octets sont écrit un `uint32_t` déterminant le nombre de
 | 
			
		||||
 *    caractères composant le chunk\n
 | 
			
		||||
 *  - Les caractères composant le chunk, accolés les uns au autres. Si le
 | 
			
		||||
 *    dernier caractère ne remplis pas le dernier octet du chunk, alors ces
 | 
			
		||||
 *    derniers bits seront initialisés à 0.\n
 | 
			
		||||
 *  La taille d’un chunk est donc la taille d’un caractère multiplié par le
 | 
			
		||||
 *  nombre de caractères du chunk, le tout divisé par 8. Si le résultat n’est
 | 
			
		||||
 *  pas un entier, alors il est nivelé vers le haut pour avoir un nombre entier
 | 
			
		||||
 *  d’octets (e.g. si le résultat est 103.4, alors 104 octets seront utilisés).
 | 
			
		||||
 *
 | 
			
		||||
 *  \param[out] t_out Fichier de sortie
 | 
			
		||||
 *  \param[in] t_text Collection ordonnée des chunks à écrire dans \p t_out
 | 
			
		||||
 */
 | 
			
		||||
void write_file(FILE *t_out, std::vector<std::vector<std::uint32_t>> &t_text) {
 | 
			
		||||
  {
 | 
			
		||||
    uint32_t char_size = 12;
 | 
			
		||||
    if constexpr (debug_mode) {
 | 
			
		||||
      std::printf("Char size: %u\n", char_size);
 | 
			
		||||
    }
 | 
			
		||||
    fwrite(&char_size, sizeof(uint32_t), 1, t_out);
 | 
			
		||||
    auto size = static_cast<uint32_t>(t_text.size());
 | 
			
		||||
    if constexpr (debug_mode) {
 | 
			
		||||
      std::printf("Number of chunks: %u\n", size);
 | 
			
		||||
    }
 | 
			
		||||
    fwrite(&size, sizeof(uint32_t), 1, t_out);
 | 
			
		||||
  }
 | 
			
		||||
  for(const auto &chunk : t_text) {
 | 
			
		||||
    // write size of chunk in uint32_t
 | 
			
		||||
    {
 | 
			
		||||
      auto size = static_cast<uint32_t>(chunk.size());
 | 
			
		||||
      if constexpr (debug_mode) {
 | 
			
		||||
        std::printf("Size of chunk: %u\n", size);
 | 
			
		||||
      }
 | 
			
		||||
      fwrite(&size, sizeof(uint32_t), 1, t_out);
 | 
			
		||||
    }
 | 
			
		||||
    uint8_t remainder = 0x00;
 | 
			
		||||
    for(size_t i = 0; i < chunk.size(); ++i) {
 | 
			
		||||
      if(i % 2 == 0) {
 | 
			
		||||
        // char = xxxx xxxx xxxx
 | 
			
		||||
        //        ^^^^^^^^^ ^^^^
 | 
			
		||||
        //          write   keep in remainder as xxxx0000
 | 
			
		||||
        auto temp = static_cast<unsigned char>(chunk[i] >> 4);
 | 
			
		||||
        fwrite(&temp, sizeof(temp), 1, t_out);
 | 
			
		||||
        if constexpr (debug_mode) {
 | 
			
		||||
          std::printf("writing: %x\t\t", temp);
 | 
			
		||||
        }
 | 
			
		||||
        remainder = static_cast<uint8_t>(chunk[i] << 4);
 | 
			
		||||
      } else {
 | 
			
		||||
        // already have `remainder = yyyy0000`
 | 
			
		||||
        //          char = xxxx xxxx xxxx
 | 
			
		||||
        //                 ^^^^ ^^^^^^^^^
 | 
			
		||||
        // remainder = yyyyxxxx   write after remainder
 | 
			
		||||
        // remainder = 00000000
 | 
			
		||||
        remainder &= static_cast<unsigned char>(chunk[i]) >> 8 & 0xF0;
 | 
			
		||||
        fwrite(&remainder, sizeof(remainder), 1, t_out);
 | 
			
		||||
        if constexpr (debug_mode) {
 | 
			
		||||
          std::printf("writing remainder: %x\t\t", remainder);
 | 
			
		||||
        }
 | 
			
		||||
        auto temp = static_cast<unsigned char>(chunk[i]);
 | 
			
		||||
        fwrite(&temp, sizeof(temp), 1, t_out);
 | 
			
		||||
        if constexpr (debug_mode) {
 | 
			
		||||
          std::printf("writing: %x\n", temp);
 | 
			
		||||
        }
 | 
			
		||||
        remainder = 0x00;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    if(remainder != 0) {
 | 
			
		||||
      fwrite(&remainder, sizeof(remainder), 1, t_out);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void write_chunk(std::ofstream &t_out, const vuint16 &t_chunk) {
 | 
			
		||||
  const auto output = pack(t_chunk);
 | 
			
		||||
  const auto chunk_size = static_cast<uint32_t>(output.size());
 | 
			
		||||
  t_out.write(reinterpret_cast<const char *>(&chunk_size), sizeof(chunk_size));
 | 
			
		||||
  t_out.write(reinterpret_cast<const char *>(output.data()),
 | 
			
		||||
              sizeof(output[0]) * output.size());
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										28
									
								
								src/io.hh
									
									
									
									
									
								
							
							
						
						
									
										28
									
								
								src/io.hh
									
									
									
									
									
								
							@ -1,15 +1,31 @@
 | 
			
		||||
/**
 | 
			
		||||
 *   \file io.hh
 | 
			
		||||
 *   \brief Header for file reading and writing
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#ifndef LZW_SRC_IO_H_
 | 
			
		||||
#define LZW_SRC_IO_H_
 | 
			
		||||
 | 
			
		||||
#include <cstdint>
 | 
			
		||||
#include <cstdio>
 | 
			
		||||
#include <fstream>
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <cstdint>
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
void write_file(std::ofstream &,
 | 
			
		||||
                const std::vector<std::vector<std::uint16_t>> &);
 | 
			
		||||
/*
 | 
			
		||||
 * Un fichier compressé se compose ainsi :
 | 
			
		||||
 * char_size : taille d'un caractère en bits (1B)
 | 
			
		||||
 * nb_chunk : nombre de chunks (4B)
 | 
			
		||||
 * chunks* : chunks
 | 
			
		||||
 *
 | 
			
		||||
 * Un chunk se compose ainsi :
 | 
			
		||||
 * nb_char_chunk : nombre de caractères du chunk (2B)
 | 
			
		||||
 * text* : caractères de taille char_size (ceil((char_size * nb_char_chunk) / 8))
 | 
			
		||||
 *
 | 
			
		||||
 * Si le dernier caractère ne termine pas le dernier octet du chunk, les
 | 
			
		||||
 * derniers bits sont mit à zéro
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
void write_chunk(std::ofstream &, const std::vector<std::uint16_t> &);
 | 
			
		||||
 | 
			
		||||
/// \brief Écrit dans le fichier le texte compressé
 | 
			
		||||
void write_file(FILE *, std::vector<std::vector<std::uint32_t>> &);
 | 
			
		||||
 | 
			
		||||
#endif /* LZW_SRC_IO_H_ */
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										175
									
								
								src/main.cc
									
									
									
									
									
								
							
							
						
						
									
										175
									
								
								src/main.cc
									
									
									
									
									
								
							@ -1,45 +1,68 @@
 | 
			
		||||
/**
 | 
			
		||||
 *   \file main.cc
 | 
			
		||||
 *   \brief Main file
 | 
			
		||||
 *
 | 
			
		||||
 *
 | 
			
		||||
 *
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#ifdef Debug
 | 
			
		||||
constexpr bool debug_mode = true;
 | 
			
		||||
#else
 | 
			
		||||
constexpr bool debug_mode = false;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#include "compress.hh"
 | 
			
		||||
#include "uncompress.hh"
 | 
			
		||||
#include <cassert>
 | 
			
		||||
#include <getopt.h>
 | 
			
		||||
#include <tuple>
 | 
			
		||||
#include "getopt.h"
 | 
			
		||||
 | 
			
		||||
using std::printf;
 | 
			
		||||
using std::puts;
 | 
			
		||||
using std::string;
 | 
			
		||||
using std::tuple;
 | 
			
		||||
 | 
			
		||||
// custom types ///////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
  Dictionnaire :
 | 
			
		||||
  <
 | 
			
		||||
      <
 | 
			
		||||
          numéro chaine précédente,
 | 
			
		||||
          caractère ASCII
 | 
			
		||||
      >
 | 
			
		||||
      numéro chaine courante
 | 
			
		||||
  >
 | 
			
		||||
 */
 | 
			
		||||
using dic_t = std::map<std::pair<uint32_t, uint8_t>, uint32_t>;
 | 
			
		||||
using ustring = std::basic_string<uint8_t>; // chaine non encodée
 | 
			
		||||
using uvec = std::vector<uint32_t>;         // chaine encodée
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 *  \brief Affichage d’aide
 | 
			
		||||
 */
 | 
			
		||||
void help() {
 | 
			
		||||
  puts("Usage:\n\
 | 
			
		||||
lzw  [-options] [-i path] [-o path]\n\n\
 | 
			
		||||
The default action is to compress the input file to a .lzw file\n\
 | 
			
		||||
in which the directory in which the software is executed.\n\
 | 
			
		||||
Options available:\n\
 | 
			
		||||
-h --help\n\
 | 
			
		||||
\tdisplay the current message\n\
 | 
			
		||||
-i --input\n\
 | 
			
		||||
\tpath to the input file (MANDATORY)\n\
 | 
			
		||||
-o --output\n\
 | 
			
		||||
\tpath to the output file (if the file already exists, it will be\n\n\
 | 
			
		||||
\toverwritten). Default: input path + \".lzw\\n\
 | 
			
		||||
-c --compress\n\
 | 
			
		||||
\tcompress the input file\n\
 | 
			
		||||
-u --uncompress\n\
 | 
			
		||||
\tuncompresses the input file to the output file. If no output path\n\
 | 
			
		||||
\thas not been entered and if the input file ends with \".lzw\",\n\
 | 
			
		||||
\tthe extension \".lzw\" will be removed; otherwise, the extension\n\
 | 
			
		||||
\t\"_uncompresed\" will be added");
 | 
			
		||||
  puts("Usage:");
 | 
			
		||||
  puts("lzw  [-options] [-i path] [-o path]");
 | 
			
		||||
  puts("\tThe default action is to compress the input file to a .lzw file");
 | 
			
		||||
  puts("\tin which the directory in which the software is executed.");
 | 
			
		||||
  puts("\tOptions available:");
 | 
			
		||||
  puts("\t-i\tpath to the input file (mandatory)");
 | 
			
		||||
  puts("\t-o\tpath to the output file (if the file already exists, it will");
 | 
			
		||||
  puts("\t\tbe overwritten). Default: input path + \".lzw\"");
 | 
			
		||||
  puts("\t-c\tcompress the input file");
 | 
			
		||||
  puts("\t-d\tdecompresses the input file to the output file. If no output");
 | 
			
		||||
  puts("\t\tpath has not been entered and if the input file ends with ");
 | 
			
		||||
  puts("\t\t\".lzw\", the extension \".lzw\" will be removed; otherwise, the ");
 | 
			
		||||
  puts("\t\textension \".uncompresed\" will be added");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
[[nodiscard]] std::tuple<string, string, bool> process_args(int t_argc,
 | 
			
		||||
                                                            char *t_argv[]) {
 | 
			
		||||
  auto ret = std::make_tuple(string{}, string{}, true);
 | 
			
		||||
int main(int argc, char *argv[]) {
 | 
			
		||||
  if constexpr (debug_mode) {
 | 
			
		||||
    for (int i = 0; i < argc; ++i)
 | 
			
		||||
      printf("argv[%d] = %s\n", i, argv[i]);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::string input_path{};
 | 
			
		||||
  std::string output_path{};
 | 
			
		||||
  bool compressing = true;
 | 
			
		||||
 | 
			
		||||
  while (true) {
 | 
			
		||||
    int option_index = 0;
 | 
			
		||||
    static struct option long_options[] = {
 | 
			
		||||
@ -49,49 +72,97 @@ Options available:\n\
 | 
			
		||||
        {"compress", no_argument, nullptr, 'c'},
 | 
			
		||||
        {"uncompress", no_argument, nullptr, 'u'},
 | 
			
		||||
        {nullptr, 0, nullptr, 0}};
 | 
			
		||||
    int c = getopt_long(t_argc, t_argv, "hi:o:cu", long_options, &option_index);
 | 
			
		||||
    int c = getopt_long(argc, argv, "hi:o:cu", long_options, &option_index);
 | 
			
		||||
    if (c == -1)
 | 
			
		||||
      break;
 | 
			
		||||
    switch (c) {
 | 
			
		||||
    case 0:
 | 
			
		||||
    case 0: {
 | 
			
		||||
      if constexpr (debug_mode) {
 | 
			
		||||
        printf("\noption %s", long_options[option_index].name);
 | 
			
		||||
        if (optarg) {
 | 
			
		||||
          printf(" with arg %s\n", optarg);
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      break;
 | 
			
		||||
    case 'h':
 | 
			
		||||
    }
 | 
			
		||||
    case 'h': {
 | 
			
		||||
      if constexpr (debug_mode) {
 | 
			
		||||
        printf("From main - option --help passed\n");
 | 
			
		||||
      }
 | 
			
		||||
      help();
 | 
			
		||||
      exit(0);
 | 
			
		||||
    case 'i':
 | 
			
		||||
      std::get<0>(ret) = optarg;
 | 
			
		||||
      return 0;
 | 
			
		||||
    }
 | 
			
		||||
    case 'i': {
 | 
			
		||||
      if constexpr (debug_mode) {
 | 
			
		||||
        printf("From main - option --input with value '%s'\n", optarg);
 | 
			
		||||
      }
 | 
			
		||||
      input_path = optarg;
 | 
			
		||||
      break;
 | 
			
		||||
    case 'o':
 | 
			
		||||
      std::get<1>(ret) = optarg;
 | 
			
		||||
    }
 | 
			
		||||
    case 'o': {
 | 
			
		||||
      if constexpr (debug_mode) {
 | 
			
		||||
        printf("From main - option --output with value '%s'\n", optarg);
 | 
			
		||||
      }
 | 
			
		||||
      output_path = optarg;
 | 
			
		||||
      break;
 | 
			
		||||
    case 'c':
 | 
			
		||||
      std::get<2>(ret) = true;
 | 
			
		||||
    }
 | 
			
		||||
    case 'c': {
 | 
			
		||||
      if constexpr (debug_mode) {
 | 
			
		||||
        printf("From main - option --compress\n");
 | 
			
		||||
      }
 | 
			
		||||
      compressing = true;
 | 
			
		||||
      break;
 | 
			
		||||
    case 'u':
 | 
			
		||||
      std::get<2>(ret) = false;
 | 
			
		||||
    }
 | 
			
		||||
    case 'u': {
 | 
			
		||||
      if constexpr (debug_mode) {
 | 
			
		||||
        printf("From main - option --uncompress\n");
 | 
			
		||||
      }
 | 
			
		||||
      compressing = false;
 | 
			
		||||
      break;
 | 
			
		||||
    }
 | 
			
		||||
    case '?':
 | 
			
		||||
      [[fallthrough]];
 | 
			
		||||
    default:
 | 
			
		||||
    default: {
 | 
			
		||||
      puts("Error: unknown parameter.");
 | 
			
		||||
      if constexpr (debug_mode) {
 | 
			
		||||
        printf("From main - option -?\n");
 | 
			
		||||
      }
 | 
			
		||||
      help();
 | 
			
		||||
      exit(1);
 | 
			
		||||
      return 1;
 | 
			
		||||
    }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int main(int argc, char *argv[]) {
 | 
			
		||||
  const auto [input_path, output_path, compressing] = process_args(argc, argv);
 | 
			
		||||
  if (input_path.empty()) {
 | 
			
		||||
    help();
 | 
			
		||||
    return 0;
 | 
			
		||||
    puts("Error: no input file specified");
 | 
			
		||||
    return 2;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (compressing) {
 | 
			
		||||
    compress(input_path, (output_path.empty()) ? nullptr : output_path.c_str());
 | 
			
		||||
    /*
 | 
			
		||||
      TODO:
 | 
			
		||||
      - compresser le fichier d’entrée morceaux par morceaux, 16Ko à la fois
 | 
			
		||||
      - écrire la taille du segment compressé, puis le segment compressé
 | 
			
		||||
      - multithreading
 | 
			
		||||
      - compression multiple : nombre de compressions puis fichier compressé
 | 
			
		||||
      - bit-packing, limiter la taille du dictionnaire pour un certain nombre de
 | 
			
		||||
        bits.
 | 
			
		||||
     */
 | 
			
		||||
    if constexpr (debug_mode) {
 | 
			
		||||
      puts("Beginning compression");
 | 
			
		||||
    }
 | 
			
		||||
    if (output_path.empty()) {
 | 
			
		||||
      compress(input_path, nullptr);
 | 
			
		||||
    } else {
 | 
			
		||||
      compress(input_path, output_path.c_str());
 | 
			
		||||
    }
 | 
			
		||||
    // compress(input_path, output_path.c_str());
 | 
			
		||||
  } else {
 | 
			
		||||
    uncompress(input_path,
 | 
			
		||||
               (output_path.empty()) ? nullptr : output_path.c_str());
 | 
			
		||||
    puts("Not yet implemented :(");
 | 
			
		||||
    /*
 | 
			
		||||
      Inversion des types du dictionnaire pour retrouver les chaînes plus
 | 
			
		||||
      aisément
 | 
			
		||||
     */
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -1,60 +0,0 @@
 | 
			
		||||
#include "uncompress.hh"
 | 
			
		||||
#include "bitpack.hh"
 | 
			
		||||
#include "common.hh"
 | 
			
		||||
#include <cassert>
 | 
			
		||||
#include <cstdio>
 | 
			
		||||
#include <cstdlib>
 | 
			
		||||
#include <fstream>
 | 
			
		||||
#include <iterator>
 | 
			
		||||
 | 
			
		||||
using std::fclose;
 | 
			
		||||
using std::fopen;
 | 
			
		||||
using std::fseek;
 | 
			
		||||
using std::string;
 | 
			
		||||
using std::uint16_t;
 | 
			
		||||
using std::vector;
 | 
			
		||||
using ustring = std::basic_string<unsigned char>;
 | 
			
		||||
using vuint16 = vector<uint16_t>;
 | 
			
		||||
 | 
			
		||||
/**
 *  \brief Decodes one chunk of LZW codes back into raw bytes.
 *
 *  The first code is emitted directly as a single byte. Every following code
 *  is handed to dico_uncompress() together with the dictionary and the
 *  previously seen code, and the bytes it returns are appended to the result.
 *
 *  \param[in] t_compressed Sequence of LZW codes making up one chunk
 *  \return The decoded bytes; empty when \p t_compressed is empty
 */
[[nodiscard]] ustring lzw_uncompress(vuint16 &&t_compressed) {
  ustring ret{};

  // An empty chunk has no first code to seed the decoder with; reading
  // element 0 or forming begin() + 1 would be undefined behaviour.
  if (t_compressed.empty()) {
    return ret;
  }

  std::map<uint16_t, ustring> dict{};

  // The first code is emitted as-is, truncated to one byte.
  uint16_t old = t_compressed.front();
  ret.push_back(static_cast<unsigned char>(old));

  for (auto it = t_compressed.begin() + 1; it != t_compressed.end(); ++it) {
    const auto uncompressed{dico_uncompress(dict, *it, old)};
    ret.insert(ret.end(), uncompressed.begin(), uncompressed.end());
    old = *it;
  }

  return ret;
}
 | 
			
		||||
 | 
			
		||||
void uncompress(const string &t_input_name, const char *t_output_name) {
 | 
			
		||||
  FILE *const input = std::fopen(t_input_name.c_str(), "rb");
 | 
			
		||||
  assert(input);
 | 
			
		||||
  std::ofstream output{(t_output_name != nullptr)
 | 
			
		||||
                           ? t_output_name
 | 
			
		||||
                           : t_input_name + "_uncompressed",
 | 
			
		||||
                       std::ios::out | std::ios::binary};
 | 
			
		||||
  assert(output.is_open());
 | 
			
		||||
  uint16_t nb_chunks = 0;
 | 
			
		||||
  std::fread(&nb_chunks, sizeof(nb_chunks), 1, input);
 | 
			
		||||
  for (uint16_t i = 0; i < nb_chunks; ++i) {
 | 
			
		||||
    uncompress_chunk(input, output);
 | 
			
		||||
  }
 | 
			
		||||
  output.close();
 | 
			
		||||
  std::fclose(input);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void uncompress_chunk(FILE *const t_input, std::ofstream &t_output) {
 | 
			
		||||
  uint32_t size_chunk = 0;
 | 
			
		||||
  fread(&size_chunk, sizeof(size_chunk), 1, t_input);
 | 
			
		||||
  auto chunk = std::make_unique<unsigned char[]>(size_chunk);
 | 
			
		||||
  fread(chunk.get(), sizeof(unsigned char), size_chunk, t_input);
 | 
			
		||||
  auto unpacked = unpack(ustring{chunk.get(), chunk.get() + size_chunk});
 | 
			
		||||
  auto uncompressed_chunk = lzw_uncompress(std::move(unpacked));
 | 
			
		||||
  t_output.write(reinterpret_cast<const char *>(uncompressed_chunk.data()),
 | 
			
		||||
                 sizeof(uncompressed_chunk[0]) * uncompressed_chunk.size());
 | 
			
		||||
}
 | 
			
		||||
@ -1,16 +0,0 @@
 | 
			
		||||
#ifndef LZW_SRC_UNCOMPRESS_H_
 | 
			
		||||
#define LZW_SRC_UNCOMPRESS_H_
 | 
			
		||||
 | 
			
		||||
#include <fstream>
 | 
			
		||||
#include <memory>
 | 
			
		||||
#include <string>
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
[[nodiscard]] std::basic_string<unsigned char>
 | 
			
		||||
lzw_uncompress(std::vector<std::uint16_t> &&);
 | 
			
		||||
 | 
			
		||||
void uncompress(const std::string &, const char *);
 | 
			
		||||
 | 
			
		||||
void uncompress_chunk(FILE *, std::ofstream &);
 | 
			
		||||
 | 
			
		||||
#endif /* LZW_SRC_UNCOMPRESS_H_ */
 | 
			
		||||
							
								
								
									
										52
									
								
								src/utf8.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								src/utf8.cc
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,52 @@
 | 
			
		||||
/**
 | 
			
		||||
 *   \file utf8.cc
 | 
			
		||||
 *   \brief Implementation for UTF-8 related functions
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include "utf8.hh"
 | 
			
		||||
#include <string>
 | 
			
		||||
 | 
			
		||||
using FILE = std::FILE;
 | 
			
		||||
using uint8_t = std::uint8_t;
 | 
			
		||||
using uint32_t = std::uint32_t;
 | 
			
		||||
using ustring = std::basic_string<uint8_t>; // chaine non encodée
 | 
			
		||||
 | 
			
		||||
/**
 *  \brief Writes one character to a file as a UTF-8 byte sequence.
 *
 *  Values below 128 are written as a single byte. Larger values are written
 *  as a lead byte followed by one to five continuation bytes (0x80 plus six
 *  payload bits each), most significant bits first. For the six-byte form the
 *  lead byte only has room for one payload bit, so any bit above bit 30 is
 *  dropped.
 *
 *  \param[in] t_out Output file
 *  \param[in] t_c Character (code point) to write to \p t_out
 */
void write_utf8(std::FILE* t_out, std::uint32_t t_c) {
  if (t_c < 128) {
    // Write the low byte explicitly rather than the first byte in memory of
    // the 32-bit value, whose position depends on the platform byte order.
    const auto ascii = static_cast<unsigned char>(t_c);
    std::fwrite(&ascii, sizeof(ascii), 1, t_out);
    return;
  }

  // Number of continuation bytes and the marker bits of the lead byte.
  std::size_t loops = 0;
  unsigned char header = 0;
  if (t_c < 2048) {
    loops = 1;
    header = 0xC0;
  } else if (t_c < 65536) {
    loops = 2;
    header = 0xE0;
  } else if (t_c < 2097152) {
    loops = 3;
    header = 0xF0;
  } else if (t_c < 67108864) {
    loops = 4;
    header = 0xF8;
  } else {
    loops = 5;
    header = 0xFC;
  }

  // The lead byte carries (6 - loops) payload bits.
  const std::uint32_t lead_mask = (1u << (6 - loops)) - 1u;

  unsigned char bytes[6] = {};
  for (std::size_t i = 0; i <= loops; ++i) {
    // Shift first, then mask: masking before the shift would discard the
    // high-order bits that belong in the leading bytes.
    const std::uint32_t shifted = t_c >> ((loops - i) * 6);
    bytes[i] = (i == 0)
                   ? static_cast<unsigned char>(header | (shifted & lead_mask))
                   : static_cast<unsigned char>(0x80u | (shifted & 0x3Fu));
  }
  std::fwrite(bytes, sizeof(unsigned char), loops + 1, t_out);
}
 | 
			
		||||
							
								
								
									
										26
									
								
								src/utf8.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								src/utf8.hh
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,26 @@
 | 
			
		||||
/**
 | 
			
		||||
 *   \file utf8.hh
 | 
			
		||||
 *   \brief Header for UTF-8 related functions
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#ifndef LZW_SRC_UTF8_H_
 | 
			
		||||
#define LZW_SRC_UTF8_H_
 | 
			
		||||
 | 
			
		||||
#include <cstdio>
 | 
			
		||||
#include <cstdint>
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
  L’encodage des caractères se fait en UTF-8
 | 
			
		||||
  char < 128 => "0xxxxxxx" 7bits
 | 
			
		||||
  char < 2,048 => "110xxxxx 10xxxxxx" 11bits
 | 
			
		||||
  char < 65,536 => "1110xxxx 10xxxxxx 10xxxxxx" 16bits
 | 
			
		||||
  char < 2,097,152 => "11110xxx 10xxxxxx 10xxxxxx 10xxxxxx" 21bits
 | 
			
		||||
  char < 67,108,864 => "111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx" 26bits
 | 
			
		||||
  char < 2,147,483,648 => "1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx" 31bits
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/// \brief Écrit les caractères au format UTF-8
 | 
			
		||||
void write_utf8(std::FILE* t_out, std::uint32_t t_c);
 | 
			
		||||
 | 
			
		||||
#endif /* LZW_SRC_UTF8_H_ */
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user