33 Commits

Author SHA1 Message Date
Phuntsok Drak-pa
7775fec68e test nouvelles fonctions 2018-06-17 04:09:25 +02:00
Phuntsok Drak-pa
bae6d01bc8 fixed new bitpack size detection 2018-06-15 23:08:17 +02:00
Phuntsok Drak-pa
79770384c1 Deleted unused files 2018-06-15 19:58:25 +02:00
Phuntsok Drak-pa
9b892e12ee OSX deactivated for now 2018-06-15 19:54:47 +02:00
Phuntsok Drak-pa
2b3e5e3f2b Fixed bit-packing bug 2018-06-15 19:54:00 +02:00
Phuntsok Drak-pa
5b9f3ccd6a forgot to change this line 2018-06-11 22:11:58 +02:00
Phuntsok Drak-pa
6c3fb8b6a7 removed [[nodiscard]] and changed chunk binary format 2018-06-11 21:26:05 +02:00
Phuntsok Drak-pa
67a88aaf91 bug fixed as input stream was char and not unsigned char 2018-06-11 21:02:59 +02:00
Phuntsok Drak-pa
ec85a4b978 Some fixes, dictionary still broken 2018-06-11 16:34:35 +02:00
Phuntsok Drak-pa
e54e5fa07d Bit-unpacking FIXEDDDDDDDDDDDD 2018-06-11 00:58:01 +02:00
Phuntsok Drak-pa
5eb33fb04f I think I fixed the lzw algo, but somehow uncompression still broken 2018-06-10 23:44:10 +02:00
Phuntsok Drak-pa
fcfe944c5d Added (broken) uncompressing algo + fixed bit-unpacking 2018-06-10 21:21:06 +02:00
Phuntsok Drak-pa
9f70b01886 Moved ipow function 2018-06-09 23:11:27 +02:00
Phuntsok Drak-pa
f8b493de2b added bit-unpacking algorithm 2018-06-09 22:59:11 +02:00
Phuntsok Drak-pa
1909b52c32 bugfix 2018-06-09 11:00:34 +02:00
Phuntsok Drak-pa
8e04f74bba bugfix 2018-06-09 03:30:37 +02:00
Phuntsok Drak-pa
6d779d8606 Changed file writing method to dynamic bit-packing 2018-06-09 03:18:28 +02:00
Phuntsok Drak-pa
d901cd60f2 removed bettercode 2018-06-09 02:50:48 +02:00
Phuntsok Drak-pa
cbd6d89234 Added function for bit-packing compression-side 2018-06-09 02:44:03 +02:00
Phuntsok Drak-pa
b807ee259e changed some types 2018-06-05 11:38:27 +02:00
Phuntsok Drak-pa
816b5cf1a7 update CircleCI 2018-05-25 13:39:55 +02:00
Phuntsok Drak-pa
3413bc9372 Added doxygen and graphviz to release builds on TravisCI and CircleCI 2018-05-25 13:36:21 +02:00
Phuntsok Drak-pa
74804a1ead Bugfixes and additional changes 2018-05-25 13:28:37 +02:00
Phuntsok Drak-pa
3bd2e15c76 removed unused prototype and header 2018-05-25 13:22:51 +02:00
c3ec86de87 Added documentation generation as CMake target for Release 2018-05-25 13:20:46 +02:00
Phuntsok Drak-pa
5ee86709ee I'm so stupid omb... 2018-05-25 12:13:21 +02:00
Phuntsok Drak-pa
44434df096 Code cleanup 2018-05-25 12:13:12 +02:00
Phuntsok Drak-pa
72c71c306f Changed I/O, compression behavior, removed threads 2018-05-25 12:00:58 +02:00
Phuntsok Drak-pa
2cfb560153 For some reason it worked before but not anymore 2018-05-25 12:00:30 +02:00
Phuntsok Drak-pa
c2f3621815 updated bettercodehub settings 2018-05-23 22:54:41 +02:00
Phuntsok Drak-pa
2366693069 updated bettercodehub settings 2018-05-23 22:51:27 +02:00
Phuntsok Drak-pa
e5c6ce7afc Added CircleCI badge 2018-05-23 18:11:41 +02:00
Phuntsok Drak-pa
440b38bf22 Merge pull request #4 from Phundrak/circle-ci
Circle ci
2018-05-23 18:03:00 +02:00
24 changed files with 1439 additions and 3161 deletions

View File

@@ -1,6 +0,0 @@
component_depth: 2
languages:
- cpp
exclude:
- includes/*
- .*/*

View File

@@ -5,4 +5,5 @@ jobs:
- image: purplekarrot/gcc-8 - image: purplekarrot/gcc-8
steps: steps:
- checkout - checkout
- run: cd build && cmake -DCMAKE_BUILD_TYPE=Debug .. && make -j - run: apt-get update && apt-get install -y doxygen doxygen-docs doxygen-latex doxygen-gui graphviz
- run: cd build && cmake -DCMAKE_BUILD_TYPE=Release .. && make -j

6
.gitignore vendored
View File

@@ -7,3 +7,9 @@ cmake-build-debug/
\.idea/ \.idea/
*.lzw *.lzw
docs/Doxyfile
docs/html/
docs/latex/

View File

@@ -11,6 +11,11 @@ addons:
packages: packages:
- g++-7 - g++-7
- clang-5.0 - clang-5.0
- doxygen
- doxygen-doc
- doxygen-latex
- doxygen-gui
- graphviz
matrix: matrix:
include: include:
@@ -39,24 +44,25 @@ matrix:
- eval "${MATRIX_EVAL}" - eval "${MATRIX_EVAL}"
# OSX Clang # OSX Clang
- os: osx # - os: osx
osx_image: xcode9.3 # osx_image: xcode9.3
env: # env:
BUILD_TYPE=Release # BUILD_TYPE=Release
BIN_DIR=bin # BIN_DIR=bin
before_install: # before_install:
brew update # - brew update
install: # - brew install doxygen graphviz
brew upgrade cmake # install:
- os: osx # brew upgrade cmake
osx_image: xcode9.3 # - os: osx
env: # osx_image: xcode9.3
BUILD_TYPE=Debug # env:
BIN_DIR=debug # BUILD_TYPE=Debug
before_install: # BIN_DIR=debug
brew update # before_install:
install: # brew update
brew upgrade cmake # install:
# brew upgrade cmake
script: script:
- cd build - cd build
- cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE .. - cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE ..

View File

@@ -1,6 +1,5 @@
cmake_minimum_required(VERSION 3.8 FATAL_ERROR) cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
set(CMAKE_LEGACY_CYGWIN_WIN32 0) set(CMAKE_LEGACY_CYGWIN_WIN32 0)
set(CMAKE_BUILD_TYPE Debug)
project("projet_lzw") project("projet_lzw")
@@ -8,7 +7,7 @@ set(TGT "projet_lzw")
set(${TGT}_VERSION_MAJOR 0) set(${TGT}_VERSION_MAJOR 0)
set(${TGT}_VERSION_MINOR 1) set(${TGT}_VERSION_MINOR 1)
set(CXX_COVERAGE_COMPILE_FLAGS "-pedantic -Wall -Wextra -Wold-style-cast -Woverloaded-virtual -Wfloat-equal -Wwrite-strings -Wpointer-arith -Wcast-qual -Wcast-align -Wconversion -Wsign-conversion -Wshadow -Weffc++ -Wredundant-decls -Wdouble-promotion -Winit-self -Wswitch-default -Wswitch-enum -Wundef -Winline -Wunused -Wnon-virtual-dtor -pthread") set(CXX_COVERAGE_COMPILE_FLAGS "-pedantic -Wall -Wextra -Wold-style-cast -Woverloaded-virtual -Wfloat-equal -Wwrite-strings -Wpointer-arith -Wcast-qual -Wcast-align -Wshadow -Weffc++ -Wredundant-decls -Wdouble-promotion -Winit-self -Wswitch-default -Wswitch-enum -Wundef -Winline -Wunused -Wnon-virtual-dtor -Wno-conversion -pthread")
set(CMAKE_CXX_FLAGS_DEBUG "${CXX_COVERAGE_COMPILE_FLAGS} -DDebug -g -pg") set(CMAKE_CXX_FLAGS_DEBUG "${CXX_COVERAGE_COMPILE_FLAGS} -DDebug -g -pg")
set(CMAKE_CXX_FLAGS_RELEASE "${CXX_COVERAGE_COMPILE_FLAGS} -O3") set(CMAKE_CXX_FLAGS_RELEASE "${CXX_COVERAGE_COMPILE_FLAGS} -O3")
@@ -26,8 +25,6 @@ else()
message( FATAL_ERROR "C++17 not supported, CMake will exit." ) message( FATAL_ERROR "C++17 not supported, CMake will exit." )
endif() endif()
endif() endif()
# set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED YES) set(CMAKE_CXX_STANDARD_REQUIRED YES)
set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_CXX_EXTENSIONS OFF)
@@ -37,6 +34,26 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "../debug/")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_COVERAGE_COMPILE_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_COVERAGE_COMPILE_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${CXX_COVERAGE_COMPILE_FLAGS}") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${CXX_COVERAGE_COMPILE_FLAGS}")
# indicate the doc build as an option, ON by default
if(CMAKE_BUILD_TYPE MATCHES "^[Rr]elease")
option(BUILD_DOC "Build documentation" ON)
find_package(Doxygen
REQUIRED dot)
if(DOXYGEN_FOUND)
set(DOXYGEN_IN ${CMAKE_CURRENT_SOURCE_DIR}/docs/Doxyfile.in)
set(DOXYGEN_OUT ${CMAKE_CURRENT_SOURCE_DIR}/docs/Doxyfile)
configure_file(${DOXYGEN_IN} ${DOXYGEN_OUT} @ONLY)
message("Doxygen build started")
add_custom_target(docs ALL
COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_OUT}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
COMMENT "Generating API documentation with Doxygen"
VERBATIM )
else()
message("Doxygen needs to be installed to generate the doxygen documentation.")
endif()
endif()
include_directories(includes) include_directories(includes)
file(GLOB SOURCES "src/*.cc") file(GLOB SOURCES "src/*.cc")
add_executable(${TGT} ${SOURCES}) add_executable(${TGT} ${SOURCES})

View File

@@ -1,8 +1,8 @@
[![CircleCI](https://circleci.com/gh/Phundrak/lzw-assignment/tree/master.svg?style=svg)](https://circleci.com/gh/Phundrak/lzw-assignment/tree/master)
[![Travis Badge](https://travis-ci.org/Phundrak/lzw-assignment.svg?branch=master)](https://travis-ci.org/Phundrak/lzw-assignment) [![Travis Badge](https://travis-ci.org/Phundrak/lzw-assignment.svg?branch=master)](https://travis-ci.org/Phundrak/lzw-assignment)
[![Codacy Badge](https://api.codacy.com/project/badge/Grade/80cf9a0514554f368effaf78d8e4ae15)](https://www.codacy.com/app/Phundrak/lzw-assignment?utm_source=github.com&utm_medium=referral&utm_content=Phundrak/lzw-assignment&utm_campaign=Badge_Grade) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/80cf9a0514554f368effaf78d8e4ae15)](https://www.codacy.com/app/Phundrak/lzw-assignment?utm_source=github.com&utm_medium=referral&utm_content=Phundrak/lzw-assignment&utm_campaign=Badge_Grade)
[![CodeFactor](https://www.codefactor.io/repository/github/phundrak/lzw-assignment/badge)](https://www.codefactor.io/repository/github/phundrak/lzw-assignment) [![CodeFactor](https://www.codefactor.io/repository/github/phundrak/lzw-assignment/badge)](https://www.codefactor.io/repository/github/phundrak/lzw-assignment)
[![Spacemacs Badge](https://cdn.rawgit.com/syl20bnr/spacemacs/442d025779da2f62fc86c2082703697714db6514/assets/spacemacs-badge.svg)](http://spacemacs.org) [![Spacemacs Badge](https://cdn.rawgit.com/syl20bnr/spacemacs/442d025779da2f62fc86c2082703697714db6514/assets/spacemacs-badge.svg)](http://spacemacs.org)
[![BCH compliance](https://bettercodehub.com/edge/badge/Phundrak/lzw-assignment?branch=master)](https://bettercodehub.com/)
# LZW Compressing tool # LZW Compressing tool

3
doc/.gitignore vendored
View File

@@ -1,3 +0,0 @@
*
!.gitignore
!Doxyfile

File diff suppressed because it is too large Load Diff

33
docs/Doxyfile.in Normal file
View File

@@ -0,0 +1,33 @@
OUTPUT_DIRECTORY = @CMAKE_CURRENT_SOURCE_DIR@/docs/
INPUT = @CMAKE_CURRENT_SOURCE_DIR@/src/ @CMAKE_CURRENT_SOURCE_DIR@/docs
DOXYFILE_ENCODING = UTF-8
PROJECT_NAME = "Compression LZW"
PROJECT_NUMBER = 0.3
PROJECT_BRIEF = "Utilitaire de compression/décompression de fichiers via l'algorithme LZW"
ALLOW_UNICODE_NAMES = YES
OUTPUT_LANGUAGE = French
FULL_PATH_NAMES = NO
TAB_SIZE = 2
EXTRACT_ALL = YES
CASE_SENSE_NAMES = YES
INPUT_ENCODING = UTF-8
FILE_PATTERNS = *.cc \
*.hh
RECURSIVE = YES
EXAMPLE_PATTERNS = *
SOURCE_BROWSER = YES
INLINE_SOURCES = YES
REFERENCED_BY_RELATION = YES
REFERENCES_RELATION = YES
USE_HTAGS = NO
HTML_TIMESTAMP = YES
GENERATE_DOCSET = NO
GENERATE_HTMLHELP = NO
LATEX_SOURCE_CODE = YES
LATEX_TIMESTAMP = YES
ENABLE_PREPROCESSING = NO
HAVE_DOT = YES
UML_LOOK = YES
CALL_GRAPH = YES
CALLER_GRAPH = YES
INTERACTIVE_SVG = YES

View File

@@ -1,244 +0,0 @@
/*
Copyright 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "getopt.h"
#include <stddef.h>
#include <stdio.h>
#include <string.h>
/* Argument text of the option being parsed; written by getopt() and the
   getopt_long() family when an option carries an argument. */
char *optarg;
/* Parser state shared by getopt() and the getopt_long() family:
   optind    index of the next argv element to examine (starts at 1);
   opterr    when non-zero, diagnostics are printed on stderr;
   optopt    option character (or long-option val) most recently examined;
   __optpos  offset of the next character inside the current argv word,
             used to walk bundled short options;
   optreset  when non-zero, the next call restarts scanning at argv[1]. */
int optind = 1, opterr = 1, optopt, __optpos, optreset = 0;
/* Shorter internal spelling of __optpos used by the code below. */
#define optpos __optpos
/*
 * Print one diagnostic line on stderr: the string a, the string b, then
 * the first l bytes of c, followed by a newline. The stream is locked for
 * the duration so the pieces are not interleaved with other writers.
 * Output stops silently at the first piece that fails to be written.
 */
static void __getopt_msg(const char *a, const char *b, const char *c,
                         size_t l) {
  FILE *out = stderr;

  flockfile(out);
  if (fputs(a, out) >= 0) {
    if (fwrite(b, strlen(b), 1, out) != 0) {
      if (fwrite(c, 1, l, out) == l) {
        putc('\n', out);
      }
    }
  }
  funlockfile(out);
}
/*
 * Parse the next short option from argv.
 *
 * argc, argv  the argument vector to scan; argv[optind] is the word
 *             currently being examined.
 * optstring   the accepted option characters. A character followed by ':'
 *             takes an argument, followed by "::" takes an optional one.
 *             A leading '-' makes non-option words be returned as code 1
 *             with optarg pointing at them; a leading ':' (after an
 *             optional '-'/'+') silences diagnostics and makes a missing
 *             argument be reported as ':'.
 *
 * Returns the option character, '?' for an unrecognised option or a
 * missing argument (':' in the silent mode described above), 1 for a
 * non-option word when optstring starts with '-', and -1 when there is
 * nothing left to parse (end of argv, a non-option word, a lone "-", or
 * "--", which is skipped).
 *
 * Side effects: updates optind, optopt, optarg and the internal optpos.
 */
int getopt(int argc, char *const argv[], const char *optstring) {
  int i, c, d;
  int k;
  char *optchar;

  /* Restart from argv[1] when the caller asked for a reset. */
  if (!optind || optreset) {
    optreset = 0;
    __optpos = 0;
    optind = 1;
  }

  if (optind >= argc || !argv[optind])
    return -1;

  if (argv[optind][0] != '-') {
    if (optstring[0] == '-') {
      optarg = argv[optind++];
      return 1;
    }
    return -1;
  }

  /* A lone "-" is not an option. */
  if (!argv[optind][1])
    return -1;

  /* "--" ends option parsing and is itself consumed. */
  if (argv[optind][1] == '-' && !argv[optind][2])
    return optind++, -1;

  /* Skip the leading '-' when starting a new word. */
  if (!optpos)
    optpos++;
  c = argv[optind][optpos], k = 1;
  optchar = argv[optind] + optpos;
  optopt = c;
  optpos += k;

  /* End of this word reached: continue with the next argv element. */
  if (!argv[optind][optpos]) {
    optind++;
    optpos = 0;
  }

  if (optstring[0] == '-' || optstring[0] == '+')
    optstring++;

  /* Look c up in optstring. The scan must stop at the terminating NUL:
     the previous loop only stopped on a match, so an unrecognised option
     made it read past the end of optstring. On exit i indexes the
     character following the one that was compared last. */
  i = 0;
  do {
    d = optstring[i++];
  } while (d && d != c);

  /* ':' is optstring syntax, never a valid option character. */
  if (d != c || c == ':') {
    if (optstring[0] != ':' && opterr)
      __getopt_msg(argv[0], ": unrecognized option: ", optchar, k);
    return '?';
  }

  if (optstring[i] == ':') {
    if (optstring[i + 1] == ':')
      optarg = 0;
    else if (optind >= argc) {
      if (optstring[0] == ':')
        return ':';
      if (opterr)
        __getopt_msg(argv[0], ": option requires an argument: ", optchar, k);
      return '?';
    }
    /* Required argument, or optional argument glued to the option. */
    if (optstring[i + 1] != ':' || optpos) {
      optarg = argv[optind++] + optpos;
      optpos = 0;
    }
  }
  return c;
}
/*
 * Move argv[src] down to position dest, shifting the elements in
 * [dest, src) one slot towards the end. When src <= dest nothing is
 * shifted and argv[dest] is simply overwritten with argv[src].
 */
static void permute(char *const *argv, int dest, int src) {
  char **slots = (char **)argv;
  char *moved = slots[src];
  int pos = src;

  while (pos > dest) {
    slots[pos] = slots[pos - 1];
    --pos;
  }
  slots[dest] = moved;
}
/*
 * Try to parse argv[optind] as a long option; fall back to getopt() for
 * everything else.
 *
 * longopts  table of long options, terminated by an entry whose name is 0.
 * idx       when non-null, receives the index of the matched table entry.
 * longonly  when non-zero, a word starting with a single '-' is also tried
 *           as a long option.
 *
 * Returns the matched entry's val (or 0 after storing val through its flag
 * pointer), '?' or ':' on errors, otherwise whatever getopt() returns.
 * The caller must have checked that argv[optind] is a valid word.
 */
static int __getopt_long_core(int argc, char *const *argv,
const char *optstring,
const struct option *longopts, int *idx,
int longonly) {
optarg = 0;
/* Long-option candidate: "--x...", or "-x..." in long-only mode. */
if (longopts && argv[optind][0] == '-' &&
((longonly && argv[optind][1] && argv[optind][1] != '-') ||
(argv[optind][1] == '-' && argv[optind][2]))) {
/* Non-zero when optstring (after an optional '+'/'-') starts with ':',
   i.e. the caller asked for silent error reporting. */
int colon = optstring[optstring[0] == '+' || optstring[0] == '-'] == ':';
int i, cnt, match;
char *opt;
/* Count the table entries for which the word is a prefix; an exact
   match wins immediately. */
for (cnt = i = 0; longopts[i].name; i++) {
const char *name = longopts[i].name;
opt = argv[optind] + 1;
if (*opt == '-')
opt++;
for (; *name && *name == *opt; name++, opt++)
;
/* The word has characters left that are not an "=value" suffix. */
if (*opt && *opt != '=')
continue;
match = i;
if (!*name) {
cnt = 1;
break;
}
cnt++;
}
/* NOTE(review): opt is recomputed on every iteration, so after a unique
   prefix match followed by non-matching entries it reflects the last
   entry examined rather than the matched one - confirm this is intended. */
if (cnt == 1) {
i = match;
optind++;
optopt = longopts[i].val;
if (*opt == '=') {
if (!longopts[i].has_arg) {
if (colon || !opterr)
return '?';
__getopt_msg(argv[0],
": option does not take an argument: ", longopts[i].name,
strlen(longopts[i].name));
return '?';
}
optarg = opt + 1;
} else if (longopts[i].has_arg == required_argument) {
/* The argument is the following argv word. */
if (!(optarg = argv[optind])) {
if (colon)
return ':';
if (!opterr)
return '?';
__getopt_msg(argv[0],
": option requires an argument: ", longopts[i].name,
strlen(longopts[i].name));
return '?';
}
optind++;
}
if (idx)
*idx = i;
if (longopts[i].flag) {
*longopts[i].flag = longopts[i].val;
return 0;
}
return longopts[i].val;
}
/* "--word" that matched zero or several entries. */
if (argv[optind][1] == '-') {
if (!colon && opterr)
__getopt_msg(argv[0],
cnt ? ": option is ambiguous: "
: ": unrecognized option: ",
argv[optind] + 2, strlen(argv[optind] + 2));
optind++;
return '?';
}
}
/* Not a long option: parse it as short option(s). */
return getopt(argc, argv, optstring);
}
/*
 * Common implementation of getopt_long() and getopt_long_only().
 *
 * Unless optstring starts with '+' or '-', non-option words standing before
 * the next option are skipped, and once that option has been parsed the
 * words it consumed are moved in front of the skipped ones, so that
 * non-options accumulate after the options in argv.
 *
 * Returns -1 when no option is left, otherwise the result of
 * __getopt_long_core().
 */
static int __getopt_long(int argc, char *const *argv, const char *optstring,
const struct option *longopts, int *idx,
int longonly) {
int ret, skipped, resumed;
/* Restart from argv[1] when the caller asked for a reset. */
if (!optind || optreset) {
optreset = 0;
__optpos = 0;
optind = 1;
}
if (optind >= argc || !argv[optind])
return -1;
/* skipped: first word not yet handled; resumed: word actually parsed. */
skipped = optind;
if (optstring[0] != '+' && optstring[0] != '-') {
int i;
/* Find the next word that starts with '-' and is longer than "-". */
for (i = optind;; i++) {
if (i >= argc || !argv[i])
return -1;
if (argv[i][0] == '-' && argv[i][1])
break;
}
optind = i;
}
resumed = optind;
ret = __getopt_long_core(argc, argv, optstring, longopts, idx, longonly);
if (resumed > skipped) {
/* cnt words were consumed; rotate each of them in front of the
   skipped non-options. */
int i, cnt = optind - resumed;
for (i = 0; i < cnt; i++)
permute(argv, skipped, optind - 1);
optind = skipped + cnt;
}
return ret;
}
/*
 * Parse the next option, accepting short options from optstring and
 * "--name" long options from longopts. Forwards to __getopt_long() with
 * long-only mode disabled.
 */
int getopt_long(int argc, char *const *argv, const char *optstring,
const struct option *longopts, int *idx) {
return __getopt_long(argc, argv, optstring, longopts, idx, 0);
}
/*
 * Same as getopt_long(), but forwards to __getopt_long() with long-only
 * mode enabled, so words starting with a single '-' are also tried as
 * long options.
 */
int getopt_long_only(int argc, char *const *argv, const char *optstring,
const struct option *longopts, int *idx) {
return __getopt_long(argc, argv, optstring, longopts, idx, 1);
}

View File

@@ -1,54 +0,0 @@
/*
Copyright 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef _GETOPT_H
#define _GETOPT_H
#ifdef __cplusplus
extern "C" {
#endif
int getopt(int, char *const[], const char *);
extern char *optarg;
extern int optind, opterr, optopt, optreset;
struct option {
const char *name;
int has_arg;
int *flag;
int val;
};
int getopt_long(int, char *const *, const char *, const struct option *, int *);
int getopt_long_only(int, char *const *, const char *, const struct option *,
int *);
#define no_argument 0
#define required_argument 1
#define optional_argument 2
#ifdef __cplusplus
}
#endif
#endif

960
src/bitpack.cc Normal file
View File

@@ -0,0 +1,960 @@
#include "bitpack.hh"
#include "common.hh"
#include <algorithm>
#include <cmath>
// Short names used throughout this file.
using std::uint16_t;
using std::vector;
// Raw byte.
using uchar = unsigned char;
// Sequence of 16-bit codes (input of pack, output of unpack).
using vuint16 = vector<uint16_t>;
// Sequence of bytes (output of pack).
using vuchar = vector<uchar>;
// Byte string (input of unpack).
using ustring = std::basic_string<unsigned char>;
/// Returns ipow(2, t_n) - 1. The callers in this file use it as the
/// largest code that fits on t_n bits (presumably ipow is an integer
/// power; it is declared outside this file).
int max(const int t_n) {
  const auto power_of_two = ipow(2, t_n);
  return power_of_two - 1;
}
///////////////////////////////////////////////////////////////////////////////
// packing //
///////////////////////////////////////////////////////////////////////////////
/// Packs a sequence of 16-bit codes into bytes.
///
/// Packing always starts with 9-bit fields; pack_9 switches to wider
/// fields further down the input.
///
/// \param t_input codes to pack
/// \return the packed bytes
vuchar pack(const vuint16 &t_input) {
return pack_9(t_input.begin(), t_input.end());
}
/// Packs codes as consecutive 9-bit fields, most significant bit first.
///
/// Eight codes fill exactly nine bytes. As soon as a code greater than or
/// equal to max(9) has been written, the remaining codes are handed over
/// to pack_10.
///
/// Fixes compared to the previous version: the carry byte is cleared once
/// a group of eight codes is complete, so the padding byte appended
/// afterwards is zero instead of a copy of bits from an earlier code; the
/// debug trace uses the printf conversions matching ptrdiff_t and size_t.
///
/// \param t_input first code to pack
/// \param t_end   end of the code sequence
/// \return the packed bytes
vuchar pack_9(const vuint16::const_iterator t_input,
              const vuint16::const_iterator t_end) {
#ifdef Debug
  std::printf("9 bits\n");
#endif
  vuchar res{};
  uchar current_char = 0; // bits of the previous code not written yet
  int round = 0;
  for (auto it = t_input; it != t_end; ++it) {
    const unsigned code = *it;
    const unsigned slot = static_cast<unsigned>(round % 8);
    // Slot s still has (s + 1) free bits to complete in the pending byte:
    // the top bits of the code go there, the rest is carried over.
    res.push_back(
        static_cast<uchar>(current_char | (code >> (slot + 1u) & 0xffu)));
    if (slot == 7u) {
      // The remaining 8 bits form a full byte: the group is complete.
      res.push_back(static_cast<uchar>(code & 0xffu));
      current_char = 0;
    } else {
      current_char = static_cast<uchar>(code << (7u - slot) & 0xffu);
    }
    if (*it >= max(9)) {
      // NOTE(review): a code written in slot 0 also leaves one pending bit
      // that this condition does not flush - confirm the intended layout
      // against unpack_9/unpack_10.
      if (round % 8 != 0) {
        res.push_back(current_char);
      }
#ifdef Debug
      std::printf("%td elements written\n%zu chars written so far\n",
                  std::distance(t_input, it), res.size());
#endif
      return pack_10(++it, t_end, res);
    }
    ++round;
  }
  // Flush the pending bits (or a zero padding byte).
  res.push_back(current_char);
#ifdef Debug
  std::printf("%td elements written\n%zu chars written so far\n",
              std::distance(t_input, t_end), res.size());
#endif
  return res;
}
/// Packs codes as consecutive 10-bit fields, most significant bit first,
/// appending the bytes to t_res.
///
/// Four codes fill exactly five bytes. As soon as a code greater than or
/// equal to max(10) has been written, the remaining codes are handed over
/// to pack_11.
///
/// Fixes compared to the previous version: slot 0 now keeps the two low
/// bits of the code in the top of the carry byte (shift by 6, matching the
/// ">> 6" performed by unpack_10) instead of shifting by 4; the carry is
/// cleared after slot 3 so padding bytes are zero; the debug trace uses
/// the printf conversions matching ptrdiff_t and size_t.
///
/// \param t_input first code to pack
/// \param t_end   end of the code sequence
/// \param t_res   byte buffer the packed data is appended to
/// \return a copy of t_res once all codes have been packed
vuchar pack_10(const vuint16::const_iterator t_input,
               const vuint16::const_iterator t_end, vuchar &t_res) {
#ifdef Debug
  std::printf("10 bits\n");
#endif
  uchar current_char = 0; // bits of the previous code not written yet
  int round = 0;
  for (auto it = t_input; it != t_end; ++it) {
    switch (round % 4) {
    case 0: { // 8 bits out, 2 bits carried
      t_res.push_back(static_cast<uchar>(*it >> 2u & 0xffu));
      current_char = static_cast<uchar>(*it << 6u & 0xffu);
      break;
    }
    case 1: { // 2 carried + 6 bits out, 4 bits carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 4u & 0xffu));
      current_char = static_cast<uchar>(*it << 4u & 0xffu);
      break;
    }
    case 2: { // 4 carried + 4 bits out, 6 bits carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 6u & 0xffu));
      current_char = static_cast<uchar>(*it << 2u & 0xffu);
      break;
    }
    case 3: { // 6 carried + 2 bits out, then a full byte
      t_res.push_back(current_char | static_cast<uchar>(*it >> 8u & 0xffu));
      t_res.push_back(static_cast<uchar>(*it & 0xffu));
      current_char = 0;
      break;
    }
    default:
      exit(2);
    }
    if (*it >= max(10)) {
      // NOTE(review): a code written in slot 0 also leaves pending bits
      // that this condition does not flush - confirm the intended layout
      // against unpack_10/unpack_11.
      if (round % 4 != 0) {
        t_res.push_back(current_char);
      }
#ifdef Debug
      std::printf("%td elements written\n%zu chars written so far\n",
                  std::distance(t_input, it), t_res.size());
#endif
      return pack_11(++it, t_end, t_res);
    }
    ++round;
  }
  // Flush the pending bits (or a zero padding byte).
  t_res.push_back(current_char);
#ifdef Debug
  std::printf("%td elements written\n%zu chars written so far\n",
              std::distance(t_input, t_end), t_res.size());
#endif
  return t_res;
}
/// Packs codes as consecutive 11-bit fields, most significant bit first,
/// appending the bytes to t_res.
///
/// Eight codes fill exactly eleven bytes. As soon as a code greater than
/// or equal to max(11) has been written, the remaining codes are handed
/// over to pack_12.
///
/// Fixes compared to the previous version:
///  - slot 2 used ">> 11" (always zero for an 11-bit code) and "<< 1" for
///    its two output bytes; it now writes the top 2 bits (">> 9") and the
///    next 8 bits (">> 1"), matching unpack_11;
///  - slot 3 used ">> 12"; it now writes the top 7 bits (">> 4");
///  - slot 7 stored its low byte in the carry, where the next slot 0
///    overwrote it; the byte is now written immediately;
///  - the debug trace uses the printf conversions matching ptrdiff_t and
///    size_t.
///
/// \param t_input first code to pack
/// \param t_end   end of the code sequence
/// \param t_res   byte buffer the packed data is appended to
/// \return a copy of t_res once all codes have been packed
vuchar pack_11(const vuint16::const_iterator t_input,
               const vuint16::const_iterator t_end, vuchar &t_res) {
#ifdef Debug
  std::printf("11 bits\n");
#endif
  uchar current_char = 0; // bits of the previous code not written yet
  int round = 0;
  for (auto it = t_input; it != t_end; ++it) {
    switch (round % 8) {
    case 0: { // 8 bits out, 3 carried
      t_res.push_back(static_cast<uchar>(*it >> 3u & 0xffu));
      current_char = static_cast<uchar>(*it << 5u & 0xffu);
      break;
    }
    case 1: { // 3 carried + 5 out, 6 carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 6u & 0xffu));
      current_char = static_cast<uchar>(*it << 2u & 0xffu);
      break;
    }
    case 2: { // 6 carried + 2 out, full byte, 1 carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 9u & 0xffu));
      t_res.push_back(static_cast<uchar>(*it >> 1u & 0xffu));
      current_char = static_cast<uchar>(*it << 7u & 0xffu);
      break;
    }
    case 3: { // 1 carried + 7 out, 4 carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 4u & 0xffu));
      current_char = static_cast<uchar>(*it << 4u & 0xffu);
      break;
    }
    case 4: { // 4 carried + 4 out, 7 carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 7u & 0xffu));
      current_char = static_cast<uchar>((*it << 1u) & 0xffu);
      break;
    }
    case 5: { // 7 carried + 1 out, full byte, 2 carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 10u & 0xffu));
      t_res.push_back(static_cast<uchar>(*it >> 2u & 0xffu));
      current_char = static_cast<uchar>(*it << 6u & 0xffu);
      break;
    }
    case 6: { // 2 carried + 6 out, 5 carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 5u & 0xffu));
      current_char = static_cast<uchar>(*it << 3u & 0xffu);
      break;
    }
    case 7: { // 5 carried + 3 out, then a full byte
      t_res.push_back(current_char | static_cast<uchar>(*it >> 8u & 0xffu));
      t_res.push_back(static_cast<uchar>(*it & 0xffu));
      current_char = 0;
      break;
    }
    default:
      exit(2);
    }
    if (*it >= max(11)) {
      // NOTE(review): a code written in slot 0 also leaves pending bits
      // that this condition does not flush - confirm the intended layout
      // against the unpacking side.
      if (round % 8 != 0) {
        t_res.push_back(current_char);
      }
#ifdef Debug
      std::printf("%td elements written\n%zu chars written so far\n",
                  std::distance(t_input, it), t_res.size());
#endif
      return pack_12(++it, t_end, t_res);
    }
    ++round;
  }
  // Flush the pending bits (or a zero padding byte).
  t_res.push_back(current_char);
#ifdef Debug
  std::printf("%td elements written\n%zu chars written so far\n",
              std::distance(t_input, t_end), t_res.size());
#endif
  return t_res;
}
/// Packs codes as consecutive 12-bit fields, most significant bit first,
/// appending the bytes to t_res.
///
/// Two codes fill exactly three bytes. As soon as a code greater than or
/// equal to max(12) has been written, the remaining codes are handed over
/// to pack_13.
///
/// Fixes compared to the previous version: slot 1 stored its low byte in
/// the carry, where the next slot 0 overwrote it, so every second code
/// lost 8 bits mid-stream; the byte is now written immediately. The debug
/// trace uses the printf conversions matching ptrdiff_t and size_t.
///
/// \param t_input first code to pack
/// \param t_end   end of the code sequence
/// \param t_res   byte buffer the packed data is appended to
/// \return a copy of t_res once all codes have been packed
vuchar pack_12(const vuint16::const_iterator t_input,
               const vuint16::const_iterator t_end, vuchar &t_res) {
#ifdef Debug
  std::printf("12 bits\n");
#endif
  uchar current_char = 0; // bits of the previous code not written yet
  int round = 0;
  for (auto it = t_input; it != t_end; ++it) {
    switch (round % 2) {
    case 0: { // 8 bits out, 4 carried
      t_res.push_back(static_cast<uchar>(*it >> 4u & 0xffu));
      current_char = static_cast<uchar>(*it << 4u & 0xffu);
      break;
    }
    case 1: { // 4 carried + 4 out, then a full byte
      t_res.push_back(current_char | static_cast<uchar>(*it >> 8u & 0xffu));
      t_res.push_back(static_cast<uchar>(*it & 0xffu));
      current_char = 0;
      break;
    }
    default:
      exit(2);
    }
    if (*it >= max(12)) {
      // NOTE(review): a code written in slot 0 also leaves pending bits
      // that this condition does not flush - confirm the intended layout
      // against the unpacking side.
      if (round % 2 != 0) {
        t_res.push_back(current_char);
      }
#ifdef Debug
      std::printf("%td elements written\n%zu chars written so far\n",
                  std::distance(t_input, it), t_res.size());
#endif
      return pack_13(++it, t_end, t_res);
    }
    ++round;
  }
  // Flush the pending bits (or a zero padding byte).
  t_res.push_back(current_char);
#ifdef Debug
  std::printf("%td elements written\n%zu chars written so far\n",
              std::distance(t_input, t_end), t_res.size());
#endif
  return t_res;
}
/// Packs codes as consecutive 13-bit fields, most significant bit first,
/// appending the bytes to t_res.
///
/// Eight codes fill exactly thirteen bytes. As soon as a code greater
/// than or equal to max(13) has been written, the remaining codes are
/// handed over to pack_14.
///
/// Fixes compared to the previous version: slot 7 stored its low byte in
/// the carry, where the next slot 0 overwrote it; the byte is now written
/// immediately. The debug trace uses the printf conversions matching
/// ptrdiff_t and size_t.
///
/// \param t_input first code to pack
/// \param t_end   end of the code sequence
/// \param t_res   byte buffer the packed data is appended to
/// \return a copy of t_res once all codes have been packed
vuchar pack_13(const vuint16::const_iterator t_input,
               const vuint16::const_iterator t_end, vuchar &t_res) {
#ifdef Debug
  std::printf("13 bits\n");
#endif
  uchar current_char = 0; // bits of the previous code not written yet
  int round = 0;
  for (auto it = t_input; it != t_end; ++it) {
    switch (round % 8) {
    case 0: { // 8 bits out, 5 carried
      t_res.push_back(static_cast<uchar>(*it >> 5u & 0xffu));
      current_char = static_cast<uchar>(*it << 3u & 0xffu);
      break;
    }
    case 1: { // 5 carried + 3 out, full byte, 2 carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 10u & 0xffu));
      t_res.push_back(static_cast<uchar>(*it >> 2u & 0xffu));
      current_char = static_cast<uchar>(*it << 6u & 0xffu);
      break;
    }
    case 2: { // 2 carried + 6 out, 7 carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 7u & 0xffu));
      current_char = static_cast<uchar>(*it << 1u & 0xffu);
      break;
    }
    case 3: { // 7 carried + 1 out, full byte, 4 carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 12u & 0xffu));
      t_res.push_back(static_cast<uchar>(*it >> 4u & 0xffu));
      current_char = static_cast<uchar>(*it << 4u & 0xffu);
      break;
    }
    case 4: { // 4 carried + 4 out, full byte, 1 carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 9u & 0xffu));
      t_res.push_back(static_cast<uchar>(*it >> 1u & 0xffu));
      current_char = static_cast<uchar>(*it << 7u & 0xffu);
      break;
    }
    case 5: { // 1 carried + 7 out, 6 carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 6u & 0xffu));
      current_char = static_cast<uchar>(*it << 2u & 0xffu);
      break;
    }
    case 6: { // 6 carried + 2 out, full byte, 3 carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 11u & 0xffu));
      t_res.push_back(static_cast<uchar>(*it >> 3u & 0xffu));
      current_char = static_cast<uchar>(*it << 5u & 0xffu);
      break;
    }
    case 7: { // 3 carried + 5 out, then a full byte
      t_res.push_back(current_char | static_cast<uchar>(*it >> 8u & 0xffu));
      t_res.push_back(static_cast<uchar>(*it & 0xffu));
      current_char = 0;
      break;
    }
    default:
      exit(2);
    }
    if (*it >= max(13)) {
      // NOTE(review): a code written in slot 0 also leaves pending bits
      // that this condition does not flush - confirm the intended layout
      // against the unpacking side.
      if (round % 8 != 0) {
        t_res.push_back(current_char);
      }
#ifdef Debug
      std::printf("%td elements written\n%zu chars written so far\n",
                  std::distance(t_input, it), t_res.size());
#endif
      return pack_14(++it, t_end, t_res);
    }
    ++round;
  }
  // Flush the pending bits (or a zero padding byte).
  t_res.push_back(current_char);
#ifdef Debug
  std::printf("%td elements written\n%zu chars written so far\n",
              std::distance(t_input, t_end), t_res.size());
#endif
  return t_res;
}
/// Packs codes as consecutive 14-bit fields, most significant bit first,
/// appending the bytes to t_res.
///
/// Four codes fill exactly seven bytes. As soon as a code greater than or
/// equal to max(14) has been written, the remaining codes are handed over
/// to pack_15.
///
/// Fixes compared to the previous version: slot 3 stored its low byte in
/// the carry, and the next slot 0 OR-ed that byte into the first byte of
/// the following code, corrupting both; the byte is now written
/// immediately and the carry cleared. The debug trace uses the printf
/// conversions matching ptrdiff_t and size_t.
///
/// \param t_input first code to pack
/// \param t_end   end of the code sequence
/// \param t_res   byte buffer the packed data is appended to
/// \return a copy of t_res once all codes have been packed
vuchar pack_14(const vuint16::const_iterator t_input,
               const vuint16::const_iterator t_end, vuchar &t_res) {
#ifdef Debug
  std::printf("14 bits\n");
#endif
  uchar current_char = 0; // bits of the previous code not written yet
  int round = 0;
  for (auto it = t_input; it != t_end; ++it) {
    switch (round % 4) {
    case 0: { // carry is empty here: 8 bits out, 6 carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 6u & 0xffu));
      current_char = static_cast<uchar>(*it << 2u & 0xffu);
      break;
    }
    case 1: { // 6 carried + 2 out, full byte, 4 carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 12u & 0xffu));
      t_res.push_back(static_cast<uchar>(*it >> 4u & 0xffu));
      current_char = static_cast<uchar>(*it << 4u & 0xffu);
      break;
    }
    case 2: { // 4 carried + 4 out, full byte, 2 carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 10u & 0xffu));
      t_res.push_back(static_cast<uchar>(*it >> 2u & 0xffu));
      current_char = static_cast<uchar>(*it << 6u & 0xffu);
      break;
    }
    case 3: { // 2 carried + 6 out, then a full byte
      t_res.push_back(current_char | static_cast<uchar>(*it >> 8u & 0xffu));
      t_res.push_back(static_cast<uchar>(*it & 0xffu));
      current_char = 0;
      break;
    }
    default:
      exit(2);
    }
    if (*it >= max(14)) {
      // NOTE(review): a code written in slot 0 also leaves pending bits
      // that this condition does not flush - confirm the intended layout
      // against the unpacking side.
      if (round % 4 != 0) {
        t_res.push_back(current_char);
      }
#ifdef Debug
      std::printf("%td elements written\n%zu chars written so far\n",
                  std::distance(t_input, it), t_res.size());
#endif
      return pack_15(++it, t_end, t_res);
    }
    ++round;
  }
  // Flush the pending bits (or a zero padding byte).
  t_res.push_back(current_char);
#ifdef Debug
  std::printf("%td elements written\n%zu chars written so far\n",
              std::distance(t_input, t_end), t_res.size());
#endif
  return t_res;
}
/// Packs codes as consecutive 15-bit fields, most significant bit first,
/// appending the bytes to t_res.
///
/// Eight codes fill exactly fifteen bytes. As soon as a code greater than
/// or equal to max(15) has been written, the remaining codes are handed
/// over to pack_16.
///
/// Fixes compared to the previous version: slot 7 stored its low byte in
/// the carry, and the next slot 0 OR-ed that byte into the first byte of
/// the following code, corrupting both; the byte is now written
/// immediately and the carry cleared. The debug trace uses the printf
/// conversions matching ptrdiff_t and size_t.
///
/// \param t_input first code to pack
/// \param t_end   end of the code sequence
/// \param t_res   byte buffer the packed data is appended to
/// \return a copy of t_res once all codes have been packed
vuchar pack_15(const vuint16::const_iterator t_input,
               const vuint16::const_iterator t_end, vuchar &t_res) {
#ifdef Debug
  std::printf("15 bits\n");
#endif
  uchar current_char = 0; // bits of the previous code not written yet
  int round = 0;
  for (auto it = t_input; it != t_end; ++it) {
    switch (round % 8) {
    case 0: { // carry is empty here: 8 bits out, 7 carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 7u & 0xffu));
      current_char = static_cast<uchar>(*it << 1u & 0xffu);
      break;
    }
    case 1: { // 7 carried + 1 out, full byte, 6 carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 14u & 0xffu));
      t_res.push_back(static_cast<uchar>(*it >> 6u & 0xffu));
      current_char = static_cast<uchar>(*it << 2u & 0xffu);
      break;
    }
    case 2: { // 6 carried + 2 out, full byte, 5 carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 13u & 0xffu));
      t_res.push_back(static_cast<uchar>(*it >> 5u & 0xffu));
      current_char = static_cast<uchar>(*it << 3u & 0xffu);
      break;
    }
    case 3: { // 5 carried + 3 out, full byte, 4 carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 12u & 0xffu));
      t_res.push_back(static_cast<uchar>(*it >> 4u & 0xffu));
      current_char = static_cast<uchar>(*it << 4u & 0xffu);
      break;
    }
    case 4: { // 4 carried + 4 out, full byte, 3 carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 11u & 0xffu));
      t_res.push_back(static_cast<uchar>(*it >> 3u & 0xffu));
      current_char = static_cast<uchar>(*it << 5u & 0xffu);
      break;
    }
    case 5: { // 3 carried + 5 out, full byte, 2 carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 10u & 0xffu));
      t_res.push_back(static_cast<uchar>(*it >> 2u & 0xffu));
      current_char = static_cast<uchar>(*it << 6u & 0xffu);
      break;
    }
    case 6: { // 2 carried + 6 out, full byte, 1 carried
      t_res.push_back(current_char | static_cast<uchar>(*it >> 9u & 0xffu));
      t_res.push_back(static_cast<uchar>(*it >> 1u & 0xffu));
      current_char = static_cast<uchar>(*it << 7u & 0xffu);
      break;
    }
    case 7: { // 1 carried + 7 out, then a full byte
      t_res.push_back(current_char | static_cast<uchar>(*it >> 8u & 0xffu));
      t_res.push_back(static_cast<uchar>(*it & 0xffu));
      current_char = 0;
      break;
    }
    default:
      exit(2);
    }
    if (*it >= max(15)) {
      // NOTE(review): a code written in slot 0 also leaves pending bits
      // that this condition does not flush - confirm the intended layout
      // against the unpacking side.
      if (round % 8 != 0) {
        t_res.push_back(current_char);
      }
#ifdef Debug
      std::printf("%td elements written\n%zu chars written so far\n",
                  std::distance(t_input, it), t_res.size());
#endif
      return pack_16(++it, t_end, t_res);
    }
    ++round;
  }
  // Flush the pending bits (or a zero padding byte).
  t_res.push_back(current_char);
#ifdef Debug
  std::printf("%td elements written\n%zu chars written so far\n",
              std::distance(t_input, t_end), t_res.size());
#endif
  return t_res;
}
/// Appends every remaining code to t_res as two bytes, high byte first.
///
/// \param t_input first code to write
/// \param t_end   end of the code sequence
/// \param t_res   byte buffer the data is appended to
/// \return a copy of t_res once all codes have been written
vuchar pack_16(const vuint16::const_iterator t_input,
               const vuint16::const_iterator t_end, vuchar &t_res) {
  for (auto cursor = t_input; cursor != t_end; ++cursor) {
    const auto value = *cursor;
    const auto high_byte = static_cast<uchar>(value >> 8 & 0xFFu);
    const auto low_byte = static_cast<uchar>(value & 0xFFu);
    t_res.push_back(high_byte);
    t_res.push_back(low_byte);
  }
#ifdef Debug
  std::printf("%ld elements written\n%ld chars written so far\n",
              std::distance(t_input, t_end), t_res.size());
#endif
  return t_res;
}
///////////////////////////////////////////////////////////////////////////////
// unpacking //
///////////////////////////////////////////////////////////////////////////////
/// Builds a mask whose t_nb_bits lowest bits are set.
///
/// Bit counts above 16 yield 0xFFFF, exactly as the truncating recursive
/// version did. A negative bit count now yields 0 instead of recursing
/// without bound.
///
/// \param t_nb_bits number of low bits to set
/// \return the mask
constexpr uint16_t mask_n(int t_nb_bits) {
  // A 16-bit mask is saturated after 16 steps; no need to go further.
  const int steps = t_nb_bits > 16 ? 16 : t_nb_bits;
  uint16_t mask = 0;
  for (int bit = 0; bit < steps; ++bit) {
    mask = static_cast<uint16_t>((mask << 1) | 0x1);
  }
  return mask;
}
// Lookup table of low-bit masks: masks[n] == mask_n(n) for n in [0, 16].
// The unpacking routines use it to trim a decoded code to its bit width.
constexpr uint16_t masks[17] = {
mask_n(0), mask_n(1), mask_n(2), mask_n(3), mask_n(4), mask_n(5),
mask_n(6), mask_n(7), mask_n(8), mask_n(9), mask_n(10), mask_n(11),
mask_n(12), mask_n(13), mask_n(14), mask_n(15), mask_n(16)};
/// Unpacks a byte string into a sequence of 16-bit codes.
///
/// Decoding always starts with 9-bit fields; unpack_9 switches to wider
/// fields further down the input.
///
/// \param t_input packed bytes
/// \return the decoded codes
vuint16 unpack(ustring &&t_input) {
return unpack_9(t_input.begin(), t_input.end());
}
/// Decodes consecutive 9-bit fields (most significant bit first) into
/// codes.
///
/// Decoding continues while at least two bytes remain and the last decoded
/// code is below max(9). Once a code greater than or equal to max(9) has
/// been decoded, the rest of the input is handed over to unpack_10.
///
/// Fixes compared to the previous version: the loop bound is computed as
/// a distance from the current position, so an empty input no longer
/// evaluates "t_end - 1" on an empty range; the debug trace uses the
/// printf conversions matching ptrdiff_t and size_t.
///
/// \param t_input first packed byte
/// \param t_end   end of the packed bytes
/// \return the decoded codes
vuint16 unpack_9(const ustring::const_iterator t_input,
                 const ustring::const_iterator t_end) {
#ifdef Debug
  std::printf("9 bits\n");
#endif
  const auto max_val = max(9);
  int round = 0;
  uint16_t current_char = 0;
  auto it = t_input;
  vuint16 res{};
  // Every code needs the current byte and the following one.
  while (current_char < max_val && t_end - it > 1) {
    const unsigned slot = static_cast<unsigned>(round % 8);
    // In slot s the code starts s bits into the current byte and ends
    // (s + 1) bits into the next one.
    current_char = static_cast<uint16_t>(*it) << (slot + 1u);
    current_char |= static_cast<uint16_t>(*++it) >> (7u - slot);
    if (slot == 7u) {
      // The second byte was consumed entirely: start on a fresh byte.
      ++it;
    }
    current_char &= masks[9];
    res.push_back(current_char);
    ++round;
  }
  if (current_char >= max_val) {
    // NOTE(review): except after slot 7, `it` still designates the byte
    // holding the tail of the last code - confirm this is where the
    // packing side starts the 10-bit data.
#ifdef Debug
    std::printf("%td elements written\n%zu chars written so far\n",
                std::distance(t_input, it), res.size());
#endif
    return unpack_10(it, t_end, res);
  }
  return res;
}
/**
 * \brief Unpacks 10-bit codes from a byte stream
 *
 * Codes are stored back to back, most-significant bit first, so four codes
 * span five bytes. Decoding stops when fewer bytes remain than the next code
 * needs, or once a code greater than or equal to `max(10)` has been read; in
 * the latter case the rest of the stream is handed over to \ref unpack_11.
 *
 * \param t_input Iterator to the first byte to decode
 * \param t_end Iterator past the last byte of the stream
 * \param t_res Vector the decoded codes are appended to
 * \return Copy of \p t_res once decoding is finished
 */
vuint16 unpack_10(const ustring::const_iterator t_input,
                  const ustring::const_iterator t_end,
                  vuint16& t_res) {
#ifdef Debug
  std::printf("10 bits\n");
#endif
  constexpr int code_width = 10;
  const auto max_val = max(10);
  // Number of low-order bits of *it not yet consumed by a previous code.
  int unread_bits = 8;
  uint16_t current_char = 0;
  auto it = t_input;
  while (current_char < max_val) {
    // Bits of the current code that live in the bytes following *it.
    int missing_bits = code_width - unread_bits;
    // Stop on a truncated stream instead of reading past t_end.
    if (t_end - it < 1 + (missing_bits + 7) / 8) {
      break;
    }
    unsigned int code = *it;
    for (; missing_bits >= 8; missing_bits -= 8) {
      code = (code << 8u) | static_cast<unsigned int>(*++it);
    }
    if (missing_bits > 0) {
      code = (code << missing_bits) |
             (static_cast<unsigned int>(*++it) >> (8 - missing_bits));
      unread_bits = 8 - missing_bits;
    } else {
      // The code ended exactly on a byte boundary: continue on a fresh byte.
      ++it;
      unread_bits = 8;
    }
    // The mask drops the bits of *it that belonged to the previous code.
    current_char = static_cast<uint16_t>(code & masks[code_width]);
    t_res.push_back(current_char);
  }
  if (current_char >= max_val) {
#ifdef Debug
    std::printf("%td elements written\n%zu chars written so far\n",
                std::distance(t_input, it), t_res.size());
#endif
    return unpack_11(it, t_end, t_res);
  }
  return t_res;
}
/**
 * \brief Unpacks 11-bit codes from a byte stream
 *
 * Codes are stored back to back, most-significant bit first, so eight codes
 * span eleven bytes and some codes straddle three bytes. Decoding stops when
 * fewer bytes remain than the next code needs, or once a code greater than or
 * equal to `max(11)` has been read; in the latter case the rest of the stream
 * is handed over to \ref unpack_12.
 *
 * \param t_input Iterator to the first byte to decode
 * \param t_end Iterator past the last byte of the stream
 * \param t_res Vector the decoded codes are appended to
 * \return Copy of \p t_res once decoding is finished
 */
vuint16 unpack_11(const ustring::const_iterator t_input,
                  const ustring::const_iterator t_end,
                  vuint16& t_res) {
#ifdef Debug
  std::printf("11 bits\n");
#endif
  constexpr int code_width = 11;
  const auto max_val = max(11);
  // Number of low-order bits of *it not yet consumed by a previous code.
  int unread_bits = 8;
  uint16_t current_char = 0;
  auto it = t_input;
  while (current_char < max_val) {
    // Bits of the current code that live in the bytes following *it.
    int missing_bits = code_width - unread_bits;
    // A code needs two or three bytes depending on its alignment; stop on a
    // truncated stream instead of reading past t_end.
    if (t_end - it < 1 + (missing_bits + 7) / 8) {
      break;
    }
    unsigned int code = *it;
    for (; missing_bits >= 8; missing_bits -= 8) {
      code = (code << 8u) | static_cast<unsigned int>(*++it);
    }
    if (missing_bits > 0) {
      code = (code << missing_bits) |
             (static_cast<unsigned int>(*++it) >> (8 - missing_bits));
      unread_bits = 8 - missing_bits;
    } else {
      // The code ended exactly on a byte boundary: continue on a fresh byte.
      ++it;
      unread_bits = 8;
    }
    // The mask drops the bits of *it that belonged to the previous code.
    current_char = static_cast<uint16_t>(code & masks[code_width]);
    t_res.push_back(current_char);
  }
  if (current_char >= max_val) {
#ifdef Debug
    std::printf("%td elements written\n%zu chars written so far\n",
                std::distance(t_input, it), t_res.size());
#endif
    return unpack_12(it, t_end, t_res);
  }
  return t_res;
}
/**
 * \brief Unpacks 12-bit codes from a byte stream
 *
 * Codes are stored back to back, most-significant bit first, so two codes
 * span three bytes. Decoding stops when fewer bytes remain than the next code
 * needs, or once a code greater than or equal to `max(12)` has been read; in
 * the latter case the rest of the stream is handed over to \ref unpack_13.
 *
 * \param t_input Iterator to the first byte to decode
 * \param t_end Iterator past the last byte of the stream
 * \param t_res Vector the decoded codes are appended to
 * \return Copy of \p t_res once decoding is finished
 */
vuint16 unpack_12(const ustring::const_iterator t_input,
                  const ustring::const_iterator t_end,
                  vuint16& t_res) {
#ifdef Debug
  std::printf("12 bits\n");
#endif
  constexpr int code_width = 12;
  const auto max_val = max(12);
  // Number of low-order bits of *it not yet consumed by a previous code.
  int unread_bits = 8;
  uint16_t current_char = 0;
  auto it = t_input;
  while (current_char < max_val) {
    // Bits of the current code that live in the bytes following *it.
    int missing_bits = code_width - unread_bits;
    // Stop on a truncated stream instead of reading past t_end.
    if (t_end - it < 1 + (missing_bits + 7) / 8) {
      break;
    }
    unsigned int code = *it;
    for (; missing_bits >= 8; missing_bits -= 8) {
      code = (code << 8u) | static_cast<unsigned int>(*++it);
    }
    if (missing_bits > 0) {
      code = (code << missing_bits) |
             (static_cast<unsigned int>(*++it) >> (8 - missing_bits));
      unread_bits = 8 - missing_bits;
    } else {
      // The code ended exactly on a byte boundary: continue on a fresh byte.
      ++it;
      unread_bits = 8;
    }
    // The mask drops the bits of *it that belonged to the previous code.
    current_char = static_cast<uint16_t>(code & masks[code_width]);
    t_res.push_back(current_char);
  }
  if (current_char >= max_val) {
#ifdef Debug
    std::printf("%d\n", current_char);
#endif
    return unpack_13(it, t_end, t_res);
  }
  return t_res;
}
/**
 * \brief Unpacks 13-bit codes from a byte stream
 *
 * Codes are stored back to back, most-significant bit first, so eight codes
 * span thirteen bytes and some codes straddle three bytes. Decoding stops
 * when fewer bytes remain than the next code needs, or once a code greater
 * than or equal to `max(13)` has been read; in the latter case the rest of
 * the stream is handed over to \ref unpack_14.
 *
 * \param t_input Iterator to the first byte to decode
 * \param t_end Iterator past the last byte of the stream
 * \param t_res Vector the decoded codes are appended to
 * \return Copy of \p t_res once decoding is finished
 */
vuint16 unpack_13(const ustring::const_iterator t_input,
                  const ustring::const_iterator t_end,
                  vuint16& t_res) {
#ifdef Debug
  std::printf("13 bits\n");
#endif
  constexpr int code_width = 13;
  const auto max_val = max(13);
  // Number of low-order bits of *it not yet consumed by a previous code.
  int unread_bits = 8;
  uint16_t current_char = 0;
  auto it = t_input;
  while (current_char < max_val) {
    // Bits of the current code that live in the bytes following *it.
    int missing_bits = code_width - unread_bits;
    // A code needs two or three bytes depending on its alignment; stop on a
    // truncated stream instead of reading past t_end.
    if (t_end - it < 1 + (missing_bits + 7) / 8) {
      break;
    }
    unsigned int code = *it;
    for (; missing_bits >= 8; missing_bits -= 8) {
      code = (code << 8u) | static_cast<unsigned int>(*++it);
    }
    if (missing_bits > 0) {
      code = (code << missing_bits) |
             (static_cast<unsigned int>(*++it) >> (8 - missing_bits));
      unread_bits = 8 - missing_bits;
    } else {
      // The code ended exactly on a byte boundary (every eighth code): skip
      // the fully consumed byte so the next group starts on a fresh one.
      ++it;
      unread_bits = 8;
    }
    // The mask drops the bits of *it that belonged to the previous code.
    current_char = static_cast<uint16_t>(code & masks[code_width]);
    t_res.push_back(current_char);
  }
  if (current_char >= max_val) {
#ifdef Debug
    std::printf("%td elements written\n%zu chars written so far\n",
                std::distance(t_input, it), t_res.size());
#endif
    return unpack_14(it, t_end, t_res);
  }
  return t_res;
}
/**
 * \brief Unpacks 14-bit codes from a byte stream
 *
 * Codes are stored back to back, most-significant bit first, so four codes
 * span seven bytes and some codes straddle three bytes. Decoding stops when
 * fewer bytes remain than the next code needs, or once a code greater than or
 * equal to `max(14)` has been read; in the latter case the rest of the stream
 * is handed over to \ref unpack_15.
 *
 * \param t_input Iterator to the first byte to decode
 * \param t_end Iterator past the last byte of the stream
 * \param t_res Vector the decoded codes are appended to
 * \return Copy of \p t_res once decoding is finished
 */
vuint16 unpack_14(const ustring::const_iterator t_input,
                  const ustring::const_iterator t_end,
                  vuint16& t_res) {
#ifdef Debug
  std::printf("14 bits\n");
#endif
  constexpr int code_width = 14;
  const auto max_val = max(14);
  // Number of low-order bits of *it not yet consumed by a previous code.
  int unread_bits = 8;
  uint16_t current_char = 0;
  auto it = t_input;
  while (current_char < max_val) {
    // Bits of the current code that live in the bytes following *it.
    int missing_bits = code_width - unread_bits;
    // A code needs two or three bytes depending on its alignment; stop on a
    // truncated stream instead of reading past t_end.
    if (t_end - it < 1 + (missing_bits + 7) / 8) {
      break;
    }
    unsigned int code = *it;
    for (; missing_bits >= 8; missing_bits -= 8) {
      code = (code << 8u) | static_cast<unsigned int>(*++it);
    }
    if (missing_bits > 0) {
      code = (code << missing_bits) |
             (static_cast<unsigned int>(*++it) >> (8 - missing_bits));
      unread_bits = 8 - missing_bits;
    } else {
      // The code ended exactly on a byte boundary (every fourth code): skip
      // the fully consumed byte so the next group starts on a fresh one.
      ++it;
      unread_bits = 8;
    }
    // The mask drops the bits of *it that belonged to the previous code.
    current_char = static_cast<uint16_t>(code & masks[code_width]);
    t_res.push_back(current_char);
  }
  if (current_char >= max_val) {
#ifdef Debug
    std::printf("%td elements written\n%zu chars written so far\n",
                std::distance(t_input, it), t_res.size());
#endif
    // unpack_15 takes (begin, end, result): pass the end iterator only once.
    return unpack_15(it, t_end, t_res);
  }
  return t_res;
}
/**
 * \brief Unpacks 15-bit codes from a byte stream
 *
 * Codes are stored back to back, most-significant bit first, so eight codes
 * span fifteen bytes and most codes straddle three bytes. Decoding stops when
 * fewer bytes remain than the next code needs, or once a code greater than or
 * equal to `max(15)` has been read; in the latter case the rest of the stream
 * is handed over to \ref unpack_16.
 *
 * \param t_begin Iterator to the first byte to decode
 * \param t_end Iterator past the last byte of the stream
 * \param t_res Vector the decoded codes are appended to
 * \return Copy of \p t_res once decoding is finished
 */
vuint16 unpack_15(const ustring::const_iterator t_begin,
                  const ustring::const_iterator t_end,
                  vuint16& t_res) {
#ifdef Debug
  std::printf("15 bits\n");
#endif
  constexpr int code_width = 15;
  const auto max_val = max(15);
  // Number of low-order bits of *it not yet consumed by a previous code.
  int unread_bits = 8;
  uint16_t current_char = 0;
  auto it = t_begin;
  while (current_char < max_val) {
    // Bits of the current code that live in the bytes following *it.
    int missing_bits = code_width - unread_bits;
    // A code needs two or three bytes depending on its alignment; stop on a
    // truncated stream instead of reading past t_end.
    if (t_end - it < 1 + (missing_bits + 7) / 8) {
      break;
    }
    unsigned int code = *it;
    for (; missing_bits >= 8; missing_bits -= 8) {
      code = (code << 8u) | static_cast<unsigned int>(*++it);
    }
    if (missing_bits > 0) {
      code = (code << missing_bits) |
             (static_cast<unsigned int>(*++it) >> (8 - missing_bits));
      unread_bits = 8 - missing_bits;
    } else {
      // The code ended exactly on a byte boundary (every eighth code): skip
      // the fully consumed byte so the next group starts on a fresh one.
      ++it;
      unread_bits = 8;
    }
    // The mask drops the bits of *it that belonged to the previous code.
    current_char = static_cast<uint16_t>(code & masks[code_width]);
    t_res.push_back(current_char);
  }
  if (current_char >= max_val) {
#ifdef Debug
    std::printf("%td elements written\n%zu chars written so far\n",
                std::distance(t_begin, it), t_res.size());
#endif
    return unpack_16(it, t_end, t_res);
  }
  return t_res;
}
/**
 * \brief Unpacks 16-bit codes from a byte stream
 *
 * Every code occupies two consecutive bytes, high byte first. A trailing
 * single byte cannot hold a code and is ignored.
 *
 * \param t_begin Iterator to the first byte to decode
 * \param t_end Iterator past the last byte of the stream
 * \param t_res Vector the decoded codes are appended to
 * \return Copy of \p t_res once decoding is finished
 */
vuint16 unpack_16(const ustring::const_iterator t_begin,
                  const ustring::const_iterator t_end,
                  vuint16& t_res) {
#ifdef Debug
  std::printf("16 bits\n");
#endif
  // Both bytes are read in separate statements: reading `*it` and `*++it`
  // inside one `|` expression leaves their evaluation order unspecified.
  for (auto it = t_begin; t_end - it >= 2; it += 2) {
    const auto high_byte = static_cast<unsigned int>(it[0]);
    const auto low_byte = static_cast<unsigned int>(it[1]);
    t_res.push_back(static_cast<uint16_t>((high_byte << 8u) | low_byte));
  }
#ifdef Debug
  std::printf("%td elements written\n%zu chars written so far\n",
              std::distance(t_begin, t_end), t_res.size());
#endif
  return t_res;
}

95
src/bitpack.hh Normal file
View File

@@ -0,0 +1,95 @@
/**
* \file bitpack.hh
* \brief Header for bit-packing functions
*/
#ifndef LZW_SRC_BITPACK_H_
#define LZW_SRC_BITPACK_H_
#include <cstdint>
#include <string>
#include <vector>
///////////////////////////////////////////////////////////////////////////////
// packing //
///////////////////////////////////////////////////////////////////////////////
/// \brief Packs a chunk of 16-bit codes into bytes (used by write_chunk)
// NOTE(review): the pack_N implementations are not visible from this header;
// presumably pack() starts with pack_9 and each pack_N is the inverse of the
// matching unpack_N -- confirm against bitpack.cc.
std::vector<unsigned char> pack(const std::vector<std::uint16_t> &);
/// \brief Packing stage for 9-bit codes; takes the range of codes to pack
std::vector<unsigned char> pack_9(std::vector<std::uint16_t>::const_iterator,
std::vector<std::uint16_t>::const_iterator);
/// \brief Packing stage for 10-bit codes; the third parameter is the byte
/// vector shared between stages
std::vector<unsigned char> pack_10(std::vector<std::uint16_t>::const_iterator,
std::vector<std::uint16_t>::const_iterator,
std::vector<unsigned char> &);
/// \brief Packing stage for 11-bit codes
std::vector<unsigned char> pack_11(std::vector<std::uint16_t>::const_iterator,
std::vector<std::uint16_t>::const_iterator,
std::vector<unsigned char> &);
/// \brief Packing stage for 12-bit codes
std::vector<unsigned char> pack_12(std::vector<std::uint16_t>::const_iterator,
std::vector<std::uint16_t>::const_iterator,
std::vector<unsigned char> &);
/// \brief Packing stage for 13-bit codes
std::vector<unsigned char> pack_13(std::vector<std::uint16_t>::const_iterator,
std::vector<std::uint16_t>::const_iterator,
std::vector<unsigned char> &);
/// \brief Packing stage for 14-bit codes
std::vector<unsigned char> pack_14(std::vector<std::uint16_t>::const_iterator,
std::vector<std::uint16_t>::const_iterator,
std::vector<unsigned char> &);
/// \brief Packing stage for 15-bit codes
std::vector<unsigned char> pack_15(std::vector<std::uint16_t>::const_iterator,
std::vector<std::uint16_t>::const_iterator,
std::vector<unsigned char> &);
/// \brief Packing stage for 16-bit codes
std::vector<unsigned char> pack_16(std::vector<std::uint16_t>::const_iterator,
std::vector<std::uint16_t>::const_iterator,
std::vector<unsigned char> &);
///////////////////////////////////////////////////////////////////////////////
// unpack //
///////////////////////////////////////////////////////////////////////////////
/// \brief Unpacks a whole bit-packed byte string; forwards to unpack_9
std::vector<std::uint16_t> unpack(std::basic_string<unsigned char> &&);
/// \brief Unpacks 9-bit codes from the byte range into a new vector and hands
/// over to unpack_10 once a code >= max(9) has been read
std::vector<std::uint16_t>
unpack_9(std::basic_string<unsigned char>::const_iterator,
std::basic_string<unsigned char>::const_iterator);
/// \brief Unpacks 10-bit codes, appending them to the vector passed as third
/// argument; hands over to unpack_11 once a code >= max(10) has been read.
/// Returns a copy of that vector.
std::vector<std::uint16_t>
unpack_10(std::basic_string<unsigned char>::const_iterator,
std::basic_string<unsigned char>::const_iterator,
std::vector<std::uint16_t> &);
/// \brief Same as unpack_10 for 11-bit codes; hands over to unpack_12
std::vector<std::uint16_t>
unpack_11(std::basic_string<unsigned char>::const_iterator,
std::basic_string<unsigned char>::const_iterator,
std::vector<std::uint16_t> &);
/// \brief Same as unpack_10 for 12-bit codes; hands over to unpack_13
std::vector<std::uint16_t>
unpack_12(std::basic_string<unsigned char>::const_iterator,
std::basic_string<unsigned char>::const_iterator,
std::vector<std::uint16_t> &);
/// \brief Same as unpack_10 for 13-bit codes; hands over to unpack_14
std::vector<std::uint16_t>
unpack_13(std::basic_string<unsigned char>::const_iterator,
std::basic_string<unsigned char>::const_iterator,
std::vector<std::uint16_t> &);
/// \brief Same as unpack_10 for 14-bit codes; hands over to unpack_15
std::vector<std::uint16_t>
unpack_14(std::basic_string<unsigned char>::const_iterator,
std::basic_string<unsigned char>::const_iterator,
std::vector<std::uint16_t> &);
/// \brief Same as unpack_10 for 15-bit codes; hands over to unpack_16
std::vector<std::uint16_t>
unpack_15(std::basic_string<unsigned char>::const_iterator,
std::basic_string<unsigned char>::const_iterator,
std::vector<std::uint16_t> &);
/// \brief Unpacks 16-bit codes (two bytes each, high byte first) until the end
/// of the range, appending them to the vector passed as third argument
std::vector<std::uint16_t>
unpack_16(std::basic_string<unsigned char>::const_iterator,
std::basic_string<unsigned char>::const_iterator,
std::vector<std::uint16_t> &);
#endif /* LZW_SRC_BITPACK_H_ */

View File

@@ -5,40 +5,92 @@
#include "common.hh" #include "common.hh"
using uint8_t = std::uint8_t; using std::uint16_t;
using uint32_t = std::uint32_t; using std::uint8_t;
using dic_t = std::map<std::pair<uint32_t, uint8_t>, uint32_t>; using dic_comp_t = std::map<std::pair<uint16_t, uint8_t>, uint16_t>;
using ustring = std::basic_string<unsigned char>;
using p_ustring = std::shared_ptr<ustring>;
/**
 * \brief Integer exponentiation by repeated squaring
 *
 * For a negative exponent the mathematical result 1 / base^|exp| is truncated
 * towards zero: it is 0 unless \p base is 1 or -1. (0 raised to a negative
 * power is undefined; 0 is returned in that case as well.)
 *
 * \param base Value to raise
 * \param exp Exponent
 * \return \p base raised to the power \p exp
 */
int ipow(int base, int exp) {
  if (exp < 0) {
    // Shifting a negative exponent right never reaches zero, so negative
    // exponents are resolved here instead of entering the squaring loop.
    if (base == 1) {
      return 1;
    }
    if (base == -1) {
      return (exp & 1) ? -1 : 1;
    }
    return 0;
  }
  int result = 1;
  for (;;) {
    if (exp & 1) {
      result *= base;
    }
    exp >>= 1;
    if (exp == 0) {
      break;
    }
    // Only square while bits remain, to avoid a needless final overflow.
    base *= base;
  }
  return result;
}
/** /**
* Cette fonction a pour double usage la recherche dune chaine de caractères * Cette fonction a pour double usage la recherche dune chaine de caractères
* dans le dictionnaire, ou bien lajout dune nouvelle chaîne si celle-ci nest * dans le dictionnaire, ou bien lajout dune nouvelle chaîne si celle-ci
* pas déjà présente. Une chaine de caractères est représentée par un couple * nest pas déjà présente. Une chaine de caractères est représentée par un
* numéro de chaine / caractère, le numéro de chaine renvoyant au caractère * couple numéro de chaine / caractère, le numéro de chaine renvoyant au
* précédent (soit son code ASCII, soit son indice dans le dictionnaire) et le * caractère précédent (soit son code ASCII, soit son indice dans le
* caractère se référant au dernier caractère de la chaine courante. Si le * dictionnaire) et le caractère se référant au dernier caractère de la chaine
* numéro de chaine est -1, alors il sagit du premier caractère de la chaine, * courante. Si le numéro de chaine est -1, alors il sagit du premier caractère
* et la valeur renvoyée sera la valeur ASCII du caractère. La fonction renvoie * de la chaine, et la valeur renvoyée sera la valeur ASCII du caractère. La
* une paire bool/uint32_t, la valeur booléene indiquant si une nouvelle fut * fonction renvoie une paire bool/uint16_t, la valeur booléene indiquant si une
* ajoutée dans le dictionnaire ou non, et le uint32_t indiquant la valeur * nouvelle fut ajoutée dans le dictionnaire ou non, et le uint16_t indiquant la
* numérique de la chaîne dans le dictionnaire. * valeur numérique de la chaîne dans le dictionnaire.
* *
* \param t_dictionary Dictionnaire * \param t_dictionary Dictionnaire
* \param t_nr_chaine Numéro de la chaine précédant le caractères \p t_c dans \p t_dictionary * \param t_nr_chaine Numéro de la chaine précédant le caractères \p t_c dans \p t_dictionary
* \param t_c Caractère suivant la chaine de caractères \p t_nr_chaine * \param t_c Caractère suivant la chaine de caractères \p t_nr_chaine
* \return const std::pair<bool, uint32_t> * \return const std::pair<bool, uint16_t>
*/ */
const std::pair<bool, uint32_t> std::pair<bool, uint16_t> dico(dic_comp_t &t_dictionary,
dico(std::map<std::pair<uint32_t, uint8_t>, uint32_t> &t_dictionary, const uint16_t t_nr_chaine, const uint8_t t_c) {
uint32_t t_nr_chaine, uint8_t t_c) {
if (t_nr_chaine == 0xFFFF) { if (t_nr_chaine == 0xFFFF) {
return std::make_pair(true, t_c); return std::make_pair(true, t_c);
} }
auto &e = t_dictionary[std::make_pair(t_nr_chaine, t_c)]; auto &e = t_dictionary[std::make_pair(t_nr_chaine, t_c)];
return (e != 0) ? std::make_pair(true, e) return (e != 0) ? std::make_pair(true, e)
: std::make_pair( : std::make_pair(false, (e = static_cast<uint16_t>(
false, t_dictionary.size() + 255)));
(e = static_cast< }
typename std::remove_reference<decltype(e)>::type>(
t_dictionary.size()) + /**
255)); * Detailed description
*
* \param t_dict Dictionnaire
* \return Retourne une chaîne de caractères non signés
*/
ustring dico_uncompress(std::map<uint16_t, ustring> &t_dict,
const uint16_t t_code, const uint16_t t_old) {
// Looks up the string for t_code and registers a new dictionary entry built
// from the string of the previous code t_old. Codes below 256 stand for the
// single byte of the same value and are never stored in t_dict; stored
// entries are keyed from 256 upwards (t_dict.size() + 256).
// A code below 256 is always known: it is the byte itself.
if (t_code < 256) {
ustring e{static_cast<unsigned char>(t_code)};
// New entry = string of the previous code followed by the current byte.
// Offset of 256 because the dictionary holds no single-byte entries.
if (t_old < 256) {
t_dict[static_cast<uint16_t>(t_dict.size() + 256)] =
static_cast<unsigned char>(t_old) + e;
} else {
// NOTE(review): t_dict[t_old] default-inserts an empty entry when t_old is
// absent, which changes t_dict.size() used for the new key -- confirm that
// callers only pass an already registered t_old here.
t_dict[static_cast<uint16_t>(t_dict.size() + 256)] = t_dict[t_old] + e;
}
return e;
}
// operator[] default-inserts an empty string when t_code is unknown; the
// emptiness test below relies on that.
auto &e = t_dict[t_code];
auto str = (t_old < 256) ? ustring{static_cast<unsigned char>(t_old)}
: t_dict[t_old];
// The code already exists in the dictionary.
if (!e.empty()) {
// New entry = string of the previous code + first byte of the current one.
str += e[0];
const auto index = static_cast<uint16_t>(t_dict.size() + 256);
t_dict[index] = str;
return e;
}
// The code does not exist in the dictionary yet: its string is the previous
// string followed by that string's own first byte.
str += str[0];
e = str;
t_dict[t_code] = e;
return e;
} }

View File

@@ -8,10 +8,17 @@
#include <cstdint> #include <cstdint>
#include <map> #include <map>
#include <memory>
int ipow(int, int);
/// \brief Recherche ou ajout de chaine dans le dictionnaire /// \brief Recherche ou ajout de chaine dans le dictionnaire
const std::pair<bool, std::uint32_t> std::pair<bool, std::uint16_t>
dico(std::map<std::pair<std::uint32_t, std::uint8_t>, std::uint32_t> &, dico(std::map<std::pair<std::uint16_t, std::uint8_t>, std::uint16_t> &,
uint32_t, uint8_t); std::uint16_t, std::uint8_t);
std::basic_string<unsigned char>
dico_uncompress(std::map<std::uint16_t, std::basic_string<unsigned char>> &,
std::uint16_t, std::uint16_t);
#endif /* LZW_SRC_COMMON_H_ */ #endif /* LZW_SRC_COMMON_H_ */

View File

@@ -4,71 +4,60 @@
*/ */
#include "compress.hh" #include "compress.hh"
#include "utf8.hh" #include "io.hh"
#include "common.hh"
#include <cassert> #include <cassert>
#include <cstdlib> #include <cstdlib>
#include <fstream> #include <fstream>
#include <iostream> using std::string;
#include "io.hh" using std::uint16_t;
using std::uint8_t;
using std::vector;
using vuint16 = vector<uint16_t>;
using vvuint16 = vector<vuint16>;
using ustring = std::basic_string<unsigned char>;
using dict_t = std::map<std::pair<uint32_t, uint8_t>, uint32_t>; using dict_t = std::map<std::pair<uint16_t, uint8_t>, uint16_t>;
using ustring = std::basic_string<uint8_t>; // chaîne non encodée
using uvec = std::vector<std::uint32_t>; // chaîne encodée
using std::printf; using std::printf;
constexpr size_t CHUNK_SIZE = 32768; const size_t DICT_MAX = static_cast<size_t>(ipow(2, 17) - 256); /* 16 bits */
/** /**
* * La chaîne de caractères \p t_text est lue caractère par caractère, et est
* Reçoit une liste de paires std::thread/vecteurs, le premier étant le
* processus dont sa sortie est stockée dans le second. La sortie, une liste
* de caractères uint32_t, est écrite dans le fichier de sortie \p out.
*
* \param[in] t_threads
* \param[out] t_out
*/
void join_and_write(
std::vector<std::pair<std::unique_ptr<std::thread>, uvec>> &t_threads,
std::vector<std::vector<std::uint32_t>> &compressed_text) {
for (auto &elem : t_threads) {
(*elem.first).join();
}
for (auto &elem : t_threads) {
compressed_text.push_back(std::move(elem.second));
}
t_threads.clear();
}
/**
* La chaîne de caractère \p t_text est lue caractère par caractère, et est et
* selon la valeur de retour de la fonction \ref dico (permettant dans le même * selon la valeur de retour de la fonction \ref dico (permettant dans le même
* temps la création du dictionnaire), on rajoute un mot ou pas dans le vecteur * temps la création du dictionnaire), on rajoute ou non un nouveau caractère
* de caractères UTF-8 représentant des mots de chars compressés. La fonction * encodé sur 12bits dans le chunk courant. Dès que le dictionnaire est plein
* renvoie ledit vecteur de uint32_t via le paramètre \p t_res. * (2^12 caractères), le chunk est sauvegardé et vidé, et le dictionnaire est
* réinitialisé.
* *
* \param[in] t_text Chaîne de caractères uint8_t représentant le fichier d'entrée * \param t_text Chaîne de caractères uint8_t représentant le fichier d'entrée
* \param[out] t_res Chaîne de caractères de sortie * \return Vecteur de chunks (vecteurs de uint16_t)
*/ */
void lzw_compress(const std::vector<char> &t_text, uvec &t_res) { vvuint16 lzw_compress(ustring &&t_text) {
dict_t dictionary{};
std::puts("Compressing..."); std::puts("Compressing...");
uint32_t w = 0xFFFF; uint16_t w = 0xFFFF;
vuint16 chunk{};
constexpr size_t DICT_MAX = 7936; /* 12 bits */ vvuint16 res{};
dict_t dict{};
for (const auto &c : t_text) { for (const auto c : t_text) {
if (dictionary.size() >= DICT_MAX) { if (dict.size() >= DICT_MAX) {
t_res.push_back(static_cast<uint32_t>(w)); res.push_back(std::move(chunk));
w = static_cast<uint32_t>(c); chunk = vuint16{};
} else if (const auto &[exists, pos] = dict = dict_t{};
dico(dictionary, w, static_cast<std::uint8_t>(c)); w = 0xFFFF;
exists) { }
if (const auto &[yes, pos] = dico(dict, w, static_cast<uint8_t>(c)); yes) {
w = pos; w = pos;
} else { } else {
t_res.push_back(static_cast<uint32_t>(w)); chunk.push_back(w);
w = static_cast<std::uint8_t>(c); w = static_cast<uint16_t>(c);
} }
} }
if (w != 0xFFFF) {
chunk.push_back(w);
res.push_back(std::move(chunk));
}
return res;
} }
/** /**
@@ -81,69 +70,20 @@ void lzw_compress(const std::vector<char> &t_text, uvec &t_res) {
* \param[in] t_out_file Chemin vers le fichier de sortie * \param[in] t_out_file Chemin vers le fichier de sortie
*/ */
void compress(const std::string &t_in_file, const char *t_out_file) { void compress(const std::string &t_in_file, const char *t_out_file) {
// Fichier dentrée FILE *const input_file = fopen(t_in_file.c_str(), "rb");
std::ifstream input_file{t_in_file}; assert(input_file);
if (!input_file.is_open()) { FILE *const out = (t_out_file != nullptr) ? fopen(t_out_file, "wb")
std::cerr << "Error at " << __FILE__ << ":" << __LINE__ - 2 : fopen("output.lzw", "wb");
<< ": could not open output file \"" << t_in_file assert(out);
<< "\". Aborting...\n";
exit(1);
}
// Fichier de sortie std::fseek(input_file, 0L, SEEK_END);
FILE *out = const auto file_size = static_cast<size_t>(ftell(input_file));
(t_out_file != nullptr) ? fopen(t_out_file, "wb") : fopen("output.lzw", "wb"); std::rewind(input_file);
if (out == nullptr) {
std::cerr << "Error at " << __FILE__ << ":" << __LINE__ - 4
<< ": could not open output file. Aborting...\n";
input_file.close();
exit(1);
}
// collection of chunks
std::vector<std::vector<std::uint32_t>> compressed_text{};
// thread pool
std::vector<std::pair<std::unique_ptr<std::thread>, uvec>> threads{};
// chunk chars
std::vector<char> chunk(CHUNK_SIZE, 0);
while (input_file.read(chunk.data(),
static_cast<std::streamsize>(chunk.size()))) {
threads.emplace_back(nullptr, uvec{});
threads.back().second.reserve(CHUNK_SIZE);
threads.back().first = std::make_unique<std::thread>(
std::thread{lzw_compress, chunk, ref(threads.back().second)});
assert(threads.back().first);
if (threads.size() >= 8) {
join_and_write(threads, compressed_text);
}
}
if (!threads.empty()) {
join_and_write(threads, compressed_text);
}
if (input_file.tellg() != std::ios::end) {
std::puts("Leftovers, compressing...");
{
const auto prev_pos = input_file.tellg();
input_file.seekg(0, std::ios::end);
chunk.reserve(static_cast<size_t>(input_file.tellg() - prev_pos));
input_file.seekg(prev_pos, std::ios::beg);
std::istreambuf_iterator<char> itr(input_file);
for (std::streamoff i = 0; i < prev_pos; ++i, ++itr){
;
}
chunk.assign((itr), std::istreambuf_iterator<char>());
}
uvec ret{};
lzw_compress(chunk, ret);
compressed_text.push_back(std::move(ret));
}
auto raw_text = std::make_unique<unsigned char[]>(file_size);
std::fread(raw_text.get(), sizeof(unsigned char), file_size, input_file);
const auto compressed_text(lzw_compress(ustring{raw_text.get(), &raw_text[file_size]}));
write_file(out, compressed_text); write_file(out, compressed_text);
fclose(out); fclose(out);
input_file.close(); fclose(input_file);
} }

View File

@@ -7,17 +7,13 @@
#define LZW_SRC_COMPRESS_H_ #define LZW_SRC_COMPRESS_H_
#include "common.hh" #include "common.hh"
#include <vector>
#include <iostream> #include <iostream>
#include <thread> #include <thread>
#include <vector>
/// \brief Exécution des threads et écriture de leur résultat dans le fichier de sortie
void join_and_write(std::vector<std::pair<std::unique_ptr<std::thread>,
std::vector<std::uint32_t>>> &,
std::vector<std::vector<std::uint32_t>> &);
/// \brief Compression d'une chaine de caractères /// \brief Compression d'une chaine de caractères
void lzw_compress(const std::vector<char> &, std::vector<std::uint32_t> &); std::vector<std::vector<std::uint16_t>>
lzw_compress(std::basic_string<unsigned char> &&);
/// \brief Wrapper de \ref lzw_compress /// \brief Wrapper de \ref lzw_compress
void compress(const std::string &, const char *); void compress(const std::string &, const char *);

100
src/io.cc
View File

@@ -4,92 +4,62 @@
*/ */
#include "io.hh" #include "io.hh"
#include "bitpack.hh"
#include <array>
#include <algorithm>
#ifdef Debug #ifdef Debug
constexpr bool debug_mode = true; constexpr bool debug_mode = true;
#include <algorithm>
#else #else
constexpr bool debug_mode = false; constexpr bool debug_mode = false;
#endif #endif
using std::vector;
using std::uint16_t;
using vuint16 = vector<uint16_t>;
using vvuint16 = vector<vuint16>;
/** /**
* Écrit dans le fichier \p t_out les chunks passés en paramètre. Le fichier de * Écrit dans le fichier \p t_out les chunks passés en paramètre. Le fichier de
* sortie est composé des éléments suivants :\n * sortie est composé des éléments suivants :\n
* - Sur quatre octets sont écrit un `uint32_t` déterminant la taille d'un * - Sur deux octets sont écrit un `uint16_t` déterminant le nombre de chunk
* caractère\n
* - Sur quatre octets sont écrit un `uint32_t` déterminant le nombre de chunk
* composant le fichier\n * composant le fichier\n
* - Sont ensuite écrits les chunks sur des nombres doctets variable suivant * - Sont ensuite écrits les chunks sur un nombre variable doctets suivant la
* la taille dun caractère et le nombre de caractères\n * taille des chunks\n
* \n * \n
* Un chunk est composé de la manière qui suit :\n * Un chunk est composé de la manière qui suit :\n
* - Sur quatre octets sont écrit un `uint32_t` déterminant le nombre de * - Sur deux octets sont écrit un `uint32_t` déterminant le nombre doctets
* caractères composant le chunk\n * composant le chunk\n
* - Les caractères composant le chunk, accolés les uns au autres. Si le * - Sur le nombre doctets précisés par le header du chunk se trouvent les
* dernier caractère ne remplis pas le dernier octet du chunk, alors ces * données compressées par lalgorithme lzw puis via bit-packing.\n
* derniers bits seront initialisés à 0.\n
* La taille dun chunk est donc la taille dun caractère multiplié par le
* nombre de caractères du chunk, le tout divisé par 8. Si le résultat nest
* pas un entier, alors il est nivelé vers le haut pour avoir un nombre entier
* doctets (e.g. si le résultat est 103.4, alors 104 octets seront utilisés).
* *
* \param[out] t_out Fichier de sortie * \param[out] t_out Fichier de sortie
* \param[in] t_text Collection ordonnée des chunks à écrire dans \p t_out * \param[in] t_text Collection ordonnée des chunks à écrire dans \p t_out
*/ */
void write_file(FILE *t_out, std::vector<std::vector<std::uint32_t>> &t_text) { void write_file(FILE *const t_out, const vvuint16 &t_text) {
{ const auto size = static_cast<uint16_t>(t_text.size());
uint32_t char_size = 12;
if constexpr (debug_mode) {
std::printf("Char size: %u\n", char_size);
}
fwrite(&char_size, sizeof(uint32_t), 1, t_out);
auto size = static_cast<uint32_t>(t_text.size());
if constexpr (debug_mode) { if constexpr (debug_mode) {
std::printf("Number of chunks: %u\n", size); std::printf("Number of chunks: %u\n", size);
} }
fwrite(&size, sizeof(uint32_t), 1, t_out); fwrite(&size, sizeof(size), 1, t_out);
}
for (const auto &chunk : t_text) { for (const auto &chunk : t_text) {
// write size of chunk in uint32_t if constexpr (debug_mode)
{ std::printf("Chunk!\n");
auto size = static_cast<uint32_t>(chunk.size()); write_chunk(t_out, chunk);
if constexpr (debug_mode) {
std::printf("Size of chunk: %u\n", size);
}
fwrite(&size, sizeof(uint32_t), 1, t_out);
}
uint8_t remainder = 0x00;
for(size_t i = 0; i < chunk.size(); ++i) {
if(i % 2 == 0) {
// char = xxxx xxxx xxxx
// ^^^^^^^^^ ^^^^
// write keep in remainder as xxxx0000
auto temp = static_cast<unsigned char>(chunk[i] >> 4);
fwrite(&temp, sizeof(temp), 1, t_out);
if constexpr (debug_mode) {
std::printf("writing: %x\t\t", temp);
}
remainder = static_cast<uint8_t>(chunk[i] << 4);
} else {
// already have `remainder = yyyy0000`
// char = xxxx xxxx xxxx
// ^^^^ ^^^^^^^^^
// remainder = yyyyxxxx write after remainder
// remainder = 00000000
remainder &= static_cast<unsigned char>(chunk[i]) >> 8 & 0xF0;
fwrite(&remainder, sizeof(remainder), 1, t_out);
if constexpr (debug_mode) {
std::printf("writing remainder: %x\t\t", remainder);
}
auto temp = static_cast<unsigned char>(chunk[i]);
fwrite(&temp, sizeof(temp), 1, t_out);
if constexpr (debug_mode) {
std::printf("writing: %x\n", temp);
}
remainder = 0x00;
}
}
if(remainder != 0) {
fwrite(&remainder, sizeof(remainder), 1, t_out);
} }
} }
/**
* Writes the single chunk \p t_chunk to the file \p t_out: the chunk is first
* bit-packed with \ref pack, then the size in bytes of the packed data is
* written as a `uint32_t` (raw object bytes, i.e. host byte order), followed
* by the packed bytes themselves. See the documentation of \ref write_file for
* more details.
*
* \param t_out Output file
* \param t_chunk Chunk to be written to \p t_out
*/
void write_chunk(FILE *const t_out, const vuint16 &t_chunk) {
const auto output = pack(t_chunk);
const auto chunk_size = static_cast<uint32_t>(output.size());
// NOTE(review): the return values of both fwrite calls are ignored, so a
// short write goes unnoticed.
fwrite(&chunk_size, sizeof(chunk_size), 1, t_out);
fwrite(output.data(), sizeof(output[0]), output.size(), t_out);
} }

View File

@@ -1,13 +1,14 @@
/** /**
* \file io.h * \file io.hh
* \brief Header for file reading and writing * \brief Header for file reading and writing
*/ */
#ifndef LZW_SRC_IO_H_ #ifndef LZW_SRC_IO_H_
#define LZW_SRC_IO_H_ #define LZW_SRC_IO_H_
#include <cstdio>
#include <cstdint> #include <cstdint>
#include <cstdio>
#include <iostream>
#include <vector> #include <vector>
/* /*
@@ -18,14 +19,17 @@
* *
* Un chunk se compose ainsi : * Un chunk se compose ainsi :
* nb_char_chunk : nombre de caractères du chunk (2B) * nb_char_chunk : nombre de caractères du chunk (2B)
* text* : caractères de taille char_size (ceil((char_size * nb_char_chunk) / 8)) * text* : caractères de taille char_size (ceil((char_size * nb_char_chunk) /
* 8))
* *
* Si le dernier caractère ne termine pas le dernier octet du chunk, les * Si le dernier caractère ne termine pas le dernier octet du chunk, les
* derniers bits sont mit à zéro * derniers bits sont mit à zéro
*/ */
/// \brief Écrit dans le fichier le texte compressé /// \brief Écrit dans le fichier le texte compressé
void write_file(FILE *, std::vector<std::vector<std::uint32_t>> &); void write_file(FILE *, const std::vector<std::vector<std::uint16_t>> &);
/// \brief Écrit un chunk dans le fichier de sortie
void write_chunk(FILE *, const std::vector<std::uint16_t> &);
#endif /* LZW_SRC_IO_H_ */ #endif /* LZW_SRC_IO_H_ */

View File

@@ -6,17 +6,16 @@
* *
*/ */
#ifdef Debug #include <getopt.h>
constexpr bool debug_mode = true; #include <cassert>
#else #include <tuple>
constexpr bool debug_mode = false;
#endif
#include "compress.hh" #include "compress.hh"
#include "getopt.h" #include "uncompress.hh"
using std::printf; using std::printf;
using std::puts; using std::puts;
using std::string;
using std::tuple;
// custom types /////////////////////////////////////////////////////////////// // custom types ///////////////////////////////////////////////////////////////
@@ -47,22 +46,14 @@ void help() {
puts("\t-o\tpath to the output file (if the file already exists, it will"); puts("\t-o\tpath to the output file (if the file already exists, it will");
puts("\t\tbe overwritten). Default: input path + \".lzw\""); puts("\t\tbe overwritten). Default: input path + \".lzw\"");
puts("\t-c\tcompress the input file"); puts("\t-c\tcompress the input file");
puts("\t-d\tdecompresses the input file to the output file. If no output"); puts("\t-u\tuncompresses the input file to the output file. If no output");
puts("\t\tpath has not been entered and if the input file ends with "); puts("\t\tpath has not been entered and if the input file ends with ");
puts("\t\t\".lzw\", the extension \".lzw\" will be removed; otherwise, the "); puts("\t\t\".lzw\", the extension \".lzw\" will be removed; otherwise, the ");
puts("\t\textension \".uncompresed\" will be added"); puts("\t\textension \".uncompresed\" will be added");
} }
int main(int argc, char *argv[]) { std::tuple<string, string, bool> process_args(int t_argc, char *t_argv[]) {
if constexpr (debug_mode) { auto ret = std::make_tuple(string{}, string{}, false);
for (int i = 0; i < argc; ++i)
printf("argv[%d] = %s\n", i, argv[i]);
}
std::string input_path{};
std::string output_path{};
bool compressing = true;
while (true) { while (true) {
int option_index = 0; int option_index = 0;
static struct option long_options[] = { static struct option long_options[] = {
@@ -72,97 +63,46 @@ int main(int argc, char *argv[]) {
{"compress", no_argument, nullptr, 'c'}, {"compress", no_argument, nullptr, 'c'},
{"uncompress", no_argument, nullptr, 'u'}, {"uncompress", no_argument, nullptr, 'u'},
{nullptr, 0, nullptr, 0}}; {nullptr, 0, nullptr, 0}};
int c = getopt_long(argc, argv, "hi:o:cu", long_options, &option_index); int c = getopt_long(t_argc, t_argv, "hi:o:cu", long_options, &option_index);
if (c == -1) if (c == -1) break;
break;
switch (c) { switch (c) {
case 0: { case 0:
if constexpr (debug_mode) {
printf("\noption %s", long_options[option_index].name);
if (optarg) {
printf(" with arg %s\n", optarg);
}
}
break; break;
} case 'h':
case 'h': {
if constexpr (debug_mode) {
printf("From main - option --help passed\n");
}
help(); help();
return 0; exit(0);
} case 'i':
case 'i': { std::get<0>(ret) = optarg;
if constexpr (debug_mode) {
printf("From main - option --input with value '%s'\n", optarg);
}
input_path = optarg;
break; break;
} case 'o':
case 'o': { std::get<1>(ret) = optarg;
if constexpr (debug_mode) {
printf("From main - option --output with value '%s'\n", optarg);
}
output_path = optarg;
break; break;
} case 'c':
case 'c': { std::get<2>(ret) = true;
if constexpr (debug_mode) {
printf("From main - option --compress\n");
}
compressing = true;
break; break;
} case 'u':
case 'u': { std::get<2>(ret) = false;
if constexpr (debug_mode) {
printf("From main - option --uncompress\n");
}
compressing = false;
break; break;
}
case '?': case '?':
default: { [[fallthrough]];
default:
puts("Error: unknown parameter."); puts("Error: unknown parameter.");
if constexpr (debug_mode) {
printf("From main - option -?\n");
}
help(); help();
return 1; exit(1);
} }
} }
return ret;
} }
if (input_path.empty()) { /* TODO: compression multiple : nombre de compressions puis fichier compressé */
puts("Error: no input file specified"); int main(int argc, char *argv[]) {
return 2; const auto [input_path, output_path, compressing] = process_args(argc, argv);
} assert(!input_path.empty());
if (compressing) { if (compressing) {
/* compress(input_path, (output_path.empty()) ? nullptr : output_path.c_str());
TODO:
- compresser le fichier dentrée morceaux par morceaux, 16Ko à la fois
- écrire la taille du segment compressé, puis le segment compressé
- multithreading
- compression multiple : nombre de compressions puis fichier compressé
- bit-packing, limiter la taille du dictionnaire pour un certain nombre de
bits.
*/
if constexpr (debug_mode) {
puts("Beginning compression");
}
if (output_path.empty()) {
compress(input_path, nullptr);
} else { } else {
compress(input_path, output_path.c_str()); uncompress(input_path,
(output_path.empty()) ? nullptr : output_path.c_str());
} }
// compress(input_path, output_path.c_str());
} else {
puts("Not yet implemented :(");
/*
Inversion des types du dictionnaire pour retrouver les chaînes plus
aisément
*/
}
return 0; return 0;
} }

64
src/uncompress.cc Normal file
View File

@@ -0,0 +1,64 @@
#include "uncompress.hh"
#include "bitpack.hh"
#include "common.hh"
#include <cassert>
#include <cstdio>
#include <cstdlib>
using std::fclose;
using std::fopen;
using std::fseek;
using std::string;
using std::uint16_t;
using std::vector;
using ustring = std::basic_string<unsigned char>;
using vuint16 = vector<uint16_t>;
/**
 * \brief Rebuilds a byte sequence from the LZW codes of one chunk
 *
 * The first code is emitted as-is, truncated to a single byte. Every
 * following code is expanded through dico_uncompress(), which receives the
 * dictionary, the current code and the previously processed code; whatever it
 * returns is appended to the result.
 *
 * \param[in] t_compressed LZW codes to expand
 * \return The expanded bytes; an empty string when \p t_compressed is empty
 */
ustring lzw_uncompress(vuint16 &&t_compressed) {
  ustring ret{};
  // Guard: indexing element 0 or forming begin() + 1 on an empty vector is
  // undefined behaviour, so an empty chunk simply expands to nothing.
  if (t_compressed.empty()) {
    return ret;
  }
  std::map<uint16_t, ustring> dict{};
  uint16_t v = t_compressed.front();
  ret.push_back(static_cast<unsigned char>(v));
  uint16_t old = v;
  for (auto it = t_compressed.begin() + 1; it != t_compressed.end(); ++it) {
    v = *it;
    const auto uncompressed{dico_uncompress(dict, v, old)};
    ret.insert(ret.end(), uncompressed.begin(), uncompressed.end());
    old = v;
  }
  return ret;
}
void uncompress(const string &t_input_name, const char *t_output_name) {
FILE *const input = std::fopen(t_input_name.c_str(), "rb");
assert(input);
FILE *const output =
(t_output_name != nullptr)
? std::fopen(t_output_name, "wb")
: std::fopen((t_input_name + "_uncompressed").c_str(), "wb");
assert(output);
uint16_t nb_chunks = 0;
std::fseek(input, 0, SEEK_SET);
std::fread(&nb_chunks, sizeof(nb_chunks), 1, input);
for (uint16_t i = 0; i < nb_chunks; ++i) {
uint32_t size_chunk = 0;
fread(&size_chunk, sizeof(size_chunk), 1, input);
auto *chunk = static_cast<unsigned char *>(
std::malloc(sizeof(unsigned char) * size_chunk));
fread(chunk, sizeof(unsigned char), size_chunk, input);
auto unpacked = unpack(ustring{chunk, chunk + size_chunk});
const auto uncompressed_chunk = lzw_uncompress(std::move(unpacked));
// sometimes will add null char
std::fwrite(uncompressed_chunk.data(), sizeof(uncompressed_chunk[0]),
uncompressed_chunk.size(), output);
}
std::fclose(output);
std::fclose(input);
}

12
src/uncompress.hh Normal file
View File

@@ -0,0 +1,12 @@
#ifndef LZW_SRC_UNCOMPRESS_H_
#define LZW_SRC_UNCOMPRESS_H_
#include <cstdint>  // std::uint16_t is used in the declarations below
#include <memory>
#include <string>
#include <vector>
/// \brief Expands a sequence of LZW codes into the bytes it stands for
std::basic_string<unsigned char> lzw_uncompress(std::vector<std::uint16_t> &&);
/// \brief Uncompresses the file named by the first argument; the second
/// argument is the output path, or nullptr to derive it from the input path
void uncompress(const std::string &, const char *);
#endif /* LZW_SRC_UNCOMPRESS_H_ */

View File

@@ -1,52 +0,0 @@
/**
* \file utf8.cc
* \brief Implementation for UTF-8 related functions
*/
#include "utf8.hh"
#include <string>
using FILE = std::FILE;
using uint8_t = std::uint8_t;
using uint32_t = std::uint32_t;
using ustring = std::basic_string<uint8_t>; // chaine non encodée
/**
 * \brief Writes one character to a file, encoded as UTF-8
 *
 * Values below 128 are written as a single byte. Larger values are written as
 * a lead byte followed by one to five continuation bytes, each continuation
 * byte carrying six bits of \p t_c (the historical 31-bit layout, so 5- and
 * 6-byte sequences can be produced for values of 2^21 and above).
 *
 * Bits of \p t_c that do not fit in the longest sequence (bit 31) are dropped
 * so that the marker bits of the lead byte are never corrupted.
 *
 * \param[in] t_out Output file
 * \param[in] t_c Character to write to \p t_out
 */
void write_utf8(FILE* t_out, uint32_t t_c) {
  if (t_c < 128) {
    // Go through a real byte: writing the first byte of the 32-bit object
    // would emit 0 on a big-endian host.
    const unsigned char byte = static_cast<unsigned char>(t_c);
    fwrite(&byte, sizeof(unsigned char), 1, t_out);
    return;
  }

  size_t loops = 0;          // number of continuation bytes
  unsigned char header = 0;  // marker bits of the lead byte
  if (t_c < 2048) {
    loops = 1;
    header = 0xC0;
  } else if (t_c < 65536) {
    loops = 2;
    header = 0xE0;
  } else if (t_c < 2097152) {
    loops = 3;
    header = 0xF0;
  } else if (t_c < 67108864) {
    loops = 4;
    header = 0xF8;
  } else {
    loops = 5;
    header = 0xFC;
  }

  unsigned char str[6] = {};
  for (size_t i = 0; i <= loops; ++i) {
    // Shift first, mask afterwards: masking before the shift discards the
    // very bits the byte is supposed to carry.
    const uint32_t shifted = t_c >> ((loops - i) * 6);
    if (i == 0) {
      // A lead byte with `loops` continuation bytes has 6 - loops payload bits.
      const uint32_t lead_mask = 0xFFu >> (loops + 2);
      str[i] = static_cast<unsigned char>(header | (shifted & lead_mask));
    } else {
      str[i] = static_cast<unsigned char>(0x80u | (shifted & 0x3Fu));
    }
  }
  fwrite(str, sizeof(unsigned char), loops + 1, t_out);
}

View File

@@ -1,26 +0,0 @@
/**
* \file utf8.hh
* \brief Header for UTF-8 related functions
*/
#ifndef LZW_SRC_UTF8_H_
#define LZW_SRC_UTF8_H_
#include <cstdio>
#include <cstdint>
/*
Characters are encoded in UTF-8; the byte layout depends on the value:
char < 128 => "0xxxxxxx" 7bits
char < 2,048 => "110xxxxx 10xxxxxx" 11bits
char < 65,536 => "1110xxxx 10xxxxxx 10xxxxxx" 16bits
char < 2,097,152 => "11110xxx 10xxxxxx 10xxxxxx 10xxxxxx" 21bits
char < 67,108,864 => "111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx" 26bits
char < 2,147,483,648 => "1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx" 31bits
*/
/// \brief Writes a character in UTF-8 format
/// \param[in] t_out Output file
/// \param[in] t_c Character to write to \p t_out
void write_utf8(std::FILE* t_out, std::uint32_t t_c);
#endif /* LZW_SRC_UTF8_H_ */