Compare commits
1 Commits
master
...
new-bitpac
Author | SHA1 | Date | |
---|---|---|---|
|
7775fec68e |
@ -5,4 +5,5 @@ jobs:
|
|||||||
- image: purplekarrot/gcc-8
|
- image: purplekarrot/gcc-8
|
||||||
steps:
|
steps:
|
||||||
- checkout
|
- checkout
|
||||||
|
- run: apt-get update && apt-get install -y doxygen doxygen-docs doxygen-latex doxygen-gui graphviz
|
||||||
- run: cd build && cmake -DCMAKE_BUILD_TYPE=Release .. && make -j
|
- run: cd build && cmake -DCMAKE_BUILD_TYPE=Release .. && make -j
|
||||||
|
12
.gitignore
vendored
12
.gitignore
vendored
@ -1,13 +1,15 @@
|
|||||||
*~
|
*~
|
||||||
|
|
||||||
gmon\.out
|
gmon\.out
|
||||||
|
/cmake-build-debug/Makefile
|
||||||
|
|
||||||
|
cmake-build-debug/
|
||||||
|
|
||||||
|
\.idea/
|
||||||
*.lzw
|
*.lzw
|
||||||
|
|
||||||
\.scannerwork/
|
docs/Doxyfile
|
||||||
|
|
||||||
bw-output/
|
docs/html/
|
||||||
|
|
||||||
bin/
|
docs/latex/
|
||||||
build/
|
|
||||||
debug/
|
|
||||||
|
@ -1,7 +0,0 @@
|
|||||||
image: rikorose/gcc-cmake:latest
|
|
||||||
stages:
|
|
||||||
- build
|
|
||||||
build:
|
|
||||||
stage: build
|
|
||||||
script:
|
|
||||||
- mkdir -p build bin && cd build && cmake -DCMAKE_BUILD_TYPE=Release .. && make -j
|
|
48
.travis.yml
48
.travis.yml
@ -4,48 +4,66 @@ compiler: clang
|
|||||||
os: linux
|
os: linux
|
||||||
dist: trusty
|
dist: trusty
|
||||||
addons:
|
addons:
|
||||||
sonarcloud:
|
|
||||||
organization: "phundrak-github"
|
|
||||||
token:
|
|
||||||
secure: ${SONAR_TOKEN}
|
|
||||||
apt:
|
apt:
|
||||||
config:
|
|
||||||
retries: true
|
|
||||||
sources:
|
sources:
|
||||||
- ubuntu-toolchain-r-test
|
- ubuntu-toolchain-r-test
|
||||||
- llvm-toolchain-trusty-5.0
|
- llvm-toolchain-trusty-5.0
|
||||||
packages:
|
packages:
|
||||||
- g++-7
|
- g++-7
|
||||||
- clang-5.0
|
- clang-5.0
|
||||||
cache:
|
- doxygen
|
||||||
apt: true
|
- doxygen-doc
|
||||||
|
- doxygen-latex
|
||||||
|
- doxygen-gui
|
||||||
|
- graphviz
|
||||||
|
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
# Linux Clang C++17
|
# Linux Clang C++17
|
||||||
- env:
|
- env:
|
||||||
BUILD_TYPE=debug
|
BUILD_TYPE=Debug
|
||||||
BIN_DIR=debug
|
BIN_DIR=debug
|
||||||
CC=clang-5.0
|
CC=clang-5.0
|
||||||
CXX=clang++-5.0
|
CXX=clang++-5.0
|
||||||
- env:
|
- env:
|
||||||
BUILD_TYPE=release
|
BUILD_TYPE=Release
|
||||||
BIN_DIR=bin
|
BIN_DIR=bin
|
||||||
CC=clang-5.0
|
CC=clang-5.0
|
||||||
CXX=clang++-5.0
|
CXX=clang++-5.0
|
||||||
|
|
||||||
# Linux GCC C++17
|
# Linux GCC C++17
|
||||||
- env:
|
- env:
|
||||||
- MATRIX_EVAL="BUILD_TYPE=debug && BIN_DIR=debug && CC=gcc-7 && CXX=g++-7"
|
- MATRIX_EVAL="BUILD_TYPE=Debug && BIN_DIR=debug && CC=gcc-7 && CXX=g++-7"
|
||||||
compiler: gcc
|
compiler: gcc
|
||||||
before_install:
|
before_install:
|
||||||
- eval "${MATRIX_EVAL}"
|
- eval "${MATRIX_EVAL}"
|
||||||
- env:
|
- env:
|
||||||
- MATRIX_EVAL="BUILD_TYPE=release && BIN_DIR=bin && CC=gcc-7 && CXX=g++-7"
|
- MATRIX_EVAL="BUILD_TYPE=Release && BIN_DIR=bin && CC=gcc-7 && CXX=g++-7"
|
||||||
compiler: gcc
|
compiler: gcc
|
||||||
before_install:
|
before_install:
|
||||||
- eval "${MATRIX_EVAL}"
|
- eval "${MATRIX_EVAL}"
|
||||||
|
|
||||||
|
# OSX Clang
|
||||||
|
# - os: osx
|
||||||
|
# osx_image: xcode9.3
|
||||||
|
# env:
|
||||||
|
# BUILD_TYPE=Release
|
||||||
|
# BIN_DIR=bin
|
||||||
|
# before_install:
|
||||||
|
# - brew update
|
||||||
|
# - brew install doxygen graphviz
|
||||||
|
# install:
|
||||||
|
# brew upgrade cmake
|
||||||
|
# - os: osx
|
||||||
|
# osx_image: xcode9.3
|
||||||
|
# env:
|
||||||
|
# BUILD_TYPE=Debug
|
||||||
|
# BIN_DIR=debug
|
||||||
|
# before_install:
|
||||||
|
# brew update
|
||||||
|
# install:
|
||||||
|
# brew upgrade cmake
|
||||||
script:
|
script:
|
||||||
- make $BUILD_TYPE
|
- cd build
|
||||||
- build-wrapper-linux-x86-64 --out-dir bw-output make clean all
|
- cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE ..
|
||||||
- sonar-scanner -Dsonar.projectKey=Phundrak_lzw-assignment -Dsonar.sources=./src/ -Dsonar.cfamily.build-wrapper-output=bw-output -Dsonar.host.url=https://sonarcloud.io
|
- make -j
|
||||||
|
@ -7,7 +7,7 @@ set(TGT "projet_lzw")
|
|||||||
set(${TGT}_VERSION_MAJOR 0)
|
set(${TGT}_VERSION_MAJOR 0)
|
||||||
set(${TGT}_VERSION_MINOR 1)
|
set(${TGT}_VERSION_MINOR 1)
|
||||||
|
|
||||||
set(CXX_COVERAGE_COMPILE_FLAGS "-pedantic -Wall -Wextra -Wold-style-cast -Woverloaded-virtual -Wfloat-equal -Wwrite-strings -Wpointer-arith -Wcast-qual -Wcast-align -Wconversion -Wsign-conversion -Wshadow -Weffc++ -Wredundant-decls -Wdouble-promotion -Winit-self -Wswitch-default -Wswitch-enum -Wundef -Winline -Wunused -Wnon-virtual-dtor -pthread")
|
set(CXX_COVERAGE_COMPILE_FLAGS "-pedantic -Wall -Wextra -Wold-style-cast -Woverloaded-virtual -Wfloat-equal -Wwrite-strings -Wpointer-arith -Wcast-qual -Wcast-align -Wshadow -Weffc++ -Wredundant-decls -Wdouble-promotion -Winit-self -Wswitch-default -Wswitch-enum -Wundef -Winline -Wunused -Wnon-virtual-dtor -Wno-conversion -pthread")
|
||||||
set(CMAKE_CXX_FLAGS_DEBUG "${CXX_COVERAGE_COMPILE_FLAGS} -DDebug -g -pg")
|
set(CMAKE_CXX_FLAGS_DEBUG "${CXX_COVERAGE_COMPILE_FLAGS} -DDebug -g -pg")
|
||||||
set(CMAKE_CXX_FLAGS_RELEASE "${CXX_COVERAGE_COMPILE_FLAGS} -O3")
|
set(CMAKE_CXX_FLAGS_RELEASE "${CXX_COVERAGE_COMPILE_FLAGS} -O3")
|
||||||
|
|
||||||
@ -34,6 +34,26 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "../debug/")
|
|||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_COVERAGE_COMPILE_FLAGS}")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_COVERAGE_COMPILE_FLAGS}")
|
||||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${CXX_COVERAGE_COMPILE_FLAGS}")
|
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${CXX_COVERAGE_COMPILE_FLAGS}")
|
||||||
|
|
||||||
|
# indicate the doc build as an option, ON by default
|
||||||
|
if(CMAKE_BUILD_TYPE MATCHES "^[Rr]elease")
|
||||||
|
option(BUILD_DOC "Build documentation" ON)
|
||||||
|
find_package(Doxygen
|
||||||
|
REQUIRED dot)
|
||||||
|
if(DOXYGEN_FOUND)
|
||||||
|
set(DOXYGEN_IN ${CMAKE_CURRENT_SOURCE_DIR}/docs/Doxyfile.in)
|
||||||
|
set(DOXYGEN_OUT ${CMAKE_CURRENT_SOURCE_DIR}/docs/Doxyfile)
|
||||||
|
configure_file(${DOXYGEN_IN} ${DOXYGEN_OUT} @ONLY)
|
||||||
|
message("Doxygen build started")
|
||||||
|
add_custom_target(docs ALL
|
||||||
|
COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_OUT}
|
||||||
|
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
|
||||||
|
COMMENT "Generating API documentation with Doxygen"
|
||||||
|
VERBATIM )
|
||||||
|
else()
|
||||||
|
message("Doxygen needs to be installed to generate the doxygen documentation.")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
include_directories(includes)
|
include_directories(includes)
|
||||||
file(GLOB SOURCES "src/*.cc")
|
file(GLOB SOURCES "src/*.cc")
|
||||||
add_executable(${TGT} ${SOURCES})
|
add_executable(${TGT} ${SOURCES})
|
||||||
|
16
Makefile
16
Makefile
@ -1,16 +0,0 @@
|
|||||||
all:
|
|
||||||
@mkdir -p build
|
|
||||||
@cd build && pwd && cmake -DCMAKE_BUILD_TYPE=Release .. && make
|
|
||||||
@strip bin/project_lzw
|
|
||||||
|
|
||||||
release:
|
|
||||||
@mkdir -p build
|
|
||||||
@cd build && pwd && cmake -DCMAKE_BUILD_TYPE=Release .. && make
|
|
||||||
@strip bin/projet_lzw
|
|
||||||
|
|
||||||
debug:
|
|
||||||
@mkdir -p build
|
|
||||||
@cd build && pwd && cmake -DCMAKE_BUILD_TYPE=Debug .. && make
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -rf bin/* build/* debug/*
|
|
14
README.md
14
README.md
@ -2,7 +2,6 @@
|
|||||||
[![Travis Badge](https://travis-ci.org/Phundrak/lzw-assignment.svg?branch=master)](https://travis-ci.org/Phundrak/lzw-assignment)
|
[![Travis Badge](https://travis-ci.org/Phundrak/lzw-assignment.svg?branch=master)](https://travis-ci.org/Phundrak/lzw-assignment)
|
||||||
[![Codacy Badge](https://api.codacy.com/project/badge/Grade/80cf9a0514554f368effaf78d8e4ae15)](https://www.codacy.com/app/Phundrak/lzw-assignment?utm_source=github.com&utm_medium=referral&utm_content=Phundrak/lzw-assignment&utm_campaign=Badge_Grade)
|
[![Codacy Badge](https://api.codacy.com/project/badge/Grade/80cf9a0514554f368effaf78d8e4ae15)](https://www.codacy.com/app/Phundrak/lzw-assignment?utm_source=github.com&utm_medium=referral&utm_content=Phundrak/lzw-assignment&utm_campaign=Badge_Grade)
|
||||||
[![CodeFactor](https://www.codefactor.io/repository/github/phundrak/lzw-assignment/badge)](https://www.codefactor.io/repository/github/phundrak/lzw-assignment)
|
[![CodeFactor](https://www.codefactor.io/repository/github/phundrak/lzw-assignment/badge)](https://www.codefactor.io/repository/github/phundrak/lzw-assignment)
|
||||||
[![SonarCloud Badge](https://sonarcloud.io/api/project_badges/measure?project=Phundrak_lzw-assignment&metric=alert_status)](https://sonarcloud.io/dashboard?id=Phundrak_lzw-assignment)
|
|
||||||
[![Spacemacs Badge](https://cdn.rawgit.com/syl20bnr/spacemacs/442d025779da2f62fc86c2082703697714db6514/assets/spacemacs-badge.svg)](http://spacemacs.org)
|
[![Spacemacs Badge](https://cdn.rawgit.com/syl20bnr/spacemacs/442d025779da2f62fc86c2082703697714db6514/assets/spacemacs-badge.svg)](http://spacemacs.org)
|
||||||
|
|
||||||
# LZW Compressing tool
|
# LZW Compressing tool
|
||||||
@ -10,16 +9,3 @@
|
|||||||
This is a university assignment for which I aim to create an LZW algorithm implementation to create a small tool similar to `gzip` and `gunzip` that can compress and uncompress files in a lossless fashion.
|
This is a university assignment for which I aim to create an LZW algorithm implementation to create a small tool similar to `gzip` and `gunzip` that can compress and uncompress files in a lossless fashion.
|
||||||
|
|
||||||
This project is written in C++17, compiled with clang under a UNIX environment. Other compilers and environments will not be tested.
|
This project is written in C++17, compiled with clang under a UNIX environment. Other compilers and environments will not be tested.
|
||||||
|
|
||||||
## How to use it
|
|
||||||
|
|
||||||
Currently, five different options are available to the user:
|
|
||||||
- `-h` or `--help` will show how to use `projet_lzw`
|
|
||||||
- `-c` or `--compress` tells `projet_lzw` to compress the input file
|
|
||||||
- `-u` or `--uncompress` tells `projet_lzw` to uncompress the input file
|
|
||||||
- `-i <file>` or `--input <file path>` specifies the input file to be compressed or uncompressed (**MANDATORY**)
|
|
||||||
- `-o <file>` or `--output <file path>` specifies the name of the output file. If not used, the default output name for compression is `output.lzw`, and the default name for uncompressing is `<filename>_uncompressed`.
|
|
||||||
|
|
||||||
By default, `projet_lzw` will uncompress the (mandatory) input file.
|
|
||||||
|
|
||||||
It is planned to add in the future a sixth option, `-p` or `--passes` that will allow to compress multiple times the input file.
|
|
||||||
|
2
bin/.gitignore
vendored
Normal file
2
bin/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
*
|
||||||
|
!.gitignore
|
2
build/.gitignore
vendored
Normal file
2
build/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
*
|
||||||
|
!.gitignore
|
2
debug/.gitignore
vendored
Normal file
2
debug/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
*
|
||||||
|
!.gitignore
|
33
docs/Doxyfile.in
Normal file
33
docs/Doxyfile.in
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
OUTPUT_DIRECTORY = @CMAKE_CURRENT_SOURCE_DIR@/docs/
|
||||||
|
INPUT = @CMAKE_CURRENT_SOURCE_DIR@/src/ @CMAKE_CURRENT_SOURCE_DIR@/docs
|
||||||
|
DOXYFILE ENCODING = UTF-8
|
||||||
|
PROJECT_NAME = "Compression LZW"
|
||||||
|
PROJECT_NUMBER = 0.3
|
||||||
|
PROJECT_BRIEF = "Utilitaire de compression/décompression de fichiers via l’algorithme LZW"
|
||||||
|
ALLOW_UNICODE_NAMES = YES
|
||||||
|
OUTPUT_LANGUAGE = French
|
||||||
|
FULL_PATH_NAMES = NO
|
||||||
|
TAB_SIZE = 2
|
||||||
|
EXTRACT_ALL = YES
|
||||||
|
CASE_SENSE_NAMES = YES
|
||||||
|
INPUT_ENCODING = UTF-8
|
||||||
|
FILE_PATTERNS = *.cc \
|
||||||
|
*.hh
|
||||||
|
RECURSIVE = YES
|
||||||
|
EXAMPLE_PATTERNS = *
|
||||||
|
SOURCE_BROWSER = YES
|
||||||
|
INLINE_SOURCES = YES
|
||||||
|
REFERENCED_BY_RELATION = YES
|
||||||
|
REFERENCES_RELATION = YES
|
||||||
|
USE_HTAGS = NO
|
||||||
|
HTML_TIMESTAMP = YES
|
||||||
|
GENERATE_DOCSET = NO
|
||||||
|
GENERATE_HTMLHELP = NO
|
||||||
|
LATEX_SOURCE_CODE = YES
|
||||||
|
LATEX_TIMESTAMP = YES
|
||||||
|
ENABLE_PREPROCESSING = NO
|
||||||
|
HAVE_DOT = YES
|
||||||
|
UML_LOOK = YES
|
||||||
|
CALL_GRAPH = YES
|
||||||
|
CALLER_GRAPH = YES
|
||||||
|
INTERACTIVE_SVG = YES
|
1049
src/bitpack.cc
1049
src/bitpack.cc
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,8 @@
|
|||||||
|
/**
|
||||||
|
* \file bitpack.hh
|
||||||
|
* \brief Header for bit-packing functions
|
||||||
|
*/
|
||||||
|
|
||||||
#ifndef LZW_SRC_BITPACK_H_
|
#ifndef LZW_SRC_BITPACK_H_
|
||||||
#define LZW_SRC_BITPACK_H_
|
#define LZW_SRC_BITPACK_H_
|
||||||
|
|
||||||
@ -5,28 +10,84 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
[[nodiscard]] std::vector<unsigned char>
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
pack(const std::vector<std::uint16_t> &);
|
// packing //
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
[[nodiscard]] std::vector<unsigned char>
|
std::vector<unsigned char> pack(const std::vector<std::uint16_t> &);
|
||||||
pack_n(std::vector<std::uint16_t>::const_iterator,
|
|
||||||
std::vector<std::uint16_t>::const_iterator, std::vector<unsigned char> &,
|
|
||||||
int);
|
|
||||||
|
|
||||||
[[nodiscard]] std::vector<unsigned char>
|
std::vector<unsigned char> pack_9(std::vector<std::uint16_t>::const_iterator,
|
||||||
pack_16(std::vector<std::uint16_t>::const_iterator,
|
std::vector<std::uint16_t>::const_iterator);
|
||||||
|
|
||||||
|
std::vector<unsigned char> pack_10(std::vector<std::uint16_t>::const_iterator,
|
||||||
std::vector<std::uint16_t>::const_iterator,
|
std::vector<std::uint16_t>::const_iterator,
|
||||||
std::vector<unsigned char> &);
|
std::vector<unsigned char> &);
|
||||||
|
|
||||||
[[nodiscard]] std::vector<std::uint16_t>
|
std::vector<unsigned char> pack_11(std::vector<std::uint16_t>::const_iterator,
|
||||||
unpack(std::basic_string<unsigned char> &&);
|
std::vector<std::uint16_t>::const_iterator,
|
||||||
|
std::vector<unsigned char> &);
|
||||||
|
|
||||||
[[nodiscard]] std::vector<std::uint16_t>
|
std::vector<unsigned char> pack_12(std::vector<std::uint16_t>::const_iterator,
|
||||||
unpack_n(std::basic_string<unsigned char>::const_iterator t_begin,
|
std::vector<std::uint16_t>::const_iterator,
|
||||||
std::basic_string<unsigned char>::const_iterator t_end,
|
std::vector<unsigned char> &);
|
||||||
std::vector<std::uint16_t> &, int t_n);
|
|
||||||
|
|
||||||
[[nodiscard]] std::vector<std::uint16_t>
|
std::vector<unsigned char> pack_13(std::vector<std::uint16_t>::const_iterator,
|
||||||
|
std::vector<std::uint16_t>::const_iterator,
|
||||||
|
std::vector<unsigned char> &);
|
||||||
|
|
||||||
|
std::vector<unsigned char> pack_14(std::vector<std::uint16_t>::const_iterator,
|
||||||
|
std::vector<std::uint16_t>::const_iterator,
|
||||||
|
std::vector<unsigned char> &);
|
||||||
|
|
||||||
|
std::vector<unsigned char> pack_15(std::vector<std::uint16_t>::const_iterator,
|
||||||
|
std::vector<std::uint16_t>::const_iterator,
|
||||||
|
std::vector<unsigned char> &);
|
||||||
|
|
||||||
|
std::vector<unsigned char> pack_16(std::vector<std::uint16_t>::const_iterator,
|
||||||
|
std::vector<std::uint16_t>::const_iterator,
|
||||||
|
std::vector<unsigned char> &);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
// unpack //
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
std::vector<std::uint16_t> unpack(std::basic_string<unsigned char> &&);
|
||||||
|
|
||||||
|
std::vector<std::uint16_t>
|
||||||
|
unpack_9(std::basic_string<unsigned char>::const_iterator,
|
||||||
|
std::basic_string<unsigned char>::const_iterator);
|
||||||
|
|
||||||
|
std::vector<std::uint16_t>
|
||||||
|
unpack_10(std::basic_string<unsigned char>::const_iterator,
|
||||||
|
std::basic_string<unsigned char>::const_iterator,
|
||||||
|
std::vector<std::uint16_t> &);
|
||||||
|
|
||||||
|
std::vector<std::uint16_t>
|
||||||
|
unpack_11(std::basic_string<unsigned char>::const_iterator,
|
||||||
|
std::basic_string<unsigned char>::const_iterator,
|
||||||
|
std::vector<std::uint16_t> &);
|
||||||
|
|
||||||
|
std::vector<std::uint16_t>
|
||||||
|
unpack_12(std::basic_string<unsigned char>::const_iterator,
|
||||||
|
std::basic_string<unsigned char>::const_iterator,
|
||||||
|
std::vector<std::uint16_t> &);
|
||||||
|
|
||||||
|
std::vector<std::uint16_t>
|
||||||
|
unpack_13(std::basic_string<unsigned char>::const_iterator,
|
||||||
|
std::basic_string<unsigned char>::const_iterator,
|
||||||
|
std::vector<std::uint16_t> &);
|
||||||
|
|
||||||
|
std::vector<std::uint16_t>
|
||||||
|
unpack_14(std::basic_string<unsigned char>::const_iterator,
|
||||||
|
std::basic_string<unsigned char>::const_iterator,
|
||||||
|
std::vector<std::uint16_t> &);
|
||||||
|
|
||||||
|
std::vector<std::uint16_t>
|
||||||
|
unpack_15(std::basic_string<unsigned char>::const_iterator,
|
||||||
|
std::basic_string<unsigned char>::const_iterator,
|
||||||
|
std::vector<std::uint16_t> &);
|
||||||
|
|
||||||
|
std::vector<std::uint16_t>
|
||||||
unpack_16(std::basic_string<unsigned char>::const_iterator,
|
unpack_16(std::basic_string<unsigned char>::const_iterator,
|
||||||
std::basic_string<unsigned char>::const_iterator,
|
std::basic_string<unsigned char>::const_iterator,
|
||||||
std::vector<std::uint16_t> &);
|
std::vector<std::uint16_t> &);
|
||||||
|
@ -1,3 +1,8 @@
|
|||||||
|
/**
|
||||||
|
* \file common.cc
|
||||||
|
* \brief Implementation for functions in common
|
||||||
|
*/
|
||||||
|
|
||||||
#include "common.hh"
|
#include "common.hh"
|
||||||
|
|
||||||
using std::uint16_t;
|
using std::uint16_t;
|
||||||
@ -6,7 +11,7 @@ using dic_comp_t = std::map<std::pair<uint16_t, uint8_t>, uint16_t>;
|
|||||||
using ustring = std::basic_string<unsigned char>;
|
using ustring = std::basic_string<unsigned char>;
|
||||||
using p_ustring = std::shared_ptr<ustring>;
|
using p_ustring = std::shared_ptr<ustring>;
|
||||||
|
|
||||||
[[nodiscard]] int ipow(int base, int exp) {
|
int ipow(int base, int exp) {
|
||||||
int result = 1;
|
int result = 1;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if (exp & 1) {
|
if (exp & 1) {
|
||||||
@ -21,21 +26,43 @@ using p_ustring = std::shared_ptr<ustring>;
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] std::pair<bool, uint16_t>
|
/**
|
||||||
dico(dic_comp_t &t_dictionary, const uint16_t t_nr_chaine, const uint8_t t_c) {
|
* Cette fonction a pour double usage la recherche d’une chaine de caractères
|
||||||
|
* dans le dictionnaire, ou bien l’ajout d’une nouvelle chaîne si celle-ci
|
||||||
|
* n’est pas déjà présente. Une chaine de caractères est représentée par un
|
||||||
|
* couple numéro de chaine / caractère, le numéro de chaine renvoyant au
|
||||||
|
* caractère précédent (soit son code ASCII, soit son indice dans le
|
||||||
|
* dictionnaire) et le caractère se référant au dernier caractère de la chaine
|
||||||
|
* courante. Si le numéro de chaine est -1, alors il s’agit du premier caractère
|
||||||
|
* de la chaine, et la valeur renvoyée sera la valeur ASCII du caractère. La
|
||||||
|
* fonction renvoie une paire bool/uint16_t, la valeur booléenne indiquant si une
|
||||||
|
* nouvelle chaîne fut ajoutée dans le dictionnaire ou non, et le uint16_t indiquant la
|
||||||
|
* valeur numérique de la chaîne dans le dictionnaire.
|
||||||
|
*
|
||||||
|
* \param t_dictionary Dictionnaire
|
||||||
|
* \param t_nr_chaine Numéro de la chaine précédant le caractères \p t_c dans \p t_dictionary
|
||||||
|
* \param t_c Caractère suivant la chaine de caractères \p t_nr_chaine
|
||||||
|
* \return const std::pair<bool, uint16_t>
|
||||||
|
*/
|
||||||
|
std::pair<bool, uint16_t> dico(dic_comp_t &t_dictionary,
|
||||||
|
const uint16_t t_nr_chaine, const uint8_t t_c) {
|
||||||
if (t_nr_chaine == 0xFFFF) {
|
if (t_nr_chaine == 0xFFFF) {
|
||||||
return std::make_pair(true, t_c);
|
return std::make_pair(true, t_c);
|
||||||
}
|
}
|
||||||
auto &e = t_dictionary[std::make_pair(t_nr_chaine, t_c)];
|
auto &e = t_dictionary[std::make_pair(t_nr_chaine, t_c)];
|
||||||
if (e != 0)
|
return (e != 0) ? std::make_pair(true, e)
|
||||||
return std::make_pair(true, e);
|
: std::make_pair(false, (e = static_cast<uint16_t>(
|
||||||
e = static_cast<uint16_t>(t_dictionary.size() + 255);
|
t_dictionary.size() + 255)));
|
||||||
return std::make_pair(false, e);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] ustring dico_uncompress(std::map<uint16_t, ustring> &t_dict,
|
/**
|
||||||
const uint16_t t_code,
|
* Detailed description
|
||||||
const uint16_t t_old) {
|
*
|
||||||
|
* \param t_dict Dictionnaire
|
||||||
|
* \return Retourne une chaîne de caractères non signés
|
||||||
|
*/
|
||||||
|
ustring dico_uncompress(std::map<uint16_t, ustring> &t_dict,
|
||||||
|
const uint16_t t_code, const uint16_t t_old) {
|
||||||
// le code existe dans le dictionnaire s’il est < 256
|
// le code existe dans le dictionnaire s’il est < 256
|
||||||
if (t_code < 256) {
|
if (t_code < 256) {
|
||||||
ustring e{static_cast<unsigned char>(t_code)};
|
ustring e{static_cast<unsigned char>(t_code)};
|
||||||
|
@ -1,3 +1,8 @@
|
|||||||
|
/**
|
||||||
|
* \file common.hh
|
||||||
|
* \brief Header for functions in common
|
||||||
|
*/
|
||||||
|
|
||||||
#ifndef LZW_SRC_COMMON_H_
|
#ifndef LZW_SRC_COMMON_H_
|
||||||
#define LZW_SRC_COMMON_H_
|
#define LZW_SRC_COMMON_H_
|
||||||
|
|
||||||
@ -5,14 +10,15 @@
|
|||||||
#include <map>
|
#include <map>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
[[nodiscard]] int ipow(int, int);
|
int ipow(int, int);
|
||||||
|
|
||||||
[[nodiscard]] std::pair<bool, std::uint16_t>
|
/// \brief Recherche ou ajout de chaine dans le dictionnaire
|
||||||
|
std::pair<bool, std::uint16_t>
|
||||||
dico(std::map<std::pair<std::uint16_t, std::uint8_t>, std::uint16_t> &,
|
dico(std::map<std::pair<std::uint16_t, std::uint8_t>, std::uint16_t> &,
|
||||||
const std::uint16_t, const std::uint8_t);
|
std::uint16_t, std::uint8_t);
|
||||||
|
|
||||||
[[nodiscard]] std::basic_string<unsigned char>
|
std::basic_string<unsigned char>
|
||||||
dico_uncompress(std::map<std::uint16_t, std::basic_string<unsigned char>> &,
|
dico_uncompress(std::map<std::uint16_t, std::basic_string<unsigned char>> &,
|
||||||
const std::uint16_t, const std::uint16_t);
|
std::uint16_t, std::uint16_t);
|
||||||
|
|
||||||
#endif /* LZW_SRC_COMMON_H_ */
|
#endif /* LZW_SRC_COMMON_H_ */
|
||||||
|
@ -1,11 +1,14 @@
|
|||||||
|
/**
|
||||||
|
* \file compress.cc
|
||||||
|
* \brief Implementation of compression
|
||||||
|
*/
|
||||||
|
|
||||||
#include "compress.hh"
|
#include "compress.hh"
|
||||||
#include "common.hh"
|
|
||||||
#include "io.hh"
|
#include "io.hh"
|
||||||
|
#include "common.hh"
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <iterator>
|
|
||||||
using std::ios;
|
|
||||||
using std::string;
|
using std::string;
|
||||||
using std::uint16_t;
|
using std::uint16_t;
|
||||||
using std::uint8_t;
|
using std::uint8_t;
|
||||||
@ -17,39 +20,33 @@ using ustring = std::basic_string<unsigned char>;
|
|||||||
using dict_t = std::map<std::pair<uint16_t, uint8_t>, uint16_t>;
|
using dict_t = std::map<std::pair<uint16_t, uint8_t>, uint16_t>;
|
||||||
using std::printf;
|
using std::printf;
|
||||||
|
|
||||||
[[nodiscard]] ustring read_file(const string &filename) {
|
const size_t DICT_MAX = static_cast<size_t>(ipow(2, 17) - 256); /* 16 bits */
|
||||||
std::ifstream file{filename, ios::binary};
|
|
||||||
assert(file);
|
|
||||||
file.unsetf(ios::skipws);
|
|
||||||
file.seekg(0, ios::end);
|
|
||||||
const auto file_size = file.tellg();
|
|
||||||
file.seekg(0, ios::beg);
|
|
||||||
ustring res{};
|
|
||||||
res.reserve(file_size);
|
|
||||||
res.insert(res.begin(), std::istream_iterator<unsigned char>(file),
|
|
||||||
std::istream_iterator<unsigned char>());
|
|
||||||
file.close();
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
[[nodiscard]] vvuint16 lzw_compress(ustring &&t_text) {
|
/**
|
||||||
vvuint16 res{};
|
* La chaîne de caractères \p t_text est lue caractère par caractère, et est
|
||||||
const auto DICT_MAX = static_cast<size_t>(ipow(2, 14) - 256); /* 16 bits */
|
* selon la valeur de retour de la fonction \ref dico (permettant dans le même
|
||||||
|
* temps la création du dictionnaire), on rajoute ou non un nouveau caractère
|
||||||
|
* encodé sur 12bits dans le chunk courant. Dès que le dictionnaire est plein
|
||||||
|
* (2^12 caractères), le chunk est sauvegardé et vidé, et le dictionnaire est
|
||||||
|
* réinitialisé.
|
||||||
|
*
|
||||||
|
* \param t_text Chaîne de caractères uint8_t représentant le fichier d'entrée
|
||||||
|
* \return Vecteur de chunks (vecteurs de uint16_t)
|
||||||
|
*/
|
||||||
|
vvuint16 lzw_compress(ustring &&t_text) {
|
||||||
|
std::puts("Compressing...");
|
||||||
uint16_t w = 0xFFFF;
|
uint16_t w = 0xFFFF;
|
||||||
vuint16 chunk{};
|
vuint16 chunk{};
|
||||||
|
vvuint16 res{};
|
||||||
dict_t dict{};
|
dict_t dict{};
|
||||||
for (const auto c : t_text) {
|
for (const auto c : t_text) {
|
||||||
if (dict.size() >= DICT_MAX) {
|
if (dict.size() >= DICT_MAX) {
|
||||||
if (w != 0xFFFF) {
|
res.push_back(std::move(chunk));
|
||||||
chunk.push_back(w);
|
chunk = vuint16{};
|
||||||
}
|
dict = dict_t{};
|
||||||
res.push_back(chunk);
|
|
||||||
w = 0xFFFF;
|
w = 0xFFFF;
|
||||||
chunk.clear();
|
|
||||||
dict.clear();
|
|
||||||
}
|
}
|
||||||
if (const auto &[exists, pos] = dico(dict, w, static_cast<uint8_t>(c));
|
if (const auto &[yes, pos] = dico(dict, w, static_cast<uint8_t>(c)); yes) {
|
||||||
exists) {
|
|
||||||
w = pos;
|
w = pos;
|
||||||
} else {
|
} else {
|
||||||
chunk.push_back(w);
|
chunk.push_back(w);
|
||||||
@ -63,15 +60,30 @@ using std::printf;
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wrapper de la fonction \ref lzw_compress gérant l'ouverture, la lecture,
|
||||||
|
* l'écriture et la fermeture des fichiers d’entrée et de sortie. Si \p
|
||||||
|
* t_out_file est nul (chemin non spécifié), il prendra alors la valeur de
|
||||||
|
* \p t_in_file à laquelle sera annexé l’extension `.lzw`.
|
||||||
|
*
|
||||||
|
* \param[in] t_in_file Chemin vers le fichier d’entrée
|
||||||
|
* \param[in] t_out_file Chemin vers le fichier de sortie
|
||||||
|
*/
|
||||||
void compress(const std::string &t_in_file, const char *t_out_file) {
|
void compress(const std::string &t_in_file, const char *t_out_file) {
|
||||||
std::ofstream out{(t_out_file != nullptr) ? t_out_file : "output.lzw",
|
FILE *const input_file = fopen(t_in_file.c_str(), "rb");
|
||||||
ios::out | ios::binary};
|
assert(input_file);
|
||||||
if (!out.is_open()) {
|
FILE *const out = (t_out_file != nullptr) ? fopen(t_out_file, "wb")
|
||||||
std::cerr << "Error at " << __FILE__ << ":" << __LINE__ - 4
|
: fopen("output.lzw", "wb");
|
||||||
<< ": could not open output file. Aborting...\n";
|
assert(out);
|
||||||
exit(1);
|
|
||||||
}
|
std::fseek(input_file, 0L, SEEK_END);
|
||||||
const auto compressed_text(lzw_compress(read_file(t_in_file)));
|
const auto file_size = static_cast<size_t>(ftell(input_file));
|
||||||
|
std::rewind(input_file);
|
||||||
|
|
||||||
|
auto raw_text = std::make_unique<unsigned char[]>(file_size);
|
||||||
|
std::fread(raw_text.get(), sizeof(unsigned char), file_size, input_file);
|
||||||
|
const auto compressed_text(lzw_compress(ustring{raw_text.get(), &raw_text[file_size]}));
|
||||||
write_file(out, compressed_text);
|
write_file(out, compressed_text);
|
||||||
out.close();
|
fclose(out);
|
||||||
|
fclose(input_file);
|
||||||
}
|
}
|
||||||
|
@ -1,3 +1,8 @@
|
|||||||
|
/**
|
||||||
|
* \file compress.hh
|
||||||
|
* \brief Header for compression functions
|
||||||
|
*/
|
||||||
|
|
||||||
#ifndef LZW_SRC_COMPRESS_H_
|
#ifndef LZW_SRC_COMPRESS_H_
|
||||||
#define LZW_SRC_COMPRESS_H_
|
#define LZW_SRC_COMPRESS_H_
|
||||||
|
|
||||||
@ -6,9 +11,11 @@
|
|||||||
#include <thread>
|
#include <thread>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
[[nodiscard]] std::vector<std::vector<std::uint16_t>>
|
/// \brief Compression d'une chaine de caractères
|
||||||
|
std::vector<std::vector<std::uint16_t>>
|
||||||
lzw_compress(std::basic_string<unsigned char> &&);
|
lzw_compress(std::basic_string<unsigned char> &&);
|
||||||
|
|
||||||
|
/// \brief Wrapper de \ref lzw_compress
|
||||||
void compress(const std::string &, const char *);
|
void compress(const std::string &, const char *);
|
||||||
|
|
||||||
#endif /* LZW_SRC_COMPRESS_H_ */
|
#endif /* LZW_SRC_COMPRESS_H_ */
|
||||||
|
62
src/io.cc
62
src/io.cc
@ -1,27 +1,65 @@
|
|||||||
|
/**
|
||||||
|
* \file io.cc
|
||||||
|
* \brief Body for file reading and writing
|
||||||
|
*/
|
||||||
|
|
||||||
#include "io.hh"
|
#include "io.hh"
|
||||||
#include "bitpack.hh"
|
#include "bitpack.hh"
|
||||||
#include <array>
|
#include <array>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#ifdef Debug
|
||||||
|
constexpr bool debug_mode = true;
|
||||||
|
#include <algorithm>
|
||||||
|
#else
|
||||||
|
constexpr bool debug_mode = false;
|
||||||
|
#endif
|
||||||
|
|
||||||
using std::uint16_t;
|
|
||||||
using std::vector;
|
using std::vector;
|
||||||
|
using std::uint16_t;
|
||||||
using vuint16 = vector<uint16_t>;
|
using vuint16 = vector<uint16_t>;
|
||||||
using vvuint16 = vector<vuint16>;
|
using vvuint16 = vector<vuint16>;
|
||||||
|
|
||||||
void write_file(std::ofstream &t_out, const vvuint16 &t_chunks) {
|
/**
|
||||||
const auto nr_chunks = static_cast<uint16_t>(t_chunks.size());
|
* Écrit dans le fichier \p t_out les chunks passés en paramètre. Le fichier de
|
||||||
#ifdef Debug
|
* sortie est composé des éléments suivants :\n
|
||||||
std::printf("Number of chunks: %u\n", nr_chunks);
|
* - Sur deux octets est écrit un `uint16_t` déterminant le nombre de chunks
|
||||||
#endif
|
* composant le fichier\n
|
||||||
t_out.write(reinterpret_cast<const char *>(&nr_chunks), sizeof(nr_chunks));
|
* - Sont ensuite écrits les chunks sur un nombre variable d’octets suivant la
|
||||||
for (const auto &chunk : t_chunks) {
|
* taille des chunks\n
|
||||||
|
* \n
|
||||||
|
* Un chunk est composé de la manière qui suit :\n
|
||||||
|
* - Sur quatre octets est écrit un `uint32_t` déterminant le nombre d’octets
|
||||||
|
* composant le chunk\n
|
||||||
|
* - Sur le nombre d’octets précisés par le header du chunk se trouvent les
|
||||||
|
* données compressées par l’algorithme lzw puis via bit-packing.\n
|
||||||
|
*
|
||||||
|
* \param[out] t_out Fichier de sortie
|
||||||
|
* \param[in] t_text Collection ordonnée des chunks à écrire dans \p t_out
|
||||||
|
*/
|
||||||
|
void write_file(FILE *const t_out, const vvuint16 &t_text) {
|
||||||
|
const auto size = static_cast<uint16_t>(t_text.size());
|
||||||
|
if constexpr (debug_mode) {
|
||||||
|
std::printf("Number of chunks: %u\n", size);
|
||||||
|
}
|
||||||
|
fwrite(&size, sizeof(size), 1, t_out);
|
||||||
|
for (const auto &chunk : t_text) {
|
||||||
|
if constexpr (debug_mode)
|
||||||
|
std::printf("Chunk!\n");
|
||||||
write_chunk(t_out, chunk);
|
write_chunk(t_out, chunk);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void write_chunk(std::ofstream &t_out, const vuint16 &t_chunk) {
|
/**
|
||||||
|
* Écrit dans le fichier \p t_out le chunk unique \p t_chunk. Se référer à la
|
||||||
|
* documentation de \ref write_file pour plus de détails.
|
||||||
|
*
|
||||||
|
* \param t_out Output file
|
||||||
|
* \param t_chunk Chunk to be written to \p t_out
|
||||||
|
*/
|
||||||
|
void write_chunk(FILE *const t_out, const vuint16 &t_chunk) {
|
||||||
const auto output = pack(t_chunk);
|
const auto output = pack(t_chunk);
|
||||||
const auto chunk_size = static_cast<uint32_t>(output.size());
|
const auto chunk_size = static_cast<uint32_t>(output.size());
|
||||||
t_out.write(reinterpret_cast<const char *>(&chunk_size), sizeof(chunk_size));
|
fwrite(&chunk_size, sizeof(chunk_size), 1, t_out);
|
||||||
t_out.write(reinterpret_cast<const char *>(output.data()),
|
fwrite(output.data(), sizeof(output[0]), output.size(), t_out);
|
||||||
sizeof(output[0]) * output.size());
|
|
||||||
}
|
}
|
||||||
|
28
src/io.hh
28
src/io.hh
@ -1,15 +1,35 @@
|
|||||||
|
/**
|
||||||
|
* \file io.hh
|
||||||
|
* \brief Header for file reading and writing
|
||||||
|
*/
|
||||||
|
|
||||||
#ifndef LZW_SRC_IO_H_
|
#ifndef LZW_SRC_IO_H_
|
||||||
#define LZW_SRC_IO_H_
|
#define LZW_SRC_IO_H_
|
||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <fstream>
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
void write_file(std::ofstream &,
|
/*
|
||||||
const std::vector<std::vector<std::uint16_t>> &);
|
* Un fichier compressé se compose ainsi :
|
||||||
|
* char_size : taille d'un caractère en bits (1B)
|
||||||
|
* nb_chunk : nombre de chunks (4B)
|
||||||
|
* chunks* : chunks
|
||||||
|
*
|
||||||
|
* Un chunk se compose ainsi :
|
||||||
|
* nb_char_chunk : nombre de caractères du chunk (2B)
|
||||||
|
* text* : caractères de taille char_size (ceil((char_size * nb_char_chunk) /
|
||||||
|
* 8))
|
||||||
|
*
|
||||||
|
* Si le dernier caractère ne termine pas le dernier octet du chunk, les
|
||||||
|
* derniers bits sont mis à zéro
|
||||||
|
*/
|
||||||
|
|
||||||
void write_chunk(std::ofstream &, const std::vector<std::uint16_t> &);
|
/// \brief Écrit dans le fichier le texte compressé
|
||||||
|
void write_file(FILE *, const std::vector<std::vector<std::uint16_t>> &);
|
||||||
|
|
||||||
|
/// \brief Écrit un chunk dans le fichier de sortie
|
||||||
|
void write_chunk(FILE *, const std::vector<std::uint16_t> &);
|
||||||
|
|
||||||
#endif /* LZW_SRC_IO_H_ */
|
#endif /* LZW_SRC_IO_H_ */
|
||||||
|
73
src/main.cc
73
src/main.cc
@ -1,8 +1,16 @@
|
|||||||
|
/**
|
||||||
|
* \file main.cc
|
||||||
|
* \brief Main file
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <getopt.h>
|
||||||
|
#include <cassert>
|
||||||
|
#include <tuple>
|
||||||
#include "compress.hh"
|
#include "compress.hh"
|
||||||
#include "uncompress.hh"
|
#include "uncompress.hh"
|
||||||
#include <cassert>
|
|
||||||
#include <getopt.h>
|
|
||||||
#include <tuple>
|
|
||||||
|
|
||||||
using std::printf;
|
using std::printf;
|
||||||
using std::puts;
|
using std::puts;
|
||||||
@ -11,35 +19,41 @@ using std::tuple;
|
|||||||
|
|
||||||
// custom types ///////////////////////////////////////////////////////////////
|
// custom types ///////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*
|
||||||
|
Dictionnaire :
|
||||||
|
<
|
||||||
|
<
|
||||||
|
numéro chaine précédente,
|
||||||
|
caractère ASCII
|
||||||
|
>
|
||||||
|
numéro chaine courante
|
||||||
|
>
|
||||||
|
*/
|
||||||
using dic_t = std::map<std::pair<uint32_t, uint8_t>, uint32_t>;
|
using dic_t = std::map<std::pair<uint32_t, uint8_t>, uint32_t>;
|
||||||
using ustring = std::basic_string<uint8_t>; // chaine non encodée
|
using ustring = std::basic_string<uint8_t>; // chaine non encodée
|
||||||
using uvec = std::vector<uint32_t>; // chaine encodée
|
using uvec = std::vector<uint32_t>; // chaine encodée
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Affichage d’aide
|
||||||
|
*/
|
||||||
void help() {
|
void help() {
|
||||||
puts("Usage:\n\
|
puts("Usage:");
|
||||||
lzw [-options] [-i path] [-o path]\n\n\
|
puts("lzw [-options] [-i path] [-o path]");
|
||||||
The default action is to compress the input file to a .lzw file\n\
|
puts("\tThe default action is to compress the input file to a .lzw file");
|
||||||
in which the directory in which the software is executed.\n\
|
puts("\tin which the directory in which the software is executed.");
|
||||||
Options available:\n\
|
puts("\tOptions available:");
|
||||||
-h --help\n\
|
puts("\t-i\tpath to the input file (mandatory)");
|
||||||
\tdisplay the current message\n\
|
puts("\t-o\tpath to the output file (if the file already exists, it will");
|
||||||
-i --input\n\
|
puts("\t\tbe overwritten). Default: input path + \".lzw\"");
|
||||||
\tpath to the input file (MANDATORY)\n\
|
puts("\t-c\tcompress the input file");
|
||||||
-o --output\n\
|
puts("\t-u\tuncompresses the input file to the output file. If no output");
|
||||||
\tpath to the output file (if the file already exists, it will be\n\n\
|
puts("\t\tpath has not been entered and if the input file ends with ");
|
||||||
\toverwritten). Default: input path + \".lzw\\n\
|
puts("\t\t\".lzw\", the extension \".lzw\" will be removed; otherwise, the ");
|
||||||
-c --compress\n\
|
puts("\t\textension \".uncompresed\" will be added");
|
||||||
\tcompress the input file\n\
|
|
||||||
-u --uncompress\n\
|
|
||||||
\tuncompresses the input file to the output file. If no output path\n\
|
|
||||||
\thas not been entered and if the input file ends with \".lzw\",\n\
|
|
||||||
\tthe extension \".lzw\" will be removed; otherwise, the extension\n\
|
|
||||||
\t\"_uncompresed\" will be added");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] std::tuple<string, string, bool> process_args(int t_argc,
|
std::tuple<string, string, bool> process_args(int t_argc, char *t_argv[]) {
|
||||||
char *t_argv[]) {
|
auto ret = std::make_tuple(string{}, string{}, false);
|
||||||
auto ret = std::make_tuple(string{}, string{}, true);
|
|
||||||
while (true) {
|
while (true) {
|
||||||
int option_index = 0;
|
int option_index = 0;
|
||||||
static struct option long_options[] = {
|
static struct option long_options[] = {
|
||||||
@ -50,8 +64,7 @@ Options available:\n\
|
|||||||
{"uncompress", no_argument, nullptr, 'u'},
|
{"uncompress", no_argument, nullptr, 'u'},
|
||||||
{nullptr, 0, nullptr, 0}};
|
{nullptr, 0, nullptr, 0}};
|
||||||
int c = getopt_long(t_argc, t_argv, "hi:o:cu", long_options, &option_index);
|
int c = getopt_long(t_argc, t_argv, "hi:o:cu", long_options, &option_index);
|
||||||
if (c == -1)
|
if (c == -1) break;
|
||||||
break;
|
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case 0:
|
case 0:
|
||||||
break;
|
break;
|
||||||
@ -81,12 +94,10 @@ Options available:\n\
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* TODO: compression multiple : nombre de compressions puis fichier compressé */
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
const auto [input_path, output_path, compressing] = process_args(argc, argv);
|
const auto [input_path, output_path, compressing] = process_args(argc, argv);
|
||||||
if (input_path.empty()) {
|
assert(!input_path.empty());
|
||||||
help();
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
if (compressing) {
|
if (compressing) {
|
||||||
compress(input_path, (output_path.empty()) ? nullptr : output_path.c_str());
|
compress(input_path, (output_path.empty()) ? nullptr : output_path.c_str());
|
||||||
} else {
|
} else {
|
||||||
|
@ -4,8 +4,6 @@
|
|||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <fstream>
|
|
||||||
#include <iterator>
|
|
||||||
|
|
||||||
using std::fclose;
|
using std::fclose;
|
||||||
using std::fopen;
|
using std::fopen;
|
||||||
@ -16,16 +14,18 @@ using std::vector;
|
|||||||
using ustring = std::basic_string<unsigned char>;
|
using ustring = std::basic_string<unsigned char>;
|
||||||
using vuint16 = vector<uint16_t>;
|
using vuint16 = vector<uint16_t>;
|
||||||
|
|
||||||
[[nodiscard]] ustring lzw_uncompress(vuint16 &&t_compressed) {
|
ustring lzw_uncompress(vuint16 &&t_compressed) {
|
||||||
ustring ret{};
|
ustring ret{};
|
||||||
uint16_t old = 0;
|
uint16_t old = 0;
|
||||||
std::map<uint16_t, ustring> dict{};
|
std::map<uint16_t, ustring> dict{};
|
||||||
ret.append({static_cast<unsigned char>(t_compressed[0])});
|
uint16_t v = t_compressed[0];
|
||||||
old = t_compressed[0];
|
ret.append({static_cast<unsigned char>(v)});
|
||||||
|
old = v;
|
||||||
for (auto it = t_compressed.begin() + 1; it != t_compressed.end(); ++it) {
|
for (auto it = t_compressed.begin() + 1; it != t_compressed.end(); ++it) {
|
||||||
const auto uncompressed{dico_uncompress(dict, *it, old)};
|
v = *it;
|
||||||
|
const auto uncompressed{dico_uncompress(dict, v, old)};
|
||||||
ret.insert(ret.end(), uncompressed.begin(), uncompressed.end());
|
ret.insert(ret.end(), uncompressed.begin(), uncompressed.end());
|
||||||
old = *it;
|
old = v;
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
@ -34,27 +34,31 @@ using vuint16 = vector<uint16_t>;
|
|||||||
void uncompress(const string &t_input_name, const char *t_output_name) {
|
void uncompress(const string &t_input_name, const char *t_output_name) {
|
||||||
FILE *const input = std::fopen(t_input_name.c_str(), "rb");
|
FILE *const input = std::fopen(t_input_name.c_str(), "rb");
|
||||||
assert(input);
|
assert(input);
|
||||||
std::ofstream output{(t_output_name != nullptr)
|
|
||||||
? t_output_name
|
FILE *const output =
|
||||||
: t_input_name + "_uncompressed",
|
(t_output_name != nullptr)
|
||||||
std::ios::out | std::ios::binary};
|
? std::fopen(t_output_name, "wb")
|
||||||
assert(output.is_open());
|
: std::fopen((t_input_name + "_uncompressed").c_str(), "wb");
|
||||||
|
assert(output);
|
||||||
|
|
||||||
uint16_t nb_chunks = 0;
|
uint16_t nb_chunks = 0;
|
||||||
|
std::fseek(input, 0, SEEK_SET);
|
||||||
std::fread(&nb_chunks, sizeof(nb_chunks), 1, input);
|
std::fread(&nb_chunks, sizeof(nb_chunks), 1, input);
|
||||||
|
|
||||||
for (uint16_t i = 0; i < nb_chunks; ++i) {
|
for (uint16_t i = 0; i < nb_chunks; ++i) {
|
||||||
uncompress_chunk(input, output);
|
uint32_t size_chunk = 0;
|
||||||
|
fread(&size_chunk, sizeof(size_chunk), 1, input);
|
||||||
|
auto *chunk = static_cast<unsigned char *>(
|
||||||
|
std::malloc(sizeof(unsigned char) * size_chunk));
|
||||||
|
fread(chunk, sizeof(unsigned char), size_chunk, input);
|
||||||
|
|
||||||
|
auto unpacked = unpack(ustring{chunk, chunk + size_chunk});
|
||||||
|
const auto uncompressed_chunk = lzw_uncompress(std::move(unpacked));
|
||||||
|
// sometimes will add null char
|
||||||
|
std::fwrite(uncompressed_chunk.data(), sizeof(uncompressed_chunk[0]),
|
||||||
|
uncompressed_chunk.size(), output);
|
||||||
}
|
}
|
||||||
output.close();
|
|
||||||
|
std::fclose(output);
|
||||||
std::fclose(input);
|
std::fclose(input);
|
||||||
}
|
}
|
||||||
|
|
||||||
void uncompress_chunk(FILE *const t_input, std::ofstream &t_output) {
|
|
||||||
uint32_t size_chunk = 0;
|
|
||||||
fread(&size_chunk, sizeof(size_chunk), 1, t_input);
|
|
||||||
auto chunk = std::make_unique<unsigned char[]>(size_chunk);
|
|
||||||
fread(chunk.get(), sizeof(unsigned char), size_chunk, t_input);
|
|
||||||
auto unpacked = unpack(ustring{chunk.get(), chunk.get() + size_chunk});
|
|
||||||
auto uncompressed_chunk = lzw_uncompress(std::move(unpacked));
|
|
||||||
t_output.write(reinterpret_cast<const char *>(uncompressed_chunk.data()),
|
|
||||||
sizeof(uncompressed_chunk[0]) * uncompressed_chunk.size());
|
|
||||||
}
|
|
||||||
|
@ -1,16 +1,12 @@
|
|||||||
#ifndef LZW_SRC_UNCOMPRESS_H_
|
#ifndef LZW_SRC_UNCOMPRESS_H_
|
||||||
#define LZW_SRC_UNCOMPRESS_H_
|
#define LZW_SRC_UNCOMPRESS_H_
|
||||||
|
|
||||||
#include <fstream>
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
[[nodiscard]] std::basic_string<unsigned char>
|
std::basic_string<unsigned char> lzw_uncompress(std::vector<std::uint16_t> &&);
|
||||||
lzw_uncompress(std::vector<std::uint16_t> &&);
|
|
||||||
|
|
||||||
void uncompress(const std::string &, const char *);
|
void uncompress(const std::string &, const char *);
|
||||||
|
|
||||||
void uncompress_chunk(FILE *, std::ofstream &);
|
|
||||||
|
|
||||||
#endif /* LZW_SRC_UNCOMPRESS_H_ */
|
#endif /* LZW_SRC_UNCOMPRESS_H_ */
|
||||||
|
Loading…
Reference in New Issue
Block a user