Compare commits

..

14 Commits

Author SHA1 Message Date
531c9b1acc Add From<&str> trait implementation to Rule, ToString to Regex
Also remove obvious test for Rule
2020-07-12 12:30:15 +02:00
cb933d4896 Added documentation and tests 2020-07-12 12:29:39 +02:00
d33190a46d Cleaner code 2020-07-12 12:28:19 +02:00
bb9d8703f3 Cleaner output while running tests 2020-07-12 12:24:00 +02:00
144771a2c0 Stricter clippy rules 2020-07-12 12:23:10 +02:00
22fbe5aba7 Add Default trait to Settings 2020-07-11 17:52:14 +02:00
9cc1a52e5a Better debug messages and logging handling 2020-07-11 17:51:38 +02:00
78541f10ba Added and updated documentation and tests
Some undocumented functions and methods are now documented and tested.

Some doc tests were modified to become compilable, going from `ignore`
to `no_run`, or runnable, going from `no_run` to regular tests.
2020-07-11 17:48:27 +02:00
8ddd33d76d Improve code and tests readability 2020-07-11 17:47:28 +02:00
a8bafc072b Remove unnecessary code
This commit removes some unnecessary code, such as an extra `String`
clone, unnecessary `return` statements, and an inherent `to_string`
function for the `Regex` wrapper.

A double-quote character considered as a string literal by the
compiler was also changed to a single-quote character, this time
considered as a character by the compiler, for some optimization.

Also a newline is added in order to improve code readability.
2020-07-11 17:40:20 +02:00
3c960eaa35 Move some tests to lib.rs 2020-07-11 17:35:57 +02:00
de45ffc15c Update dependencies, remove unneeded keywords and declarations
Also declaration of a lazy_static element was moved in a better spot
2020-07-11 17:34:01 +02:00
042fd066f0 Split Rule::update in several functions
Also optimization with a static Regex
2020-04-05 15:51:23 +02:00
071c8b0728 Made mod utils public 2020-04-05 15:48:41 +02:00
6 changed files with 321 additions and 128 deletions

View File

@@ -9,10 +9,15 @@ edition = "2018"
[dependencies]
# Logger
log = "0.4"
simplelog = "0.7"
simplelog = "0.8"
# Struct serializing and deserializing
serde = {version = "1.0", features = ["derive"]}
serde_yaml = "0.7"
serde_yaml = "0.8"
serde_json = "1.0"
# Regex support
regex = "1.3"
regex = "1.3"
lazy_static = "1.4"
# [dev-dependencies]
# Pretty output
prettydiff = "0.3"

View File

@@ -1,4 +1,5 @@
#![crate_name = "lang_evolve_core"]
#![deny(clippy::all)]
//! # LangEvolveCore
//!
@@ -13,13 +14,12 @@
//! user-defined sound changes to words and texts based on regex expressions.
use std::fs::File;
extern crate log;
extern crate simplelog;
use log::{info, warn};
use simplelog::*;
pub mod settings;
mod utils;
pub mod utils;
/// Initializes the crate
///
@@ -41,24 +41,39 @@ mod utils;
/// lang_evolve_core::init();
/// ```
pub fn init() -> std::result::Result<(), log::SetLoggerError> {
match CombinedLogger::init(vec![
TermLogger::new(
LevelFilter::Warn,
Config::default(),
TerminalMode::Mixed,
)
.unwrap(),
WriteLogger::new(
LevelFilter::Debug,
Config::default(),
File::create("core.log").unwrap(),
),
WriteLogger::new(
LevelFilter::Info,
Config::default(),
File::create("core.log").unwrap(),
),
]) {
// #[cfg(debug_assertions)]
match CombinedLogger::init(if cfg!(debug_assertions) {
vec![
WriteLogger::new(
LevelFilter::Warn,
Config::default(),
File::create("core.log").unwrap(),
),
WriteLogger::new(
LevelFilter::Debug,
Config::default(),
File::create("core.log").unwrap(),
),
WriteLogger::new(
LevelFilter::Info,
Config::default(),
File::create("core.log").unwrap(),
),
]
} else {
vec![
WriteLogger::new(
LevelFilter::Warn,
Config::default(),
File::create("core.log").unwrap(),
),
WriteLogger::new(
LevelFilter::Info,
Config::default(),
File::create("core.log").unwrap(),
),
]
}) {
Err(why) => {
warn!("Could not initialize logger: {}", why.to_string());
Err(why)
@@ -69,3 +84,29 @@ pub fn init() -> std::result::Result<(), log::SetLoggerError> {
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Default settings serialized to YAML and written with
    /// `utils::write_file` must read back as the expected document.
    #[test]
    fn write_settings() {
        let expected = r#"---
version: "1"
categories: {}
rules: []"#;
        let target = std::path::Path::new("settings.yaml");
        let defaults = settings::Settings::new();
        let serialized = serde_yaml::to_string(&defaults).unwrap();
        utils::write_file(&target, &serialized).unwrap();
        assert_eq!(expected, utils::read_file(&target).unwrap());
    }

    /// Settings exported to a file and imported again must compare equal to
    /// the instance they were exported from.
    #[test]
    fn read_settings() {
        let target = std::path::Path::new("settings.yml");
        let original = settings::Settings::new();
        original.export(&target).unwrap();
        let reloaded = settings::Settings::import(&target).unwrap();
        assert_eq!(original, reloaded);
    }
}

View File

@@ -1,13 +1,11 @@
extern crate serde;
extern crate serde_json;
extern crate serde_yaml;
use serde::{Deserialize, Serialize};
extern crate log;
use log::{debug, error, info};
use crate::utils::{self, SettingsType};
use prettydiff::diff_words;
mod rule;
use rule::Rule;
@@ -26,17 +24,17 @@ const RULESET_CURRENT_VERSION: i32 = 1;
///
/// # Example
///
/// ```ignore
/// # use lang_evolve_core::settings::*;
/// # use lang_evolve_core::encode_settings;
/// use std::io::{Error, ErrorKind};
/// ```no_run
/// use std::path::Path;
/// let filetype = utils::get_file_type(Path::new("./path/to/file.json"));
/// let s = Settings::new();
/// use lang_evolve_core::utils;
///
/// let settings = Settings::new();
/// let filetype = utils::get_file_type(Path::new("settings.yml"));
///
/// let content = match filetype {
/// utils::SettingsType::Yaml => encode_settings!(serde_yaml, &s).unwrap(),
/// utils::SettingsType::Json => encode_settings!(serde_json, &s).unwrap(),
/// _ => panic!("Could not encode settings"),
/// SettingsType::Yaml => encode_settings!(serde_yaml, &settings),
/// SettingsType::Json => encode_settings!(serde_json, &settings),
/// _ => String::from("Error!"),
/// };
/// ```
///
@@ -66,7 +64,7 @@ macro_rules! encode_settings {
///
/// # Example
///
/// ```ignore
/// ```no_run
/// # use lang_evolve_core::decode_settings;
/// let str = r#"{"version":"1","categories":[],"rules":[]}"#;
/// let settings = decode_settings!(serde_json, str);
@@ -89,6 +87,7 @@ macro_rules! decode_settings {
}
use std::collections::HashMap;
/// Representation of the software's settings
///
/// This struct represents all the settings the software has to follow
@@ -163,9 +162,15 @@ impl Settings {
///
/// # Example
///
/// ```no_run
/// ```
/// use std::path::Path;
/// use lang_evolve_core::settings::Settings;
/// # use lang_evolve_core::settings::Settings;
/// # let s = Settings::new();
/// # for path in vec!["settings.json", "settings.yaml", "settings.yml"] {
/// # let path = Path::new(path);
/// # s.export(&path).unwrap();
/// # }
///
/// let path_json = Path::new("settings.json");
/// let _s_json = Settings::import(&path_json).unwrap();
///
@@ -201,15 +206,17 @@ impl Settings {
}
},
};
if settings.version > Self::get_ruleset_version() {
error!("Ruleset version too high!");
return Err(std::io::Error::new(
Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
"Ruleset version too high!",
));
))
} else {
info!("Successfuly imported {}", path.display());
Ok(settings)
}
info!("Successfuly imported {}", path.display());
Ok(settings)
}
/// Import settings from file path described by the argument `path`
@@ -220,16 +227,20 @@ impl Settings {
///
/// # Example
///
/// ```no_run
/// let s = lang_evolve_core::settings::Settings::from("settings.yml");
/// ```
pub fn from<S>(s: S) -> Self
/// # use lang_evolve_core::settings::Settings;
/// # use std::path::Path;
/// # let s = Settings::default();
/// # s.export(Path::new("settings.yml"));
/// let s = Settings::from("settings.yml");
/// ```
pub fn from<S>(s: S) -> std::io::Result<Self>
where
S: ToString,
{
let s = s.to_string();
let path = std::path::Path::new(&s);
Self::import(&path).unwrap()
Self::import(&path)
}
/// Add a new rule to the current settings
@@ -238,6 +249,20 @@ impl Settings {
///
/// * `from` - Regex that should match the text to be replaced
/// * `to` - Regex that should replace some text
///
/// # Example
///
/// ```
/// # use lang_evolve_core::settings::Settings;
/// let mut settings = Settings::default();
/// settings.add_rule("ha", "wa");
///
/// use std::str::FromStr;
/// let reference = Settings::from_str(
/// r#"{"version":"1","categories":{},"rules":[{"from":"ha","to":"wa"}]}"#)
/// .unwrap();
/// assert_eq!(reference, settings);
/// ```
pub fn add_rule(&mut self, from: &str, to: &str) {
    // Build the rule first, then append it after the rules already stored.
    let rule = Rule::new(from, to);
    self.rules.push(rule);
}
@@ -248,6 +273,20 @@ impl Settings {
///
/// * `name` - Name of the category
/// * `content` - Content of the category, phonemes
///
/// # Example
///
/// ```
/// # use lang_evolve_core::settings::Settings;
/// let mut settings = Settings::default();
/// settings.add_category("C", "abcde");
///
/// use std::str::FromStr;
/// let reference = Settings::from_str(
/// r#"{"version":"1","categories":{"C": "abcde"},"rules":[]}"#)
/// .unwrap();
/// assert_eq!(reference, settings);
/// ```
pub fn add_category(&mut self, name: &str, content: &str) {
    // Both arguments are copied into owned strings before being stored; the
    // value returned by `insert` is intentionally ignored.
    let (key, phonemes) = (name.to_owned(), content.to_owned());
    self.categories.insert(key, phonemes);
}
@@ -263,17 +302,19 @@ impl Settings {
/// # Example
///
/// ```
/// # use lang_evolve_core::settings::Settings;
/// use std::path::Path;
/// let s = lang_evolve_core::settings::Settings::new();
///
/// let s = Settings::new();
///
/// // Export to JSON
/// let path_json = Path::new("./output.json");
/// let path_json = Path::new("settings.json");
/// s.export(&path_json).unwrap();
///
/// // Export to Yaml, both ".yml" and ".yaml" work
/// let path_yaml = Path::new("./output.yaml");
/// let path_yaml = Path::new("settings.yaml");
/// s.export(&path_yaml).unwrap();
/// let path_yml = Path::new("./output.yml");
/// let path_yml = Path::new("settings.yml");
/// s.export(&path_yml).unwrap();
/// ```
///
@@ -300,18 +341,19 @@ impl Settings {
RULESET_CURRENT_VERSION.to_string()
}
/// Transform input rules into Regexes that can be understood by Rust.
fn update_rules(&self) -> std::result::Result<Vec<Rule>, String> {
let rules = self.rules.clone();
let rules: Vec<Rule> = rules
.iter()
.map(|x| x.update(&self.categories).unwrap())
.map(|rule| rule.update(&self.categories).unwrap())
.collect();
Ok(rules)
}
/// Apply list of rules to input
///
/// The list of rules in the struct will be applied to the input `s`. If the
/// The list of rules in the struct will be applied to the input `new`. If the
/// rule contains the `%` character followed by a capital letter, this marks
/// a category of phonemes and should be replaced by them. For instance, we
/// have:
@@ -321,7 +363,7 @@ impl Settings {
///
/// # Arguments
///
/// * `s` - Input to modify
/// * `new` - Input to modify
///
/// # Example
///
@@ -329,32 +371,62 @@ impl Settings {
/// # use lang_evolve_core::settings::Settings;
/// let settings = Settings::new();
/// // add some rules...
/// let input = String::new();
/// // set some input
/// let input = String::new();
/// let _output = settings.apply(input);
/// ```
pub fn apply(&self, s: String) -> std::result::Result<String, String> {
// TODO Add Error handling
let rules = self.update_rules().unwrap();
let mut s = s.clone();
let mut s = s;
debug!("===============================================");
for rule in rules {
debug!(
"from: {}\tto: {}",
"from: \"{}\"\tto: \"{}\"",
rule.get_from().to_string(),
rule.get_to()
);
debug!("old: {}", s);
s = rule
let old = s.clone();
let new = rule
.get_from()
.replace_all(&s, rule.get_to().as_str())
.to_string();
debug!("new: {}", s);
if cfg!(debug_assertions) {
let diffs = diff_words(&old, &new);
if diffs.diff().len() > 1 {
debug!("diff:\n{}", diff_words(&old, &new));
} else {
debug!("diff: No changes");
}
}
s = new;
}
Ok(s)
}
}
impl Default for Settings {
/// Creates a new empty instance of [`Settings`]
///
/// # Example
///
/// ```
/// let s = lang_evolve_core::settings::Settings::default();
/// let content_yaml = r#"---
/// version: "1"
/// categories: {}
/// rules: []"#;
/// let content_json = r#"{"version":"1","categories":{},"rules":[]}"#;
/// assert_eq!(content_yaml, serde_yaml::to_string(&s).unwrap());
/// assert_eq!(content_json, serde_json::to_string(&s).unwrap());
/// ```
///
/// [`Settings`]: ./settings/struct.Settings.html
fn default() -> Self {
Self::new()
}
}
use std::str::FromStr;
impl FromStr for Settings {
type Err = serde_yaml::Error;
@@ -381,7 +453,7 @@ impl FromStr for Settings {
Ok(val) => Ok(val),
Err(e) => {
error!("Could not decode input {}: {}", s, e.to_string());
return Err(e);
Err(e)
}
},
}
@@ -405,24 +477,3 @@ impl PartialEq for Settings {
}
impl Eq for Settings {}
#[test]
fn write_settings() {
let s = Settings::new();
let path = std::path::Path::new("test.yaml");
let settings = r#"---
version: "1"
categories: {}
rules: []"#;
utils::write_file(&path, &serde_yaml::to_string(&s).unwrap()).unwrap();
assert_eq!(settings, utils::read_file(&path).unwrap());
}
#[test]
fn read_settings() {
let s1 = Settings::new();
let path = std::path::Path::new("test.yml");
s1.export(&path).unwrap();
let s2 = Settings::import(&path).unwrap();
assert_eq!(s1, s2);
}

View File

@@ -1,11 +1,15 @@
extern crate serde;
extern crate serde_json;
extern crate serde_yaml;
use std::collections::HashMap;
use lazy_static::lazy_static;
use serde::{Deserialize, Serialize};
mod regex_wrapper;
use regex_wrapper::Regex;
lazy_static! {
static ref RE: Regex = Regex::new("%([A-Z])");
}
/// Representation of a rule in LangEvolveRs
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Rule {
@@ -24,6 +28,15 @@ impl Rule {
/// the input text
/// * `to` - literal string that represents the regex text that should
/// replace the text matched by `from`
///
/// # Example
/// ```
/// # use lazy_static::lazy_static;
/// # #[path = "mod.rs"]
/// # mod rule;
/// # use rule::Rule;
/// let rule = Rule::new("ab+c*", "ab");
/// ```
pub fn new(from: &str, to: &str) -> Self {
Rule {
from: Regex::new(from),
@@ -31,28 +44,59 @@ impl Rule {
}
}
/// Detect the number of categories in a rule
///
/// For a rule, this function detects the number of categories set in the
/// `from` member of a `Rule` and in its `to` member. The result is returned
/// as a tuple of `u8`s.
///
/// # Example
///
/// ```
/// # #[path = "mod.rs"]
/// # mod rule;
/// # use rule::Rule;
/// let rule = Rule::new("%Bea*i+", "a%A%C");
/// let nb_rules = rule.detect_number_categories();
/// assert_eq!((1 as u8, 2 as u8), nb_rules);
/// ```
pub fn detect_number_categories(&self) -> (u8, u8) {
    // Every `%` is counted as one category marker. The count is clamped to
    // `u8::MAX` instead of being cast directly, because a plain `as u8`
    // wraps around (256 markers would be reported as 0).
    fn count_markers(text: &str) -> u8 {
        text.matches('%').count().min(usize::from(u8::MAX)) as u8
    }

    // First element: markers in `from`; second element: markers in `to`.
    (count_markers(&self.from.to_string()), count_markers(&self.to))
}
/// Returns a copy of this rule whose `from` pattern has every `%<name>`
/// occurrence replaced by `[<content>]`, for each `(name, content)` entry of
/// `categories`. The `to` member is left untouched.
fn simple_rewrite(&self, categories: &HashMap<String, String>) -> Self {
    // Apply all substitutions on the pattern text first, so the regex is
    // compiled once at the end rather than once per category.
    let pattern = categories.iter().fold(
        self.from.to_string(),
        |pattern, (category, content)| {
            pattern.replace(
                format!("%{}", category).as_str(),
                format!("[{}]", content).as_str(),
            )
        },
    );

    let mut rule = self.clone();
    rule.from = Regex::new(pattern.as_str());
    rule
}
// TODO break categories in different rules
pub fn update(
&self,
categories: &std::collections::HashMap<String, String>,
categories: &HashMap<String, String>,
) -> std::result::Result<Rule, String> {
let mut rule = self.clone();
let re = Regex::new("%\\D");
let from_match = re.is_match(&self.from.as_str());
let to_match = re.is_match(&self.to.as_str());
if from_match && !to_match {
for (category, content) in categories {
rule.from = Regex::new(
rule.from
.to_string()
.replace(
format!("%{}", category).as_str(),
format!("[{}]", content).as_str(),
)
.as_str(),
);
}
let (from_match, to_match) = self.detect_number_categories();
// If there are only simple rewrites to make in the from String
if from_match > 0 && to_match == 0 {
rule = self.simple_rewrite(&categories);
}
// If there are equivalences between from and to
if from_match > 0 && to_match <= from_match && to_match > 0 {}
Ok(rule)
}
@@ -65,30 +109,71 @@ impl Rule {
}
}
impl From<&str> for Rule {
    /// Creates a rule from a single literal string
    ///
    /// It is possible to create a rule from a string, delimited by a `>`. This
    /// means a rule like `%C>%D` will be interpreted as going from `%C` to
    /// `%D`. The part after the `>` may be empty (e.g. `h>`), in which case
    /// the rule replaces its matches with an empty string. Anything after a
    /// second `>` is ignored.
    ///
    /// # Panics
    ///
    /// Panics if `source` contains no `>` separator.
    ///
    /// # Example
    ///
    /// ```
    /// # #[path = "mod.rs"]
    /// # mod rule;
    /// # use rule::Rule;
    /// let rule1 = Rule::new("%C", "%D");
    /// let rule2 = Rule::from("%C>%D");
    /// assert_eq!(rule1, rule2);
    /// ```
    fn from(source: &str) -> Self {
        // `split` (unlike `split_terminator`) keeps a trailing empty field,
        // so `h>` yields the two fields `h` and `` instead of only `h`.
        let mut components = source.split('>');
        // `split` always yields at least one field, even for an empty input.
        let from = components.next().unwrap_or("");
        let to = components.next().unwrap_or_else(|| {
            panic!("invalid rule {:?}: expected the form `from>to`", source)
        });
        Rule::new(from, to)
    }
}
impl From<String> for Rule {
/// Allow to create a rule from a single `String`
///
/// It is possible to create a rule from a string, delimited by a `>`. This
/// means a rule like `%C>%D` will be interpreted as going from `%C` to
/// `%D`.
///
/// # Example
///
/// ```
/// # #[path = "mod.rs"]
/// # mod rule;
/// # use rule::Rule;
/// let rule1 = Rule::new("%C", "%D");
/// let rule2 = Rule::from(String::from("%C>%D"));
/// assert_eq!(rule1, rule2);
/// ```
fn from(source: String) -> Self {
let components: Vec<&str> = source.split_terminator(">").collect();
Self {
from: Regex::new(components[0]),
to: String::from(components[1]),
}
let components: Vec<&str> = source.split_terminator('>').collect();
Rule::new(components[0], components[1])
}
}
impl PartialEq for Rule {
    /// Compares two `Rule` structs member by member
    ///
    /// Two rules are equal when both their `from` and their `to` members are
    /// equal.
    ///
    /// # Example
    ///
    /// ```
    /// # #[path = "mod.rs"]
    /// # mod rule;
    /// use rule::Rule;
    /// let rule1 = Rule::new("%C", "%D");
    /// let rule2 = Rule::from("%C>%D");
    /// assert!(rule1 == rule2);
    /// assert!(rule2 == rule1);
    /// ```
    fn eq(&self, other: &Self) -> bool {
        // `to` is only compared when the `from` members already match.
        let same_from = self.from == other.from;
        same_from && self.to == other.to
    }
}
impl Eq for Rule {}
#[test]
fn rule_new() {
let rule1 = Rule::new("([ae]+)i", "${1}i");
let rule2 = Rule {
from: Regex::new("([ae]+)i"),
to: String::from("${1}i"),
};
assert_eq!(rule1, rule2);
}

View File

@@ -4,15 +4,24 @@ use std::{fmt, ops};
pub struct Regex(regex::Regex);
impl Regex {
/// Create a new Regex wrapper around regex::Regex;
///
/// # Arguments
///
/// * `s` - string litteral from which to create the new Regex
pub fn new(s: &str) -> Self {
Self(regex::Regex::new(s).unwrap())
}
/// Returns a string literal representation of the Regex
#[allow(unused)]
pub fn as_str(&self) -> &str {
self.0.as_str()
}
}
pub fn to_string(&self) -> String {
/// Formats the wrapper exactly as the wrapped `regex::Regex` formats itself.
///
/// `Display` is implemented instead of `ToString`: the standard library's
/// blanket `impl<T: Display> ToString for T` still provides `to_string()`,
/// and the wrapper additionally becomes usable with `{}` in format strings.
impl std::fmt::Display for Regex {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Delegate to the inner regex so formatter flags are honoured too.
        std::fmt::Display::fmt(&self.0, f)
    }
}

View File

@@ -1,4 +1,3 @@
extern crate log;
use log::{info, error};
use std::fs::File;
@@ -24,8 +23,9 @@ pub enum SettingsType {
/// # Example
///
/// ```no_run
/// # use lang_evolve_core::utils;
/// let path = std::path::Path::new("./some/path/to/my/file.json");
/// let content = lang_evolve_core::settings::utils::read_file(&path).unwrap();
/// let content = utils::read_file(&path).unwrap();
/// ```
pub fn read_file(path: &Path) -> Result<String> {
let display = path.display();
@@ -40,7 +40,7 @@ pub fn read_file(path: &Path) -> Result<String> {
match file.read_to_string(&mut content) {
Err(why) => {
error!("Could not read {}: {}", display, why.to_string());
return Err(why);
Err(why)
}
Ok(_) => {
info!("Content of {} read", display);
@@ -54,9 +54,10 @@ pub fn read_file(path: &Path) -> Result<String> {
/// # Example
///
/// ```no_run
/// # use lang_evolve_core::utils;
/// let content = String::from("This is my content");
/// let path = std::path::Path::new("./path/to/my/file.txt");
/// lang_evolve_core::settings::utils::write_file(&path, &content).unwrap();
/// utils::write_file(&path, &content).unwrap();
/// ```
pub fn write_file<S>(path: &Path, content: &S) -> Result<()>
where
@@ -99,17 +100,18 @@ where
/// # Example
///
/// ```
/// # use lang_evolve_core::utils;
/// let file_json = std::path::Path::new("file.json");
/// assert_eq!(lang_evolve_core::settings::utils::SettingsType::Json,
/// lang_evolve_core::settings::utils::get_file_type(&file_json));
/// assert_eq!(utils::SettingsType::Json,
/// utils::get_file_type(&file_json));
///
/// let file_yaml = std::path::Path::new("file.yaml");
/// assert_eq!(lang_evolve_core::settings::utils::SettingsType::Yaml,
/// lang_evolve_core::settings::utils::get_file_type(&file_yaml));
/// assert_eq!(utils::SettingsType::Yaml,
/// utils::get_file_type(&file_yaml));
///
/// let file_yml = std::path::Path::new("file.yml");
/// assert_eq!(lang_evolve_core::settings::utils::SettingsType::Yaml,
/// lang_evolve_core::settings::utils::get_file_type(&file_yml));
/// assert_eq!(utils::SettingsType::Yaml,
/// utils::get_file_type(&file_yml));
/// ```
pub fn get_file_type(path: &Path) -> SettingsType {
let extension = match path.extension() {