Compare commits

...

32 Commits

Author SHA1 Message Date
531c9b1acc Add From<&str> trait implementation to Rule, ToString to Regex
Also remove obvious test for Rule
2020-07-12 12:30:15 +02:00
cb933d4896 Added documentation and tests 2020-07-12 12:29:39 +02:00
d33190a46d Cleaner code 2020-07-12 12:28:19 +02:00
bb9d8703f3 Cleaner output while running tests 2020-07-12 12:24:00 +02:00
144771a2c0 Stricter clippy rules 2020-07-12 12:23:10 +02:00
22fbe5aba7 Add Default trait to Settings 2020-07-11 17:52:14 +02:00
9cc1a52e5a Better debug messages and logging handling 2020-07-11 17:51:38 +02:00
78541f10ba Added and updated documentation and tests
Some undocumented functions and methods are now documented and tested.

Some doc tests were modified to become compilable, going from `ignore`
to `no_run`, or runnable, going from `no_run` to regular tests.
2020-07-11 17:48:27 +02:00
8ddd33d76d Improve code and tests readability 2020-07-11 17:47:28 +02:00
a8bafc072b Remove unnecessary code
This commit removes some unnecessary code, such as an extra `String`
clone, unnecessary `return` statements, and an inherent `to_string`
function for the `Regex` wrapper.

A double-quote character considered as a string literal by the
compiler was also changed to a single-quote character, this time
considered as a character by the compiler, for some optimization.

Also a newline is added in order to improve code readability.
2020-07-11 17:40:20 +02:00
3c960eaa35 Move some tests to lib.rs 2020-07-11 17:35:57 +02:00
de45ffc15c Update dependencies, remove unneeded keywords and declarations
Also declaration of a lazy_static element was moved in a better spot
2020-07-11 17:34:01 +02:00
042fd066f0 Split Rule::update in several functions
Also optimization with a static Regex
2020-04-05 15:51:23 +02:00
071c8b0728 Made mod utils public 2020-04-05 15:48:41 +02:00
5f30c6d636 Reorganized modules, made rules a separate struct
Rules are now a separate struct, and the `rules` member of
`settings::Settings` is now a `Vec<settings::rule::Rule>`.

Several elements were made private, now can be accessed through
dedicated methods
2020-04-04 22:44:08 +02:00
dbbb1616dd Optimized rule rewriting 2020-04-04 18:54:46 +02:00
23e3acb182 Better category find and replace
I didn't have to do it in two `replace` calls

Now I have to check if the rule has one or several corresponding
categories in the initial and final regex, find whether they have the
same amount of elements, and if so create rules that allow elements from
each category to be mapped 1:1
2020-04-04 18:28:27 +02:00
6fb1c287e0 Rules are now kept sorted
Changed the rules from a HashMap to a Vec
2020-04-04 17:08:26 +02:00
ef8c02fc97 Good progress with applying rules, room for improvement
Rules are applied, however some bugs remain:
- The HashMap for rules should be replaced by a Vec so they can be
  stored in order and not randomly
- For some reasons, the `%` is not removed from some rules in the
  private function `update_rules` in the `Settings` struct.
- Make it so replacements between square brackets work correctly
2020-04-04 15:46:29 +02:00
bae1d86544 Switched from Vectors to HashMaps, need to update docs 2020-03-29 18:10:45 +02:00
6be0f7e8f6 Switched rules to Regexes, added Regex wrapper for serde 2020-03-29 03:10:19 +02:00
d13836e433 Removed comments, made Settings members public, doc 2020-03-29 03:09:48 +02:00
f3e672e29c Organized imports 2020-03-29 03:07:26 +02:00
c418323b5c Removed unused uses 2020-03-29 03:06:46 +02:00
c6baa46aca Removed import_input
This should be left to the client crate, `settings::utils::read_file`
is already here for that
2020-03-28 22:38:28 +01:00
18eb16e777 Rewritten some parts as macros, updated extension handling
Encoding and decoding of `settings::Settings` now handled in macros to
avoid code repetition. I also wish to rewrite them both in a combined
macro that would either encode or decode the struct depending on how
it was called.

Replaced some log warnings with errors

Now if type extension is not valid, the code will still attempt to
decode it, first as JSON data, then as Yaml data. If both fail, then
an error is returned. For this, I added the Display trait to the
`settings::Settings` struct which allows the FromStr trait, and due
to conflicting implementation, I removed the From trait and
implemented manually the `from` method with the same signature -- the
struct just lacks the trait that comes with it.
2020-03-28 22:03:21 +01:00
d200367ee0 Updated logger options, changed filetype behavior
Replaced lots of warns by errors

Filetype detection won’t return an error on unknown extension anymore,
but will return an `Other` value so it can be handled by other
functions not as an error already. See upcoming commits.
2020-03-28 21:59:23 +01:00
e9a161f526 Updated import_input signature, documentation 2020-03-28 21:58:27 +01:00
a7ef031090 Final doc before I go to sleep 2020-03-27 23:31:24 +01:00
9f5c040893 Added documentation 2020-03-27 23:27:00 +01:00
7d0a371311 Changed ruleset variable type to i32 2020-03-27 23:26:16 +01:00
e00f489f55 Added From trait to Settings 2020-03-27 23:25:16 +01:00
6 changed files with 764 additions and 151 deletions

View File

@@ -7,8 +7,17 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
serde = {version = "1.0", features = ["derive"]} # Logger
serde_yaml = "0.7"
serde_json = "1.0"
log = "0.4" log = "0.4"
simplelog = "0.7" simplelog = "0.8"
# Struct serializing and deserializing
serde = {version = "1.0", features = ["derive"]}
serde_yaml = "0.8"
serde_json = "1.0"
# Regex support
regex = "1.3"
lazy_static = "1.4"
# [dev-dependencies]
# Pretty output
prettydiff = "0.3"

View File

@@ -1,4 +1,5 @@
#![crate_name = "lang_evolve_core"] #![crate_name = "lang_evolve_core"]
#![deny(clippy::all)]
//! # LangEvolveCore //! # LangEvolveCore
//! //!
@@ -13,16 +14,12 @@
//! user-defined sound changes to words and texts based on regex expressions. //! user-defined sound changes to words and texts based on regex expressions.
use std::fs::File; use std::fs::File;
use std::io::Result;
use std::path::PathBuf;
extern crate log;
extern crate simplelog;
use log::{info, warn}; use log::{info, warn};
use simplelog::*; use simplelog::*;
pub mod settings; pub mod settings;
use settings::utils; pub mod utils;
/// Initializes the crate /// Initializes the crate
/// ///
@@ -44,19 +41,39 @@ use settings::utils;
/// lang_evolve_core::init(); /// lang_evolve_core::init();
/// ``` /// ```
pub fn init() -> std::result::Result<(), log::SetLoggerError> { pub fn init() -> std::result::Result<(), log::SetLoggerError> {
match CombinedLogger::init(vec![ // #[cfg(debug_assertions)]
TermLogger::new( match CombinedLogger::init(if cfg!(debug_assertions) {
LevelFilter::Warn, vec![
Config::default(), WriteLogger::new(
TerminalMode::Mixed, LevelFilter::Warn,
) Config::default(),
.unwrap(), File::create("core.log").unwrap(),
WriteLogger::new( ),
LevelFilter::Info, WriteLogger::new(
Config::default(), LevelFilter::Debug,
File::create("core.log").unwrap(), Config::default(),
), File::create("core.log").unwrap(),
]) { ),
WriteLogger::new(
LevelFilter::Info,
Config::default(),
File::create("core.log").unwrap(),
),
]
} else {
vec![
WriteLogger::new(
LevelFilter::Warn,
Config::default(),
File::create("core.log").unwrap(),
),
WriteLogger::new(
LevelFilter::Info,
Config::default(),
File::create("core.log").unwrap(),
),
]
}) {
Err(why) => { Err(why) => {
warn!("Could not initialize logger: {}", why.to_string()); warn!("Could not initialize logger: {}", why.to_string());
Err(why) Err(why)
@@ -68,7 +85,28 @@ pub fn init() -> std::result::Result<(), log::SetLoggerError> {
} }
} }
/// Import user input from a text file and return them as a String #[cfg(test)]
pub fn import_input(path: PathBuf) -> Result<String> { mod tests {
utils::read_file(&path) use super::*;
/// Serializing default settings to Yaml and reading the file back must yield
/// the canonical empty-settings document.
#[test]
fn write_settings() {
    let expected = r#"---
version: "1"
categories: {}
rules: []"#;
    let path = std::path::Path::new("settings.yaml");
    let serialized = serde_yaml::to_string(&settings::Settings::new()).unwrap();
    utils::write_file(&path, &serialized).unwrap();
    let read_back = utils::read_file(&path).unwrap();
    assert_eq!(expected, read_back);
}
/// Settings exported to a `.yml` file must compare equal once imported again.
#[test]
fn read_settings() {
    let path = std::path::Path::new("settings.yml");
    let exported = settings::Settings::new();
    exported.export(&path).unwrap();
    let imported = settings::Settings::import(&path).unwrap();
    assert_eq!(exported, imported);
}
} }

View File

@@ -1,31 +1,132 @@
extern crate serde;
extern crate serde_json;
extern crate serde_yaml;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
extern crate log; use log::{debug, error, info};
use log::warn;
// mod utils; use crate::utils::{self, SettingsType};
pub mod utils;
use utils::SettingsType;
#[allow(dead_code)] use prettydiff::diff_words;
const RULESET_CURRENT_VERSION: &'static str = "1";
#[derive(Debug, Deserialize, Serialize)] mod rule;
use rule::Rule;
/// Current version of the ruleset. It will help determine if the ruleset is
/// outdated or from a more recent version of the software than the one being in
/// use.
const RULESET_CURRENT_VERSION: i32 = 1;
/// Serialize a [`Settings`] struct with the given `serde` backend.
///
/// On success the macro evaluates to the serialized text. On failure it logs
/// the error and makes the *enclosing function* return
/// `Err(std::io::Error)` of kind `InvalidData`; it is therefore only usable
/// inside functions returning `std::io::Result<_>`.
///
/// # Arguments
///
/// * `funcrate` - `serde`-compatible crate to use, must provide `to_string`
/// * `content` - content to encode, must be `Settings` struct
///
/// # Example
///
/// ```no_run
/// use std::path::Path;
/// use lang_evolve_core::utils;
///
/// let settings = Settings::new();
/// let filetype = utils::get_file_type(Path::new("settings.yml"));
///
/// let content = match filetype {
/// SettingsType::Yaml => encode_settings!(serde_yaml, &settings),
/// SettingsType::Json => encode_settings!(serde_json, &settings),
/// _ => String::from("Error!"),
/// };
/// ```
///
/// [`Settings`]: ./settings/struct.Settings.html
macro_rules! encode_settings {
    ($funcrate:ident, $content:expr) => {
        match $funcrate::to_string($content) {
            Ok(encoded) => encoded,
            Err(err) => {
                log::error!("Could not serialize settings: {}", err.to_string());
                // Leave the calling function early, wrapping the serde error.
                return Err(std::io::Error::new(
                    std::io::ErrorKind::InvalidData,
                    err,
                ));
            }
        }
    };
}
/// Decode a [`Settings`] struct from a string slice with the given `serde`
/// backend.
///
/// On success the macro evaluates to the decoded value. On failure it logs
/// the error and makes the *enclosing function* return
/// `Err(std::io::Error)` of kind `InvalidInput`; it is therefore only usable
/// inside functions returning `std::io::Result<_>`.
///
/// # Arguments
///
/// * `funcrate` - `serde`-compatible crate to use, must provide `from_str`
/// * `content` - `&str` to decode into a [`Settings`]
///
/// # Example
///
/// ```no_run
/// # use lang_evolve_core::decode_settings;
/// let str = r#"{"version":"1","categories":{},"rules":[]}"#;
/// let settings = decode_settings!(serde_json, str);
/// ```
///
/// [`Settings`]: ./settings/struct.Settings.html
macro_rules! decode_settings {
    ($funcrate:ident, $content:expr) => {
        match $funcrate::from_str($content) {
            Ok(decoded) => decoded,
            Err(err) => {
                log::error!("Could not import settings: {}", err.to_string());
                // Leave the calling function early, wrapping the serde error.
                return Err(std::io::Error::new(
                    std::io::ErrorKind::InvalidInput,
                    err,
                ));
            }
        }
    };
}
use std::collections::HashMap;
/// Representation of the softwares settings
///
/// This struct represents all the settings the software has to follow
/// while running, which includes the phoneme categories as well as
/// the soundchange rules to apply to the input text.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Settings { pub struct Settings {
/// Represents the version of the loaded ruleset.
///
/// It is used to detect obsolete ruleset representations or if a
/// loaded ruleset comes from a newer version of lang_evolve_core
/// than the one used by the user.
#[serde(default = "Settings::get_ruleset_version")] #[serde(default = "Settings::get_ruleset_version")]
version: String, version: String,
/// Categories of phonemes
///
/// This is a vector of categories of phonemes, represented
/// themselves as pairs of strings. Each pair of strings has its
/// first element represent the name of the category, which is
/// generally represented by a single capital letter. The second
/// element is a string where all its characters represent
/// phonemes. It is currently not possible to have more than one
/// character to be considered as one sound.
#[serde(default)] #[serde(default)]
categories: Vec<(String, String)>, categories: HashMap<String, String>,
/// Soundchange rules
///
/// This is a vector of pairs of strings, the first one represents
/// a regex to be matched while the second represents the change
/// to be made to the input data.
#[serde(default)] #[serde(default)]
rules: Vec<(String, String)>, rules: Vec<Rule>,
} }
/// Representation inside the crate of LangEvolves settings. /// Representation inside the crate of LangEvolves settings.
impl Settings { impl Settings {
/// Creates a new empty instance of `Settings` /// Creates a new empty instance of [`Settings`]
/// ///
/// # Example /// # Example
/// ///
@@ -33,26 +134,27 @@ impl Settings {
/// let s = lang_evolve_core::settings::Settings::new(); /// let s = lang_evolve_core::settings::Settings::new();
/// let content_yaml = r#"--- /// let content_yaml = r#"---
/// version: "1" /// version: "1"
/// categories: [] /// categories: {}
/// rules: []"#; /// rules: []"#;
/// let content_json = r#"{"version":"1","categories":[],"rules":[]}"#; /// let content_json = r#"{"version":"1","categories":{},"rules":[]}"#;
/// assert_eq!(content_yaml, serde_yaml::to_string(&s).unwrap()); /// assert_eq!(content_yaml, serde_yaml::to_string(&s).unwrap());
/// assert_eq!(content_json, serde_json::to_string(&s).unwrap()); /// assert_eq!(content_json, serde_json::to_string(&s).unwrap());
/// ``` /// ```
///
/// [`Settings`]: ./settings/struct.Settings.html
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
version: Self::get_ruleset_version(), version: Self::get_ruleset_version(),
categories: Vec::new(), categories: HashMap::new(),
rules: Vec::new(), rules: Vec::new(),
} }
} }
/// Import settings from an imput file. The currently allowed file formats /// Import settings from an imput file.
/// are: ///
/// - JSON - with the `.json` extension /// The currently allowed file formats are described in the
/// - Yaml - with the `.yaml` or `.yml` extension /// [`utils::SettingsType`] enum. If the ruleset version is higher than the
/// The format will be automatically detected based on the filename /// current version (see [`Settings.version`]), then an error is returned.
/// extension.
/// ///
/// # Arguments /// # Arguments
/// ///
@@ -60,9 +162,15 @@ impl Settings {
/// ///
/// # Example /// # Example
/// ///
/// ```no_run /// ```
/// use std::path::Path; /// use std::path::Path;
/// use lang_evolve_core::settings::Settings; /// # use lang_evolve_core::settings::Settings;
/// # let s = Settings::new();
/// # for path in vec!["settings.json", "settings.yaml", "settings.yml"] {
/// # let path = Path::new(path);
/// # s.export(&path).unwrap();
/// # }
///
/// let path_json = Path::new("settings.json"); /// let path_json = Path::new("settings.json");
/// let _s_json = Settings::import(&path_json).unwrap(); /// let _s_json = Settings::import(&path_json).unwrap();
/// ///
@@ -72,50 +180,120 @@ impl Settings {
/// let path_yml = Path::new("settings.yml"); /// let path_yml = Path::new("settings.yml");
/// let _s_yml = Settings::import(&path_yml).unwrap(); /// let _s_yml = Settings::import(&path_yml).unwrap();
/// ``` /// ```
///
/// [`utils::SettingsTYpe`]: ./utils/enum.SettingsType.html
/// [`Settings.version`]: ./struct.Settings.html#structfield.version
pub fn import(path: &std::path::Path) -> std::io::Result<Self> { pub fn import(path: &std::path::Path) -> std::io::Result<Self> {
use SettingsType::*; use SettingsType::{Json, Yaml};
let display = path.display(); let file_type = utils::get_file_type(&path);
let file_type = utils::get_file_type(&path).unwrap(); let content = utils::read_file(&path)?;
let content = match utils::read_file(&path) {
Err(e) => {
warn!("Could not read file {}: {}", display, e.to_string());
return Err(e);
}
Ok(content) => content,
};
let settings: Settings = match file_type { let settings: Settings = match file_type {
Yaml => match serde_yaml::from_str(&content) { Yaml => decode_settings!(serde_yaml, &content),
Json => decode_settings!(serde_json, &content),
// Attempt to decode anyway
_ => match Settings::from_str(&content.as_str()) {
Ok(val) => val,
Err(e) => { Err(e) => {
warn!("Could not import settings: {}", e.to_string()); error!(
"Could not decode input {}: {}",
content,
e.to_string()
);
return Err(std::io::Error::new( return Err(std::io::Error::new(
std::io::ErrorKind::InvalidInput, std::io::ErrorKind::InvalidData,
e, e,
)); ));
} }
Ok(val) => val,
},
Json => match serde_json::from_str(&content) {
Err(e) => {
warn!("Could not import settings: {}", e.to_string());
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
e,
));
}
Ok(val) => val,
}, },
}; };
Ok(settings) if settings.version > Self::get_ruleset_version() {
error!("Ruleset version too high!");
Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
"Ruleset version too high!",
))
} else {
info!("Successfuly imported {}", path.display());
Ok(settings)
}
} }
/// Export the current rules to a file. The allowed file formats are either /// Import settings from file path described by the argument `path`
/// a YAML file or a Json file, hence the allowed filename extension are: ///
/// * "yml" or "yaml" for Yaml files /// # Arguments
/// * "json" for Json files ///
/// The format is detected automatically depending on the extension of the /// * `path` - path to the file from which settings should be imported
/// filename. ///
/// # Example
///
/// ```
/// # use lang_evolve_core::settings::Settings;
/// # use std::path::Path;
/// # let s = Settings::default();
/// # s.export(Path::new("settings.yml"));
/// let s = Settings::from("settings.yml");
/// ```
pub fn from<S>(s: S) -> std::io::Result<Self>
where
    S: ToString,
{
    // Own the text first so that the `Path` built below can borrow from it.
    let raw_path = s.to_string();
    // All the actual work (reading, decoding, version check) is in `import`.
    Self::import(std::path::Path::new(&raw_path))
}
/// Append a new rule at the end of the current rule list
///
/// # Arguments
///
/// * `from` - Regex that should match the text to be replaced
/// * `to` - Text that should replace what `from` matched
///
/// # Panics
///
/// `from` is compiled through `Rule::new`, whose regex wrapper panics when
/// the pattern is not a valid regular expression.
///
/// # Example
///
/// ```
/// # use lang_evolve_core::settings::Settings;
/// let mut settings = Settings::default();
/// settings.add_rule("ha", "wa");
///
/// use std::str::FromStr;
/// let reference = Settings::from_str(
/// r#"{"version":"1","categories":{},"rules":[{"from":"ha","to":"wa"}]}"#)
/// .unwrap();
/// assert_eq!(reference, settings);
/// ```
pub fn add_rule(&mut self, from: &str, to: &str) {
    let rule = Rule::new(from, to);
    self.rules.push(rule)
}
/// Register a category of phonemes in the current settings
///
/// Categories are stored in a map keyed by name, so adding a category whose
/// name already exists replaces the previous content.
///
/// # Arguments
///
/// * `name` - Name of the category
/// * `content` - Content of the category, phonemes
///
/// # Example
///
/// ```
/// # use lang_evolve_core::settings::Settings;
/// let mut settings = Settings::default();
/// settings.add_category("C", "abcde");
///
/// use std::str::FromStr;
/// let reference = Settings::from_str(
/// r#"{"version":"1","categories":{"C": "abcde"},"rules":[]}"#)
/// .unwrap();
/// assert_eq!(reference, settings);
/// ```
pub fn add_category(&mut self, name: &str, content: &str) {
    let (key, phonemes) = (name.to_string(), content.to_string());
    self.categories.insert(key, phonemes);
}
/// Export current settings to a file.
///
/// The allowed file formats are described in the [`SettingsType`] enum.
/// ///
/// # Arguments /// # Arguments
/// ///
@@ -124,49 +302,169 @@ impl Settings {
/// # Example /// # Example
/// ///
/// ``` /// ```
/// # use lang_evolve_core::settings::Settings;
/// use std::path::Path; /// use std::path::Path;
/// let s = lang_evolve_core::settings::Settings::new(); ///
/// let s = Settings::new();
/// ///
/// // Export to JSON /// // Export to JSON
/// let path_json = Path::new("./output.json"); /// let path_json = Path::new("settings.json");
/// s.export(&path_json).unwrap(); /// s.export(&path_json).unwrap();
/// ///
/// // Export to Yaml, both ".yml" and ".yaml" work /// // Export to Yaml, both ".yml" and ".yaml" work
/// let path_yaml = Path::new("./output.yaml"); /// let path_yaml = Path::new("settings.yaml");
/// s.export(&path_yaml).unwrap(); /// s.export(&path_yaml).unwrap();
/// let path_yml = Path::new("./output.yml"); /// let path_yml = Path::new("settings.yml");
/// s.export(&path_yml).unwrap(); /// s.export(&path_yml).unwrap();
/// ``` /// ```
///
/// [`SettingsType`]: ./utils/enum.SettingsType.html
pub fn export(&self, path: &std::path::Path) -> std::io::Result<()> { pub fn export(&self, path: &std::path::Path) -> std::io::Result<()> {
let filetype = utils::get_file_type(&path).unwrap(); let filetype = utils::get_file_type(&path);
let content = match filetype { let content = match filetype {
SettingsType::Yaml => match serde_yaml::to_string(&self) { SettingsType::Yaml => encode_settings!(serde_yaml, &self),
Err(e) => { SettingsType::Json => encode_settings!(serde_json, &self),
warn!("Could not serialize settings: {}", e.to_string()); _ => {
return Err(std::io::Error::new( error!("Unknown filetype {}", path.to_str().unwrap());
std::io::ErrorKind::InvalidData, return Err(std::io::Error::new(
e, std::io::ErrorKind::InvalidData,
)); "Unknown file type",
} ));
Ok(val) => val, }
},
SettingsType::Json => match serde_json::to_string(&self) {
Err(e) => {
warn!("Could not serialize settings: {}", e.to_string());
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
e,
));
}
Ok(val) => val,
},
}; };
info!("Successfuly exported settings to {}", path.display());
utils::write_file(&path, &content) utils::write_file(&path, &content)
} }
/// Get the current ruleset version of LangEvolve. /// Get the current ruleset version of LangEvolve.
fn get_ruleset_version() -> String { fn get_ruleset_version() -> String {
String::from(RULESET_CURRENT_VERSION) RULESET_CURRENT_VERSION.to_string()
}
/// Transform input rules into Regexes that can be understood by Rust.
///
/// Every rule is passed through `Rule::update` together with the current
/// categories; the updated rules are returned in the same order as the
/// stored ones. The stored rules themselves are left untouched.
///
/// # Errors
///
/// Returns the first error reported by `Rule::update` instead of panicking.
fn update_rules(&self) -> std::result::Result<Vec<Rule>, String> {
    // Collecting into `Result<Vec<_>, _>` stops at the first `Err`, so no
    // `unwrap` (and no intermediate clone of the rule list) is needed.
    self.rules
        .iter()
        .map(|rule| rule.update(&self.categories))
        .collect()
}
/// Apply list of rules to input
///
/// The rules stored in the struct are first prepared with `update_rules`,
/// then applied one after the other, in order, to the input `s`: every match
/// of a rule's `from` regex is replaced by its `to` text, and the result is
/// fed to the next rule.
///
/// A `%` followed by a category name marks a category of phonemes. As
/// implemented by `Rule::update`, a rule whose `from` part contains
/// categories and whose `to` part contains none gets each `%X` of `from`
/// replaced by `[content of X]`. For instance, with the category `C` defined
/// as `bcdfg`, the rule `%Ci` to `j` is applied as `[bcdfg]i` to `j`.
///
/// # Arguments
///
/// * `s` - Input to modify
///
/// # Errors
///
/// Returns the error reported by `update_rules` if a rule cannot be
/// prepared.
///
/// # Example
///
/// ```
/// # use lang_evolve_core::settings::Settings;
/// let settings = Settings::new();
/// // add some rules...
/// // set some input
/// let input = String::new();
/// let _output = settings.apply(input);
/// ```
pub fn apply(&self, s: String) -> std::result::Result<String, String> {
    // Propagate preparation failures to the caller instead of panicking.
    let rules = self.update_rules()?;
    let mut s = s;
    debug!("===============================================");
    for rule in rules {
        // `get_to` hands back an owned copy; fetch it once per rule.
        let to = rule.get_to();
        debug!(
            "from: \"{}\"\tto: \"{}\"",
            rule.get_from().to_string(),
            to
        );
        let new = rule.get_from().replace_all(&s, to.as_str()).to_string();
        if cfg!(debug_assertions) {
            // `s` still holds the text from before this rule, so it can be
            // diffed against `new` directly without keeping a clone around.
            let diffs = diff_words(&s, &new);
            if diffs.diff().len() > 1 {
                debug!("diff:\n{}", diff_words(&s, &new));
            } else {
                debug!("diff: No changes");
            }
        }
        s = new;
    }
    Ok(s)
}
}
impl Default for Settings {
/// Creates a new empty instance of [`Settings`]
///
/// # Example
///
/// ```
/// let s = lang_evolve_core::settings::Settings::default();
/// let content_yaml = r#"---
/// version: "1"
/// categories: {}
/// rules: []"#;
/// let content_json = r#"{"version":"1","categories":{},"rules":[]}"#;
/// assert_eq!(content_yaml, serde_yaml::to_string(&s).unwrap());
/// assert_eq!(content_json, serde_json::to_string(&s).unwrap());
/// ```
///
/// [`Settings`]: ./settings/struct.Settings.html
fn default() -> Self {
Self::new()
}
}
use std::str::FromStr;
impl FromStr for Settings {
type Err = serde_yaml::Error;
/// Decode a litteral string into a `Settings` struct. Works only for
/// supported file types described in `SettingsType`. It will try to decode
/// the input `s` by any mean known by `SettingsType`.
///
/// # Arguments
///
/// * `s` - litteral string to decode into a `Settings` struct
///
/// # Example
///
/// ```
/// # use std::str::FromStr;
/// let s = r#"{"version":"1","categories":{},"rules":[]}"#;
/// let settings = lang_evolve_core::settings::Settings::from_str(s).unwrap();
/// ```
fn from_str(s: &str) -> Result<Self, Self::Err> {
match serde_json::from_str::<Settings>(s) {
Ok(val) => Ok(val),
Err(_) => match serde_yaml::from_str::<Settings>(s) {
Ok(val) => Ok(val),
Err(e) => {
error!("Could not decode input {}: {}", s, e.to_string());
Err(e)
}
},
}
}
}
use std::fmt;
use std::fmt::Display;
impl Display for Settings {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", serde_json::to_string(&self).unwrap())
} }
} }
@@ -179,24 +477,3 @@ impl PartialEq for Settings {
} }
impl Eq for Settings {} impl Eq for Settings {}
#[test]
fn write_settings() {
let s = Settings::new();
let path = std::path::Path::new("test.yaml");
let settings = r#"---
version: "1"
categories: []
rules: []"#;
utils::write_file(&path, &serde_yaml::to_string(&s).unwrap()).unwrap();
assert_eq!(settings, utils::read_file(&path).unwrap());
}
#[test]
fn read_settings() {
let s1 = Settings::new();
let path = std::path::Path::new("test.yml");
s1.export(&path).unwrap();
let s2 = Settings::import(&path).unwrap();
assert_eq!(s1, s2);
}

179
src/settings/rule/mod.rs Normal file
View File

@@ -0,0 +1,179 @@
use std::collections::HashMap;
use lazy_static::lazy_static;
use serde::{Deserialize, Serialize};
mod regex_wrapper;
use regex_wrapper::Regex;
// Lazily compiled pattern matching a `%` followed by one capital ASCII letter
// (the letter is captured in group 1).
// NOTE(review): nothing in the visible part of this module references `RE` —
// confirm it is still needed.
lazy_static! {
static ref RE: Regex = Regex::new("%([A-Z])");
}
/// Representation of a rule in LangEvolveRs
///
/// A rule pairs a regular expression (`from`) with the text that replaces
/// whatever the expression matched (`to`). Both members are private and are
/// read through `get_from` and `get_to`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Rule {
/// Regex that should match the input text
from: Regex,
/// Text to replace matched text
to: String,
}
impl Rule {
    /// Create new rule
    ///
    /// # Arguments
    ///
    /// * `from` - literal string that represents the regex that should match
    ///   the input text
    /// * `to` - literal string that represents the text that should replace
    ///   the text matched by `from`
    ///
    /// # Panics
    ///
    /// Panics if `from` is not a valid regular expression, because the
    /// `Regex` wrapper panics on a pattern that fails to compile.
    ///
    /// # Example
    /// ```
    /// # use lazy_static::lazy_static;
    /// # #[path = "mod.rs"]
    /// # mod rule;
    /// # use rule::Rule;
    /// let rule = Rule::new("ab+c*", "ab");
    /// ```
    pub fn new(from: &str, to: &str) -> Self {
        Rule {
            from: Regex::new(from),
            to: String::from(to),
        }
    }

    /// Detect the number of categories in a rule
    ///
    /// Counts the `%` characters found in the `from` member of the rule and
    /// in its `to` member, and returns both counts as a tuple of `u8`s. Every
    /// `%` is counted; whether a category name follows it is not checked.
    /// Counts above `u8::MAX` saturate at `u8::MAX` instead of wrapping
    /// around.
    ///
    /// # Example
    ///
    /// ```
    /// # #[path = "mod.rs"]
    /// # mod rule;
    /// # use rule::Rule;
    /// let rule = Rule::new("%Bea*i+", "a%A%C");
    /// let nb_rules = rule.detect_number_categories();
    /// assert_eq!((1_u8, 2_u8), nb_rules);
    /// ```
    pub fn detect_number_categories(&self) -> (u8, u8) {
        // Clamp before narrowing so the cast below can never truncate.
        fn count_markers(text: &str) -> u8 {
            let count = text.matches('%').count();
            count.min(usize::from(std::u8::MAX)) as u8
        }
        (count_markers(self.from.as_str()), count_markers(&self.to))
    }

    /// Replace every `%name` in `from` by `[content]` for each category.
    ///
    /// The `to` member is copied unchanged. The pattern text is rewritten for
    /// all categories first and compiled into a regex only once at the end.
    fn simple_rewrite(&self, categories: &HashMap<String, String>) -> Self {
        let mut pattern = self.from.to_string();
        for (category, content) in categories {
            pattern = pattern.replace(
                format!("%{}", category).as_str(),
                format!("[{}]", content).as_str(),
            );
        }
        Rule {
            from: Regex::new(&pattern),
            to: self.to.clone(),
        }
    }

    /// Build the version of this rule that can actually be applied to text.
    ///
    /// When `from` contains category markers and `to` contains none, the
    /// markers in `from` are expanded with `simple_rewrite`. In every other
    /// case an unchanged copy of the rule is returned.
    ///
    /// # Arguments
    ///
    /// * `categories` - category names mapped to their phonemes
    ///
    /// # Errors
    ///
    /// The current implementation always returns `Ok`.
    // TODO break categories in different rules
    pub fn update(
        &self,
        categories: &HashMap<String, String>,
    ) -> std::result::Result<Rule, String> {
        let (from_match, to_match) = self.detect_number_categories();
        let rule = if from_match > 0 && to_match == 0 {
            // Only simple rewrites to make in the `from` regex.
            self.simple_rewrite(categories)
        } else {
            // Equivalences between `from` and `to` categories are not handled
            // yet (see the TODO above): the rule is kept as it is.
            self.clone()
        };
        Ok(rule)
    }

    /// Regex matching the text to be replaced.
    pub fn get_from(&self) -> &Regex {
        &self.from
    }

    /// Owned copy of the replacement text.
    pub fn get_to(&self) -> String {
        self.to.clone()
    }
}
impl From<&str> for Rule {
    /// Allow to create a rule from a single literal string
    ///
    /// It is possible to create a rule from a string, delimited by a `>`. This
    /// means a rule like `%C>%D` will be interpreted as going from `%C` to
    /// `%D`. The text before the first `>` is the `from` part and the text
    /// between the first and the second `>` (or the end of the string) is the
    /// `to` part; anything after a second `>` is ignored. An empty `to` part,
    /// as in `a>`, is accepted and describes a deletion.
    ///
    /// # Panics
    ///
    /// Panics if `source` contains no `>` delimiter, or if its `from` part is
    /// not a valid regular expression (see `Rule::new`).
    ///
    /// # Example
    ///
    /// ```
    /// # #[path = "mod.rs"]
    /// # mod rule;
    /// # use rule::Rule;
    /// let rule1 = Rule::new("%C", "%D");
    /// let rule2 = Rule::from("%C>%D");
    /// assert_eq!(rule1, rule2);
    /// ```
    fn from(source: &str) -> Self {
        // `split` (unlike `split_terminator`) keeps a trailing empty part, so
        // "a>" yields the parts "a" and "" instead of only "a".
        let mut components = source.split('>');
        match (components.next(), components.next()) {
            (Some(from), Some(to)) => Rule::new(from, to),
            _ => panic!("rule `{}` is missing the `>` delimiter", source),
        }
    }
}
impl From<String> for Rule {
    /// Allow to create a rule from a single `String`
    ///
    /// It is possible to create a rule from a string, delimited by a `>`. This
    /// means a rule like `%C>%D` will be interpreted as going from `%C` to
    /// `%D`. The parsing is delegated to the `From<&str>` implementation so
    /// that both conversions always accept exactly the same syntax.
    ///
    /// # Example
    ///
    /// ```
    /// # #[path = "mod.rs"]
    /// # mod rule;
    /// # use rule::Rule;
    /// let rule1 = Rule::new("%C", "%D");
    /// let rule2 = Rule::from(String::from("%C>%D"));
    /// assert_eq!(rule1, rule2);
    /// ```
    fn from(source: String) -> Self {
        Rule::from(source.as_str())
    }
}
impl PartialEq for Rule {
    /// Equality between `Rule` structs
    ///
    /// Two rules are equal when both their `from` regexes and their `to`
    /// texts are equal.
    ///
    /// # Example
    ///
    /// ```
    /// # #[path = "mod.rs"]
    /// # mod rule;
    /// use rule::Rule;
    /// let rule1 = Rule::new("%C", "%D");
    /// let rule2 = Rule::from("%C>%D");
    /// assert!(rule1 == rule2);
    /// assert!(rule2 == rule1);
    /// ```
    fn eq(&self, other: &Self) -> bool {
        if self.from != other.from {
            return false;
        }
        self.to == other.to
    }
}

impl Eq for Rule {}

View File

@@ -0,0 +1,86 @@
use std::{fmt, ops};
/// Newtype around `regex::Regex`.
///
/// The impls in this file add what the wrapped type is not used for directly
/// here: conversion to text, hashing and equality based on the pattern text,
/// and `serde` (de)serialization as a plain pattern string. `Deref` gives
/// access to the wrapped regex's own methods.
#[derive(Clone, Debug)]
pub struct Regex(regex::Regex);
impl Regex {
    /// Create a new Regex wrapper around regex::Regex;
    ///
    /// # Arguments
    ///
    /// * `s` - string literal from which to create the new Regex
    ///
    /// # Panics
    ///
    /// Panics if `s` is not a valid regular expression; the panic message
    /// contains the offending pattern and the compilation error.
    pub fn new(s: &str) -> Self {
        match regex::Regex::new(s) {
            Ok(compiled) => Self(compiled),
            Err(err) => panic!("invalid regular expression `{}`: {}", s, err),
        }
    }

    /// Returns a string literal representation of the Regex
    // NOTE(review): the `allow` presumably silences the unused-method lint
    // while no caller uses this accessor — confirm it is still required.
    #[allow(unused)]
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}
impl ToString for Regex {
fn to_string(&self) -> String {
self.0.to_string()
}
}
use std::hash::{Hash, Hasher};
/// Hashes the wrapper through the text of its pattern.
impl Hash for Regex {
    fn hash<H: Hasher>(&self, state: &mut H) {
        let pattern: &str = self.0.as_str();
        pattern.hash(state);
    }
}
impl ops::Deref for Regex {
type Target = regex::Regex;
fn deref(&self) -> &regex::Regex {
&self.0
}
}
impl<'de> serde::Deserialize<'de> for Regex {
fn deserialize<D>(de: D) -> Result<Regex, D::Error>
where
D: serde::Deserializer<'de>,
{
use serde::de::{Error, Visitor};
struct RegexVisitor;
impl<'de> Visitor<'de> for RegexVisitor {
type Value = Regex;
fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str("a regular expression pattern")
}
fn visit_str<E: Error>(self, v: &str) -> Result<Regex, E> {
regex::Regex::new(v)
.map(Regex)
.map_err(|err| E::custom(err.to_string()))
}
}
de.deserialize_str(RegexVisitor)
}
}
use serde::{Serialize, Serializer};
/// Serializes the wrapper as the plain text of its pattern.
impl Serialize for Regex {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let pattern = self.0.as_str();
        serializer.serialize_str(pattern)
    }
}
/// Two wrappers are equal when their pattern texts are equal.
///
/// The borrowed pattern texts are compared directly, which avoids building
/// two temporary `String`s per comparison and matches what the `Hash`
/// implementation hashes.
impl PartialEq for Regex {
    fn eq(&self, other: &Self) -> bool {
        self.0.as_str() == other.0.as_str()
    }
}

impl Eq for Regex {}

View File

@@ -1,17 +1,21 @@
extern crate log; use log::{info, error};
use log::{info, warn};
use std::fs::File; use std::fs::File;
use std::io::{Read, Result}; use std::io::{Read, Result};
use std::path::Path; use std::path::Path;
/// Type of supported settings format: yaml or json /// Type of supported settings format: yaml or json
#[derive(Debug, PartialEq)]
pub enum SettingsType { pub enum SettingsType {
/// Files ending with the `yml` or `yaml` extension /// Files ending with the `yml` or `yaml` extension
Yaml, Yaml,
/// Files ending with the `json` extension /// Files ending with the `json` extension
Json, Json,
/// Other file type, used to describe files without an extension or with an
/// unsupported extension
Other,
} }
/// Read a file's content into a `String` /// Read a file's content into a `String`
@@ -19,14 +23,15 @@ pub enum SettingsType {
/// # Example /// # Example
/// ///
/// ```no_run /// ```no_run
/// # use lang_evolve_core::utils;
/// let path = std::path::Path::new("./some/path/to/my/file.json"); /// let path = std::path::Path::new("./some/path/to/my/file.json");
/// let content = lang_evolve_core::settings::utils::read_file(&path).unwrap(); /// let content = utils::read_file(&path).unwrap();
/// ``` /// ```
pub fn read_file(path: &Path) -> Result<String> { pub fn read_file(path: &Path) -> Result<String> {
let display = path.display(); let display = path.display();
let mut file = match File::open(&path) { let mut file = match File::open(&path) {
Err(why) => { Err(why) => {
warn!("Could not read {}: {}", display, why.to_string()); error!("Could not read {}: {}", display, why.to_string());
return Err(why); return Err(why);
} }
Ok(file) => file, Ok(file) => file,
@@ -34,8 +39,8 @@ pub fn read_file(path: &Path) -> Result<String> {
let mut content = String::new(); let mut content = String::new();
match file.read_to_string(&mut content) { match file.read_to_string(&mut content) {
Err(why) => { Err(why) => {
warn!("Could not read {}: {}", display, why.to_string()); error!("Could not read {}: {}", display, why.to_string());
return Err(why); Err(why)
} }
Ok(_) => { Ok(_) => {
info!("Content of {} read", display); info!("Content of {} read", display);
@@ -49,9 +54,10 @@ pub fn read_file(path: &Path) -> Result<String> {
/// # Example /// # Example
/// ///
/// ```no_run /// ```no_run
/// # use lang_evolve_core::utils;
/// let content = String::from("This is my content"); /// let content = String::from("This is my content");
/// let path = std::path::Path::new("./path/to/my/file.txt"); /// let path = std::path::Path::new("./path/to/my/file.txt");
/// lang_evolve_core::settings::utils::write_file(&path, &content).unwrap(); /// utils::write_file(&path, &content).unwrap();
/// ``` /// ```
pub fn write_file<S>(path: &Path, content: &S) -> Result<()> pub fn write_file<S>(path: &Path, content: &S) -> Result<()>
where where
@@ -60,14 +66,14 @@ where
use std::io::prelude::*; use std::io::prelude::*;
let mut file = match File::create(&path) { let mut file = match File::create(&path) {
Err(e) => { Err(e) => {
warn!("Could not open file {}: {}", path.display(), e.to_string()); error!("Could not open file {}: {}", path.display(), e.to_string());
return Err(e); return Err(e);
} }
Ok(file) => file, Ok(file) => file,
}; };
match file.write_all(content.to_string().as_bytes()) { match file.write_all(content.to_string().as_bytes()) {
Err(e) => { Err(e) => {
warn!( error!(
"Could not write settings to file {}: {}", "Could not write settings to file {}: {}",
path.display(), path.display(),
e.to_string() e.to_string()
@@ -78,17 +84,38 @@ where
info!("Wrote settings to file {}", path.display()); info!("Wrote settings to file {}", path.display());
} }
}; };
info!("Successfuly written {}", path.display());
Ok(()) Ok(())
} }
pub fn get_file_type(path: &Path) -> Result<SettingsType> { /// Get the type of file to be opened based on its extension. Currently
/// supported file types are:
/// * JSON - `.json` extension
/// * Yaml - `.yml` or `.yaml` extensions
///
/// # Arguments
///
/// - `path` - Path of the file to be determined
///
/// # Example
///
/// ```
/// # use lang_evolve_core::utils;
/// let file_json = std::path::Path::new("file.json");
/// assert_eq!(utils::SettingsType::Json,
/// utils::get_file_type(&file_json));
///
/// let file_yaml = std::path::Path::new("file.yaml");
/// assert_eq!(utils::SettingsType::Yaml,
/// utils::get_file_type(&file_yaml));
///
/// let file_yml = std::path::Path::new("file.yml");
/// assert_eq!(utils::SettingsType::Yaml,
/// utils::get_file_type(&file_yml));
/// ```
pub fn get_file_type(path: &Path) -> SettingsType {
let extension = match path.extension() { let extension = match path.extension() {
None => { None => { return SettingsType::Other; }
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"File has no extension",
));
}
Some(val) => val, Some(val) => val,
}; };
let extension = extension let extension = extension
@@ -96,11 +123,8 @@ pub fn get_file_type(path: &Path) -> Result<SettingsType> {
.expect("Could not get String out of extension") .expect("Could not get String out of extension")
.to_lowercase(); .to_lowercase();
match extension.as_str() { match extension.as_str() {
"yml" | "yaml" => Ok(SettingsType::Yaml), "yml" | "yaml" => SettingsType::Yaml,
"json" => Ok(SettingsType::Json), "json" => SettingsType::Json,
_ => Err(std::io::Error::new( _ => SettingsType::Other,
std::io::ErrorKind::InvalidInput,
"Invalid extension",
)),
} }
} }