diff --git a/src/lib.rs b/src/lib.rs index c6525cb..0e8ca6b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,6 +19,7 @@ use log::{info, warn}; use simplelog::*; pub mod settings; +mod utils; /// Initializes the crate /// diff --git a/src/settings/mod.rs b/src/settings/mod.rs index 1b6c02e..40f839b 100644 --- a/src/settings/mod.rs +++ b/src/settings/mod.rs @@ -6,16 +6,15 @@ use serde::{Deserialize, Serialize}; extern crate log; use log::{debug, error, info}; -pub mod utils; -use utils::SettingsType; +use crate::utils::{self, SettingsType}; -pub mod regex_wrapper; -use regex_wrapper::Regex; +mod rule; +use rule::Rule; /// Current version of the ruleset. It will help determine if the ruleset is /// outdated or from a more recent version of the software than the one being in /// use. -pub const RULESET_CURRENT_VERSION: i32 = 1; +const RULESET_CURRENT_VERSION: i32 = 1; /// Encode a [`Settings`] struct to a filetype, returns a /// `std::result::Result` @@ -42,7 +41,6 @@ pub const RULESET_CURRENT_VERSION: i32 = 1; /// ``` /// /// [`Settings`]: ./settings/struct.Settings.html -#[macro_export(local_inner_macros)] macro_rules! encode_settings { ($funcrate:ident, $content:expr) => { match $funcrate::to_string($content) { @@ -75,7 +73,6 @@ macro_rules! encode_settings { /// ``` /// /// [`Settings`]: ./settings/struct.Settings.html -#[macro_export(local_inner_macros)] macro_rules! decode_settings { ($funcrate:ident, $content:expr) => { match $funcrate::from_str($content) { @@ -105,7 +102,7 @@ pub struct Settings { /// loaded ruleset comes from a newer version of lang_evolve_core /// than the one used by the user. #[serde(default = "Settings::get_ruleset_version")] - pub version: String, + version: String, /// Categories of phonemes /// @@ -117,7 +114,7 @@ pub struct Settings { /// phonemes. It is currently not possible to have more than one /// character to be considered as one sound. 
#[serde(default)] - pub categories: HashMap, + categories: HashMap, /// Soundchange rules /// @@ -125,7 +122,7 @@ pub struct Settings { /// a regex to be matched while the second represents the change /// to be made to the input data. #[serde(default)] - pub rules: Vec<(Regex, String)>, + rules: Vec, } /// Representation inside the crate of LangEvolve’s settings. @@ -182,10 +179,9 @@ impl Settings { /// [`utils::SettingsTYpe`]: ./utils/enum.SettingsType.html /// [`Settings.version`]: ./struct.Settings.html#structfield.version pub fn import(path: &std::path::Path) -> std::io::Result { - use utils::SettingsType::{Json, Yaml}; + use SettingsType::{Json, Yaml}; let file_type = utils::get_file_type(&path); let content = utils::read_file(&path)?; - let settings: Settings = match file_type { Yaml => decode_settings!(serde_yaml, &content), Json => decode_settings!(serde_json, &content), @@ -236,6 +232,26 @@ impl Settings { Self::import(&path).unwrap() } + /// Add a new rule to the current settings + /// + /// # Arguments + /// + /// * `from` - Regex that should match the text to be replaced + /// * `to` - Regex that should replace some text + pub fn add_rule(&mut self, from: &str, to: &str) { + self.rules.push(Rule::new(from, to)) + } + + /// Add a new category of phonemes to the current settings + /// + /// # Arguments + /// + /// * `name` - Name of the category + /// * `content` - Content of the category, phonemes + pub fn add_category(&mut self, name: &str, content: &str) { + self.categories.insert(String::from(name), String::from(content)); + } + /// Export current settings to a file. /// /// The allowed file formats are described in the [`SettingsType`] enum. 
@@ -284,35 +300,12 @@ impl Settings { RULESET_CURRENT_VERSION.to_string() } - fn update_rules(&self) -> std::result::Result, String> { - let mut rules = self.rules.clone(); - - // TODO break categories in different rules - for (from, to) in rules.iter_mut() { - let re = Regex::new("%\\D"); - let from_match = re.is_match(from.as_str()); - let to_match = re.is_match(to); - if from_match || to_match { - for (category, content) in &self.categories { - if from_match { - *from = Regex::new( - from.to_string() - .replace( - format!("%{}", category).as_str(), - format!("[{}]", content).as_str(), - ) - .as_str() - ); - } - if to_match { - *to = to.to_string().replace( - format!("%{}", category).as_str(), - format!("[{}]", content).as_str() - ); - } - } - } - } + fn update_rules(&self) -> std::result::Result, String> { + let rules = self.rules.clone(); + let rules: Vec = rules + .iter() + .map(|x| x.update(&self.categories).unwrap()) + .collect(); Ok(rules) } @@ -345,10 +338,17 @@ impl Settings { let rules = self.update_rules().unwrap(); let mut s = s.clone(); debug!("==============================================="); - for (from, to) in rules { - debug!("from: {}\tto: {}", from.to_string(), to); + for rule in rules { + debug!( + "from: {}\tto: {}", + rule.get_from().to_string(), + rule.get_to() + ); debug!("old: {}", s); - s = from.replace_all(&s, to.as_str()).to_string(); + s = rule + .get_from() + .replace_all(&s, rule.get_to().as_str()) + .to_string(); debug!("new: {}", s); } Ok(s) diff --git a/src/settings/rule/mod.rs b/src/settings/rule/mod.rs new file mode 100644 index 0000000..fbbe245 --- /dev/null +++ b/src/settings/rule/mod.rs @@ -0,0 +1,94 @@ +extern crate serde; +extern crate serde_json; +extern crate serde_yaml; +use serde::{Deserialize, Serialize}; + +mod regex_wrapper; +use regex_wrapper::Regex; + +/// Representation of a rule in LangEvolveRs +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct Rule { + /// Regex that should match the input text + 
from: Regex, + /// Text to replace matched text + to: String, +} + +impl Rule { + /// Create new rule + /// + /// # Arguments + /// + /// * `from` - literal string that represents the regex that should match + /// the input text + /// * `to` - literal string that represents the regex text that should + /// replace the text matched by `from` + pub fn new(from: &str, to: &str) -> Self { + Rule { + from: Regex::new(from), + to: String::from(to), + } + } + + // TODO break categories in different rules + pub fn update( + &self, + categories: &std::collections::HashMap, + ) -> std::result::Result { + let mut rule = self.clone(); + let re = Regex::new("%\\D"); + let from_match = re.is_match(&self.from.as_str()); + let to_match = re.is_match(&self.to.as_str()); + if from_match && !to_match { + for (category, content) in categories { + rule.from = Regex::new( + rule.from + .to_string() + .replace( + format!("%{}", category).as_str(), + format!("[{}]", content).as_str(), + ) + .as_str(), + ); + } + } + Ok(rule) + } + + pub fn get_from(&self) -> &Regex { + &self.from + } + + pub fn get_to(&self) -> String { + self.to.clone() + } +} + +impl From for Rule { + fn from(source: String) -> Self { + let components: Vec<&str> = source.split_terminator(">").collect(); + Self { + from: Regex::new(components[0]), + to: String::from(components[1]), + } + } +} + +impl PartialEq for Rule { + fn eq(&self, other: &Self) -> bool { + self.from == other.from && self.to == other.to + } +} + +impl Eq for Rule {} + +#[test] +fn rule_new() { + let rule1 = Rule::new("([ae]+)i", "${1}i"); + let rule2 = Rule { + from: Regex::new("([ae]+)i"), + to: String::from("${1}i"), + }; + assert_eq!(rule1, rule2); +} diff --git a/src/settings/regex_wrapper.rs b/src/settings/rule/regex_wrapper.rs similarity index 81% rename from src/settings/regex_wrapper.rs rename to src/settings/rule/regex_wrapper.rs index f238a90..a819765 100644 --- a/src/settings/regex_wrapper.rs +++ b/src/settings/rule/regex_wrapper.rs @@ 
-1,17 +1,3 @@ -// extern crate serde; -// extern crate regex; -// use serde::{Deserialize, Serialize}; - -// #[derive(Clone, Debug, Deserialize, Serialize)] -// #[serde(transparent)] -// pub struct Regex(regex::Regex); - -// #[derive(Clone, Debug, Deserialize, Serialize)] -// #[serde(remote = "regex::Regex")] -// pub struct RegexDef{ -// #[serde(getter = "regex::Regex::to_string")] -// r: String -// } use std::{fmt, ops}; #[derive(Clone, Debug)] diff --git a/src/settings/utils.rs b/src/utils/mod.rs similarity index 100% rename from src/settings/utils.rs rename to src/utils/mod.rs