From ef8c02fc97b910b66726308be879fbc2335425bc Mon Sep 17 00:00:00 2001 From: Lucien Cartier-Tilet Date: Sat, 4 Apr 2020 15:46:29 +0200 Subject: [PATCH] Good progress with applying rules, room for improvement Rules are applied; however, some bugs remain: - The HashMap for rules should be replaced by a Vec so they can be stored in order and not randomly - For some reason, the `%` is not removed from some rules in the private function `update_rules` in the `Settings` struct. - Make it so replacements between square brackets work correctly --- src/lib.rs | 6 +++- src/settings/mod.rs | 83 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 80 insertions(+), 9 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index decf7c9..c6525cb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,7 +18,6 @@ extern crate simplelog; use log::{info, warn}; use simplelog::*; - pub mod settings; /// Initializes the crate @@ -48,6 +47,11 @@ pub fn init() -> std::result::Result<(), log::SetLoggerError> { TerminalMode::Mixed, ) .unwrap(), + WriteLogger::new( + LevelFilter::Debug, + Config::default(), + File::create("core.log").unwrap(), + ), WriteLogger::new( LevelFilter::Info, Config::default(), diff --git a/src/settings/mod.rs b/src/settings/mod.rs index 2284d9c..577e64a 100644 --- a/src/settings/mod.rs +++ b/src/settings/mod.rs @@ -4,7 +4,7 @@ extern crate serde_yaml; use serde::{Deserialize, Serialize}; extern crate log; -use log::{error, info}; +use log::{debug, error, info}; pub mod utils; use utils::SettingsType; @@ -125,7 +125,7 @@ pub struct Settings { /// a regex to be matched while the second represents the change /// to be made to the input data. #[serde(default)] - pub rules: HashMap, + pub rules: HashMap, } /// Representation inside the crate of LangEvolve’s settings. 
@@ -138,9 +138,9 @@ impl Settings { /// let s = lang_evolve_core::settings::Settings::new(); /// let content_yaml = r#"--- /// version: "1" - /// categories: [] - /// rules: []"#; - /// let content_json = r#"{"version":"1","categories":[],"rules":[]}"#; + /// categories: {} + /// rules: {}"#; + /// let content_json = r#"{"version":"1","categories":{},"rules":{}}"#; /// assert_eq!(content_yaml, serde_yaml::to_string(&s).unwrap()); /// assert_eq!(content_json, serde_json::to_string(&s).unwrap()); /// ``` @@ -283,6 +283,73 @@ impl Settings { fn get_ruleset_version() -> String { RULESET_CURRENT_VERSION.to_string() } + + fn update_rules(&self) -> Vec<(Regex, String)> { + let mut rules: Vec<(Regex, String)> = Vec::new(); + for (from, to) in &self.rules { + rules.push((from.clone(), to.to_string())); + } + + for (category, content) in &self.categories { + let mut temp_rules: Vec<(Regex, String)> = Vec::new(); + for (from, to) in &rules { + let from = Regex::new( + from.to_string() + .replace( + category.as_str(), + format!("[{}]", content).as_str(), + ) + .replace("%", "") + .as_str(), + ); + let to = to.to_string().replace( + category, + format!("[{}]", content).replace("%", "").as_str(), + ); + temp_rules.push((from, to)); + } + rules = temp_rules.clone(); + } + rules + } + + /// Apply list of rules to input + /// + /// The list of rules in the struct will be applied to the input `s`. If the + /// rule contains the `%` character followed by a capital letter, this marks + /// a category of phonemes and should be replaced by them. For instance, we + /// have: + /// - the category `C` defined as `bcdfg` + /// - the rule `%Ci` to `%Cj` + /// The rule should be rewritten as `[bcdfg]i` to `[bcdfg]j` + /// + /// # Arguments + /// + /// * `s` - Input to modify + /// + /// # Example + /// + /// ``` + /// # use lang_evolve_core::settings::Settings; + /// let settings = Settings::new(); + /// // add some rules... 
+ /// let input = String::new(); + /// // set some input + /// let _output = settings.apply(input); + /// ``` + pub fn apply(&self, s: String) -> String { + // Replace all `%C`s by their equivalent + let rules = self.update_rules(); + let mut s = s.clone(); + debug!("==============================================="); + for (from, to) in rules { + debug!("from: {}\tto: {}", from.to_string(), to); + debug!("old: {}", s); + s = from.replace_all(&s, to.as_str()).to_string(); + debug!("new: {}", s); + } + s + } } use std::str::FromStr; @@ -301,7 +368,7 @@ impl FromStr for Settings { /// /// ``` /// # use std::str::FromStr; - /// let s = r#"{"version":"1","categories":[],"rules":[]}"#; + /// let s = r#"{"version":"1","categories":{},"rules":{}}"#; /// let settings = lang_evolve_core::settings::Settings::from_str(s).unwrap(); /// ``` fn from_str(s: &str) -> Result { @@ -342,8 +409,8 @@ fn write_settings() { let path = std::path::Path::new("test.yaml"); let settings = r#"--- version: "1" -categories: [] -rules: []"#; +categories: {} +rules: {}"#; utils::write_file(&path, &serde_yaml::to_string(&s).unwrap()).unwrap(); assert_eq!(settings, utils::read_file(&path).unwrap()); }