use serde::{Deserialize, Serialize}; use log::{debug, error, info}; use crate::utils::{self, SettingsType}; use prettydiff::diff_words; mod rule; use rule::Rule; /// Current version of the ruleset. It will help determine if the ruleset is /// outdated or from a more recent version of the software than the one being in /// use. const RULESET_CURRENT_VERSION: i32 = 1; /// Encode a [`Settings`] struct to a filetype, returns a /// `std::result::Result` /// /// # Arguments /// /// * `funcrate` - `serde`-compatible crate to use, must implement `to_string` /// * `content` - content to encode, must be `Settings` struct /// /// # Example /// /// ```ignore /// # use lang_evolve_core::settings::*; /// # use lang_evolve_core::encode_settings; /// use std::io::{Error, ErrorKind}; /// use std::path::Path; /// let filetype = utils::get_file_type(Path::new("./path/to/file.json")); /// let s = Settings::new(); /// let content = match filetype { /// utils::SettingsType::Yaml => encode_settings!(serde_yaml, &s).unwrap(), /// utils::SettingsType::Json => encode_settings!(serde_json, &s).unwrap(), /// _ => panic!("Could not encode settings"), /// }; /// ``` /// /// [`Settings`]: ./settings/struct.Settings.html macro_rules! encode_settings { ($funcrate:ident, $content:expr) => { match $funcrate::to_string($content) { Err(e) => { log::error!("Could not serialize settings: {}", e.to_string()); return Err(std::io::Error::new( std::io::ErrorKind::InvalidData, e, )); } Ok(val) => val, } }; } /// Decode a [`Settings`] struct from a `std::std::String`, returns a /// std::result::Result /// /// # Arguments /// /// * `funcrate` - `serde`-compatible crate to use, mus implement `from_string` /// * `content` - `&str` to decode into a [`Settings`] /// /// # Example /// /// ```ignore /// # use lang_evolve_core::decode_settings; /// let str = r#"{"version":"1","categories":[],"rules":[]}"#; /// let settings = decode_settings!(serde_json, str); /// ``` /// /// [`Settings`]: ./settings/struct.Settings.html macro_rules! decode_settings { ($funcrate:ident, $content:expr) => { match $funcrate::from_str($content) { Err(e) => { log::error!("Could not import settings: {}", e.to_string()); return Err(std::io::Error::new( std::io::ErrorKind::InvalidInput, e, )); } Ok(val) => val, } }; } use std::collections::HashMap; /// Representation of the software’s settings /// /// This struct represents all the settings the software has to follow /// while running, which includes the phoneme categories as well as /// the soundchange rules to apply to the input text. #[derive(Clone, Debug, Deserialize, Serialize)] pub struct Settings { /// Represents the version of the loaded ruleset. /// /// It is used to detect obsolete ruleset representations or if a /// loaded ruleset comes from a newer version of lang_evolve_core /// than the one used by the user. #[serde(default = "Settings::get_ruleset_version")] version: String, /// Categories of phonemes /// /// This is a vector of categories of phonemes, represented /// themselves as pairs of strings. Each pair of strings has its /// first element represent the name of the category, which is /// generally represented by a single capital letter. The second /// element is a string where all its characters represent /// phonemes. It is currently not possible to have more than one /// character to be considered as one sound. #[serde(default)] categories: HashMap, /// Soundchange rules /// /// This is a vector of pairs of strings, the first one represents /// a regex to be matched while the second represents the change /// to be made to the input data. #[serde(default)] rules: Vec, } /// Representation inside the crate of LangEvolve’s settings. impl Settings { /// Creates a new empty instance of [`Settings`] /// /// # Example /// /// ``` /// let s = lang_evolve_core::settings::Settings::new(); /// let content_yaml = r#"--- /// version: "1" /// categories: {} /// rules: []"#; /// let content_json = r#"{"version":"1","categories":{},"rules":[]}"#; /// assert_eq!(content_yaml, serde_yaml::to_string(&s).unwrap()); /// assert_eq!(content_json, serde_json::to_string(&s).unwrap()); /// ``` /// /// [`Settings`]: ./settings/struct.Settings.html pub fn new() -> Self { Self { version: Self::get_ruleset_version(), categories: HashMap::new(), rules: Vec::new(), } } /// Import settings from an imput file. /// /// The currently allowed file formats are described in the /// [`utils::SettingsType`] enum. If the ruleset version is higher than the /// current version (see [`Settings.version`]), then an error is returned. /// /// # Arguments /// /// * `path` - File to open and load settings from /// /// # Example /// /// ```no_run /// use std::path::Path; /// use lang_evolve_core::settings::Settings; /// let path_json = Path::new("settings.json"); /// let _s_json = Settings::import(&path_json).unwrap(); /// /// let path_yaml = Path::new("settings.yaml"); /// let _s_yaml = Settings::import(&path_yaml).unwrap(); /// /// let path_yml = Path::new("settings.yml"); /// let _s_yml = Settings::import(&path_yml).unwrap(); /// ``` /// /// [`utils::SettingsTYpe`]: ./utils/enum.SettingsType.html /// [`Settings.version`]: ./struct.Settings.html#structfield.version pub fn import(path: &std::path::Path) -> std::io::Result { use SettingsType::{Json, Yaml}; let file_type = utils::get_file_type(&path); let content = utils::read_file(&path)?; let settings: Settings = match file_type { Yaml => decode_settings!(serde_yaml, &content), Json => decode_settings!(serde_json, &content), // Attempt to decode anyway _ => match Settings::from_str(&content.as_str()) { Ok(val) => val, Err(e) => { error!( "Could not decode input {}: {}", content, e.to_string() ); return Err(std::io::Error::new( std::io::ErrorKind::InvalidData, e, )); } }, }; if settings.version > Self::get_ruleset_version() { error!("Ruleset version too high!"); return Err(std::io::Error::new( std::io::ErrorKind::InvalidData, "Ruleset version too high!", )); } info!("Successfuly imported {}", path.display()); Ok(settings) } /// Import settings from file path described by the argument `path` /// /// # Arguments /// /// * `path` - path to the file from which settings should be imported /// /// # Example /// /// ```no_run /// let s = lang_evolve_core::settings::Settings::from("settings.yml"); /// ``` pub fn from(s: S) -> Self where S: ToString, { let s = s.to_string(); let path = std::path::Path::new(&s); Self::import(&path).unwrap() } /// Add a new rule to the current settings /// /// # Arguments /// /// * `from` - Regex that should match the text to be replaced /// * `to` - Regex that should replace some text pub fn add_rule(&mut self, from: &str, to: &str) { self.rules.push(Rule::new(from, to)) } /// Add a new category of phonemes to the current settings /// /// # Arguments /// /// * `name` - Name of the category /// * `content` - Content of the category, phonemes pub fn add_category(&mut self, name: &str, content: &str) { self.categories.insert(String::from(name), String::from(content)); } /// Export current settings to a file. /// /// The allowed file formats are described in the [`SettingsType`] enum. /// /// # Arguments /// /// * `path` - Path to write and export settings to /// /// # Example /// /// ``` /// # use lang_evolve_core::settings::Settings; /// use std::path::Path; /// /// let s = Settings::new(); /// /// // Export to JSON /// let path_json = Path::new("./output.json"); /// s.export(&path_json).unwrap(); /// /// // Export to Yaml, both ".yml" and ".yaml" work /// let path_yaml = Path::new("./output.yaml"); /// s.export(&path_yaml).unwrap(); /// let path_yml = Path::new("./output.yml"); /// s.export(&path_yml).unwrap(); /// ``` /// /// [`SettingsType`]: ./utils/enum.SettingsType.html pub fn export(&self, path: &std::path::Path) -> std::io::Result<()> { let filetype = utils::get_file_type(&path); let content = match filetype { SettingsType::Yaml => encode_settings!(serde_yaml, &self), SettingsType::Json => encode_settings!(serde_json, &self), _ => { error!("Unknown filetype {}", path.to_str().unwrap()); return Err(std::io::Error::new( std::io::ErrorKind::InvalidData, "Unknown file type", )); } }; info!("Successfuly exported settings to {}", path.display()); utils::write_file(&path, &content) } /// Get the current ruleset version of LangEvolve. fn get_ruleset_version() -> String { RULESET_CURRENT_VERSION.to_string() } fn update_rules(&self) -> std::result::Result, String> { let rules = self.rules.clone(); let rules: Vec = rules .iter() .map(|rule| rule.update(&self.categories).unwrap()) .collect(); Ok(rules) } /// Apply list of rules to input /// /// The list of rules in the struct will be applied to the input `new`. If the /// rule contains the `%` character followed by a capital letter, this marks /// a category of phonemes and should be replaced by them. For instance, we /// have: /// - the category `C` defined as `bcdfg` /// - the rule `%Ci` to `%Cj` /// The rule should be rewritten as `[bcdfg]` to `[bcdfg]j` /// /// # Arguments /// /// * `new` - Input to modify /// /// # Example /// /// ``` /// # use lang_evolve_core::settings::Settings; /// let settings = Settings::new(); /// // add some rules... /// // set some input /// let input = String::new(); /// let _output = settings.apply(input); /// ``` pub fn apply(&self, s: String) -> std::result::Result { // TODO Add Error handling let rules = self.update_rules().unwrap(); let mut s = s; debug!("==============================================="); for rule in rules { debug!( "from: {}\tto: {}", rule.get_from().to_string(), rule.get_to() ); debug!("old: {}", s); s = rule .get_from() .replace_all(&s, rule.get_to().as_str()) .to_string(); debug!("new: {}", s); } Ok(s) } } use std::str::FromStr; impl FromStr for Settings { type Err = serde_yaml::Error; /// Decode a litteral string into a `Settings` struct. Works only for /// supported file types described in `SettingsType`. It will try to decode /// the input `s` by any mean known by `SettingsType`. /// /// # Arguments /// /// * `s` - litteral string to decode into a `Settings` struct /// /// # Example /// /// ``` /// # use std::str::FromStr; /// let s = r#"{"version":"1","categories":{},"rules":[]}"#; /// let settings = lang_evolve_core::settings::Settings::from_str(s).unwrap(); /// ``` fn from_str(s: &str) -> Result { match serde_json::from_str::(s) { Ok(val) => Ok(val), Err(_) => match serde_yaml::from_str::(s) { Ok(val) => Ok(val), Err(e) => { error!("Could not decode input {}: {}", s, e.to_string()); Err(e) } }, } } } use std::fmt; use std::fmt::Display; impl Display for Settings { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", serde_json::to_string(&self).unwrap()) } } impl PartialEq for Settings { fn eq(&self, other: &Self) -> bool { self.version == other.version && self.categories == other.categories && self.rules == other.rules } } impl Eq for Settings {}