extern crate serde; extern crate serde_json; extern crate serde_yaml; use serde::{Deserialize, Serialize}; extern crate log; use log::{debug, error, info}; pub mod utils; use utils::SettingsType; pub mod regex_wrapper; use regex_wrapper::Regex; /// Current version of the ruleset. It will help determine if the ruleset is /// outdated or from a more recent version of the software than the one being in /// use. pub const RULESET_CURRENT_VERSION: i32 = 1; /// Encode a [`Settings`] struct to a filetype, returns a /// `std::result::Result` /// /// # Arguments /// /// * `funcrate` - `serde`-compatible crate to use, must implement `to_string` /// * `content` - content to encode, must be `Settings` struct /// /// # Example /// /// ```ignore /// # use lang_evolve_core::settings::*; /// # use lang_evolve_core::encode_settings; /// use std::io::{Error, ErrorKind}; /// use std::path::Path; /// let filetype = utils::get_file_type(Path::new("./path/to/file.json")); /// let s = Settings::new(); /// let content = match filetype { /// utils::SettingsType::Yaml => encode_settings!(serde_yaml, &s).unwrap(), /// utils::SettingsType::Json => encode_settings!(serde_json, &s).unwrap(), /// _ => panic!("Could not encode settings"), /// }; /// ``` /// /// [`Settings`]: ./settings/struct.Settings.html #[macro_export(local_inner_macros)] macro_rules! encode_settings { ($funcrate:ident, $content:expr) => { match $funcrate::to_string($content) { Err(e) => { log::error!("Could not serialize settings: {}", e.to_string()); return Err(std::io::Error::new( std::io::ErrorKind::InvalidData, e, )); } Ok(val) => val, } }; } /// Decode a [`Settings`] struct from a `std::std::String`, returns a /// std::result::Result /// /// # Arguments /// /// * `funcrate` - `serde`-compatible crate to use, mus implement `from_string` /// * `content` - `&str` to decode into a [`Settings`] /// /// # Example /// /// ```ignore /// # use lang_evolve_core::decode_settings; /// let str = r#"{"version":"1","categories":[],"rules":[]}"#; /// let settings = decode_settings!(serde_json, str); /// ``` /// /// [`Settings`]: ./settings/struct.Settings.html #[macro_export(local_inner_macros)] macro_rules! decode_settings { ($funcrate:ident, $content:expr) => { match $funcrate::from_str($content) { Err(e) => { log::error!("Could not import settings: {}", e.to_string()); return Err(std::io::Error::new( std::io::ErrorKind::InvalidInput, e, )); } Ok(val) => val, } }; } use std::collections::HashMap; /// Representation of the software’s settings /// /// This struct represents all the settings the software has to follow /// while running, which includes the phoneme categories as well as /// the soundchange rules to apply to the input text. #[derive(Clone, Debug, Deserialize, Serialize)] pub struct Settings { /// Represents the version of the loaded ruleset. /// /// It is used to detect obsolete ruleset representations or if a /// loaded ruleset comes from a newer version of lang_evolve_core /// than the one used by the user. #[serde(default = "Settings::get_ruleset_version")] pub version: String, /// Categories of phonemes /// /// This is a vector of categories of phonemes, represented /// themselves as pairs of strings. Each pair of strings has its /// first element represent the name of the category, which is /// generally represented by a single capital letter. The second /// element is a string where all its characters represent /// phonemes. It is currently not possible to have more than one /// character to be considered as one sound. #[serde(default)] pub categories: HashMap, /// Soundchange rules /// /// This is a vector of pairs of strings, the first one represents /// a regex to be matched while the second represents the change /// to be made to the input data. #[serde(default)] pub rules: HashMap, } /// Representation inside the crate of LangEvolve’s settings. impl Settings { /// Creates a new empty instance of [`Settings`] /// /// # Example /// /// ``` /// let s = lang_evolve_core::settings::Settings::new(); /// let content_yaml = r#"--- /// version: "1" /// categories: {} /// rules: {}"#; /// let content_json = r#"{"version":"1","categories":{},"rules":{}}"#; /// assert_eq!(content_yaml, serde_yaml::to_string(&s).unwrap()); /// assert_eq!(content_json, serde_json::to_string(&s).unwrap()); /// ``` /// /// [`Settings`]: ./settings/struct.Settings.html pub fn new() -> Self { Self { version: Self::get_ruleset_version(), categories: HashMap::new(), rules: HashMap::new(), } } /// Import settings from an imput file. /// /// The currently allowed file formats are described in the /// [`utils::SettingsType`] enum. If the ruleset version is higher than the /// current version (see [`Settings.version`]), then an error is returned. /// /// # Arguments /// /// * `path` - File to open and load settings from /// /// # Example /// /// ```no_run /// use std::path::Path; /// use lang_evolve_core::settings::Settings; /// let path_json = Path::new("settings.json"); /// let _s_json = Settings::import(&path_json).unwrap(); /// /// let path_yaml = Path::new("settings.yaml"); /// let _s_yaml = Settings::import(&path_yaml).unwrap(); /// /// let path_yml = Path::new("settings.yml"); /// let _s_yml = Settings::import(&path_yml).unwrap(); /// ``` /// /// [`utils::SettingsTYpe`]: ./utils/enum.SettingsType.html /// [`Settings.version`]: ./struct.Settings.html#structfield.version pub fn import(path: &std::path::Path) -> std::io::Result { use utils::SettingsType::{Json, Yaml}; let file_type = utils::get_file_type(&path); let content = utils::read_file(&path)?; let settings: Settings = match file_type { Yaml => decode_settings!(serde_yaml, &content), Json => decode_settings!(serde_json, &content), // Attempt to decode anyway _ => match Settings::from_str(&content.as_str()) { Ok(val) => val, Err(e) => { error!( "Could not decode input {}: {}", content, e.to_string() ); return Err(std::io::Error::new( std::io::ErrorKind::InvalidData, e, )); } }, }; if settings.version > Self::get_ruleset_version() { error!("Ruleset version too high!"); return Err(std::io::Error::new( std::io::ErrorKind::InvalidData, "Ruleset version too high!", )); } info!("Successfuly imported {}", path.display()); Ok(settings) } /// Import settings from file path described by the argument `path` /// /// # Arguments /// /// * `path` - path to the file from which settings should be imported /// /// # Example /// /// ```no_run /// let s = lang_evolve_core::settings::Settings::from("settings.yml"); /// ``` pub fn from(s: S) -> Self where S: ToString, { let s = s.to_string(); let path = std::path::Path::new(&s); Self::import(&path).unwrap() } /// Export current settings to a file. /// /// The allowed file formats are described in the [`SettingsType`] enum. /// /// # Arguments /// /// * `path` - Path to write and export settings to /// /// # Example /// /// ``` /// use std::path::Path; /// let s = lang_evolve_core::settings::Settings::new(); /// /// // Export to JSON /// let path_json = Path::new("./output.json"); /// s.export(&path_json).unwrap(); /// /// // Export to Yaml, both ".yml" and ".yaml" work /// let path_yaml = Path::new("./output.yaml"); /// s.export(&path_yaml).unwrap(); /// let path_yml = Path::new("./output.yml"); /// s.export(&path_yml).unwrap(); /// ``` /// /// [`SettingsType`]: ./utils/enum.SettingsType.html pub fn export(&self, path: &std::path::Path) -> std::io::Result<()> { let filetype = utils::get_file_type(&path); let content = match filetype { SettingsType::Yaml => encode_settings!(serde_yaml, &self), SettingsType::Json => encode_settings!(serde_json, &self), _ => { error!("Unknown filetype {}", path.to_str().unwrap()); return Err(std::io::Error::new( std::io::ErrorKind::InvalidData, "Unknown file type", )); } }; info!("Successfuly exported settings to {}", path.display()); utils::write_file(&path, &content) } /// Get the current ruleset version of LangEvolve. fn get_ruleset_version() -> String { RULESET_CURRENT_VERSION.to_string() } fn update_rules(&self) -> Vec<(Regex, String)> { let mut rules: Vec<(Regex, String)> = Vec::new(); for (from, to) in &self.rules { rules.push((from.clone(), to.to_string())); } for (category, content) in &self.categories { let mut temp_rules: Vec<(Regex, String)> = Vec::new(); for (from, to) in &rules { let from = Regex::new( from.to_string() .replace( category.as_str(), format!("[{}]", content).as_str(), ) .replace("%", "") .as_str(), ); let to = to.to_string().replace( category, format!("[{}]", content).replace("%", "").as_str(), ); temp_rules.push((from, to)); } rules = temp_rules.clone(); } rules } /// Apply list of rules to input /// /// The list of rules in the struct will be applied to the input `s`. If the /// rule contains the `%` character followed by a capital letter, this marks /// a category of phonemes and should be replaced by them. For instance, we /// have: /// - the category `C` defined as `bcdfg` /// - the rule `%Ci` to `%Cj` /// The rule should be rewritten as `[bcdfg]` to `[bcdfg]j` /// /// # Arguments /// /// * `s` - Input to modify /// /// # Example /// /// ``` /// # use lang_evolve_core::settings::Settings; /// let settings = Settings::new(); /// // add some rules... /// let input = String::new(); /// // set some input /// let _output = settings.apply(input); /// ``` pub fn apply(&self, s: String) -> String { // Replace all `%C`s by their equivalent let rules = self.update_rules(); let mut s = s.clone(); debug!("==============================================="); for (from, to) in rules { debug!("from: {}\tto: {}", from.to_string(), to); debug!("old: {}", s); s = from.replace_all(&s, to.as_str()).to_string(); debug!("new: {}", s); } s } } use std::str::FromStr; impl FromStr for Settings { type Err = serde_yaml::Error; /// Decode a litteral string into a `Settings` struct. Works only for /// supported file types described in `SettingsType`. It will try to decode /// the input `s` by any mean known by `SettingsType`. /// /// # Arguments /// /// * `s` - litteral string to decode into a `Settings` struct /// /// # Example /// /// ``` /// # use std::str::FromStr; /// let s = r#"{"version":"1","categories":{},"rules":{}}"#; /// let settings = lang_evolve_core::settings::Settings::from_str(s).unwrap(); /// ``` fn from_str(s: &str) -> Result { match serde_json::from_str::(s) { Ok(val) => Ok(val), Err(_) => match serde_yaml::from_str::(s) { Ok(val) => Ok(val), Err(e) => { error!("Could not decode input {}: {}", s, e.to_string()); return Err(e); } }, } } } use std::fmt; use std::fmt::Display; impl Display for Settings { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", serde_json::to_string(&self).unwrap()) } } impl PartialEq for Settings { fn eq(&self, other: &Self) -> bool { self.version == other.version && self.categories == other.categories && self.rules == other.rules } } impl Eq for Settings {} #[test] fn write_settings() { let s = Settings::new(); let path = std::path::Path::new("test.yaml"); let settings = r#"--- version: "1" categories: {} rules: {}"#; utils::write_file(&path, &serde_yaml::to_string(&s).unwrap()).unwrap(); assert_eq!(settings, utils::read_file(&path).unwrap()); } #[test] fn read_settings() { let s1 = Settings::new(); let path = std::path::Path::new("test.yml"); s1.export(&path).unwrap(); let s2 = Settings::import(&path).unwrap(); assert_eq!(s1, s2); }