// lang-evolve-core/src/settings/mod.rs

use serde::{Deserialize, Serialize};
use log::{debug, error, info};
use crate::utils::{self, SettingsType};
use prettydiff::diff_words;
mod rule;
use rule::Rule;
2020-03-27 22:31:24 +00:00
/// Current version of the ruleset format.
///
/// It helps determine whether a ruleset is outdated or comes from a more
/// recent version of the software than the one in use. Its string form is
/// what `Settings::get_ruleset_version` returns.
const RULESET_CURRENT_VERSION: i32 = 1;
/// Serialize a [`Settings`] struct with the given `serde` backend.
///
/// The macro expands to an expression evaluating to the serialized
/// `String`. When serialization fails, the error is logged and the
/// *enclosing function* returns early with a `std::io::Error` of kind
/// `InvalidData`; the macro is therefore only usable inside functions
/// returning a `std::io::Result`.
///
/// # Arguments
///
/// * `funcrate` - `serde`-compatible crate to use, must implement `to_string`
/// * `content` - content to encode, must be `Settings` struct
///
/// # Example
///
/// ```no_run
/// use std::path::Path;
/// use lang_evolve_core::utils;
///
/// let settings = Settings::new();
/// let filetype = utils::get_file_type(Path::new("settings.yml"));
///
/// let content = match filetype {
///     SettingsType::Yaml => encode_settings!(serde_yaml, &settings),
///     SettingsType::Json => encode_settings!(serde_json, &settings),
///     _ => String::from("Error!"),
/// };
/// ```
///
/// [`Settings`]: ./settings/struct.Settings.html
macro_rules! encode_settings {
    ($funcrate:ident, $content:expr) => {
        $funcrate::to_string($content).map_err(|e| {
            log::error!("Could not serialize settings: {}", e);
            std::io::Error::new(std::io::ErrorKind::InvalidData, e)
        })?
    };
}
/// Deserialize a [`Settings`] struct from a `&str` with the given `serde`
/// backend.
///
/// The macro expands to an expression evaluating to the decoded value.
/// When decoding fails, the error is logged and the *enclosing function*
/// returns early with a `std::io::Error` of kind `InvalidInput`; the macro
/// is therefore only usable inside functions returning a `std::io::Result`.
///
/// # Arguments
///
/// * `funcrate` - `serde`-compatible crate to use, must implement `from_str`
/// * `content` - `&str` to decode into a [`Settings`]
///
/// # Example
///
/// ```no_run
/// # use lang_evolve_core::decode_settings;
/// let str = r#"{"version":"1","categories":{},"rules":[]}"#;
/// let settings = decode_settings!(serde_json, str);
/// ```
///
/// [`Settings`]: ./settings/struct.Settings.html
macro_rules! decode_settings {
    ($funcrate:ident, $content:expr) => {
        $funcrate::from_str($content).map_err(|e| {
            log::error!("Could not import settings: {}", e);
            std::io::Error::new(std::io::ErrorKind::InvalidInput, e)
        })?
    };
}
use std::collections::HashMap;
/// Representation of the software's settings
///
/// This struct represents all the settings the software has to follow
/// while running, which includes the phoneme categories as well as
/// the soundchange rules to apply to the input text.
///
/// The declaration order of the fields is also the order in which they are
/// serialized, so it must not be changed lightly.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Settings {
/// Represents the version of the loaded ruleset.
///
/// It is used to detect obsolete ruleset representations or if a
/// loaded ruleset comes from a newer version of lang_evolve_core
/// than the one used by the user. When the field is absent from the
/// deserialized input, it defaults to the value returned by
/// `Settings::get_ruleset_version`.
#[serde(default = "Settings::get_ruleset_version")]
version: String,
/// Categories of phonemes
///
/// This is a map of categories of phonemes. Each key is the name of
/// a category, which is generally represented by a single capital
/// letter. The associated value is a string where all its characters
/// represent phonemes. It is currently not possible to have more than
/// one character to be considered as one sound. Defaults to an empty
/// map when absent from the deserialized input.
#[serde(default)]
categories: HashMap<String, String>,
/// Soundchange rules
///
/// This is a vector of [`Rule`]s; each one is built from a regex to be
/// matched and the change to be made to the input data (see
/// `Settings::add_rule`). Defaults to an empty vector when absent from
/// the deserialized input.
#[serde(default)]
rules: Vec<Rule>,
}
/// Representation inside the crate of LangEvolve's settings.
impl Settings {
/// Creates a new empty instance of [`Settings`]
2020-03-27 17:26:31 +00:00
///
/// # Example
///
/// ```
/// let s = lang_evolve_core::settings::Settings::new();
/// let content_yaml = r#"---
/// version: "1"
/// categories: {}
/// rules: []"#;
/// let content_json = r#"{"version":"1","categories":{},"rules":[]}"#;
2020-03-27 17:26:31 +00:00
/// assert_eq!(content_yaml, serde_yaml::to_string(&s).unwrap());
/// assert_eq!(content_json, serde_json::to_string(&s).unwrap());
/// ```
///
/// [`Settings`]: ./settings/struct.Settings.html
pub fn new() -> Self {
Self {
version: Self::get_ruleset_version(),
categories: HashMap::new(),
rules: Vec::new(),
}
}
/// Import settings from an imput file.
///
/// The currently allowed file formats are described in the
/// [`utils::SettingsType`] enum. If the ruleset version is higher than the
/// current version (see [`Settings.version`]), then an error is returned.
2020-03-27 17:26:31 +00:00
///
/// # Arguments
///
/// * `path` - File to open and load settings from
///
/// # Example
///
/// ```
2020-03-27 17:26:31 +00:00
/// use std::path::Path;
/// # use lang_evolve_core::settings::Settings;
/// # let s = Settings::new();
/// # for path in vec!["settings.json", "settings.yaml", "settings.yml"] {
/// # let path = Path::new(path);
/// # s.export(&path).unwrap();
/// # }
///
2020-03-27 17:26:31 +00:00
/// let path_json = Path::new("settings.json");
/// let _s_json = Settings::import(&path_json).unwrap();
///
/// let path_yaml = Path::new("settings.yaml");
/// let _s_yaml = Settings::import(&path_yaml).unwrap();
///
/// let path_yml = Path::new("settings.yml");
/// let _s_yml = Settings::import(&path_yml).unwrap();
/// ```
///
/// [`utils::SettingsTYpe`]: ./utils/enum.SettingsType.html
/// [`Settings.version`]: ./struct.Settings.html#structfield.version
pub fn import(path: &std::path::Path) -> std::io::Result<Self> {
use SettingsType::{Json, Yaml};
let file_type = utils::get_file_type(&path);
let content = utils::read_file(&path)?;
let settings: Settings = match file_type {
Yaml => decode_settings!(serde_yaml, &content),
Json => decode_settings!(serde_json, &content),
// Attempt to decode anyway
_ => match Settings::from_str(&content.as_str()) {
Ok(val) => val,
Err(e) => {
error!(
"Could not decode input {}: {}",
content,
e.to_string()
);
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
e,
));
}
},
};
2020-07-12 10:28:19 +00:00
if settings.version > Self::get_ruleset_version() {
error!("Ruleset version too high!");
2020-07-12 10:28:19 +00:00
Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
"Ruleset version too high!",
2020-07-12 10:28:19 +00:00
))
} else {
info!("Successfuly imported {}", path.display());
Ok(settings)
}
}
2020-03-27 17:26:31 +00:00
/// Import settings from file path described by the argument `path`
///
/// # Arguments
///
/// * `path` - path to the file from which settings should be imported
///
/// # Example
///
/// ```no_run
/// let s = lang_evolve_core::settings::Settings::from("settings.yml");
/// ```
2020-07-12 10:28:19 +00:00
pub fn from<S>(s: S) -> std::io::Result<Self>
where
S: ToString,
{
let s = s.to_string();
let path = std::path::Path::new(&s);
2020-07-12 10:28:19 +00:00
Self::import(&path)
}
/// Append a new soundchange rule to the current settings.
///
/// # Arguments
///
/// * `from` - Regex that should match the text to be replaced
/// * `to` - Replacement for the matched text
pub fn add_rule(&mut self, from: &str, to: &str) {
    let rule = Rule::new(from, to);
    self.rules.push(rule);
}
/// Register a category of phonemes in the current settings.
///
/// A category already stored under the same name is overwritten.
///
/// # Arguments
///
/// * `name` - Name of the category
/// * `content` - Content of the category, phonemes
pub fn add_category(&mut self, name: &str, content: &str) {
    let key = name.to_owned();
    let phonemes = content.to_owned();
    self.categories.insert(key, phonemes);
}
/// Export current settings to a file.
///
/// The allowed file formats are described in the [`SettingsType`] enum.
2020-03-27 17:26:31 +00:00
///
/// # Arguments
///
/// * `path` - Path to write and export settings to
///
/// # Example
///
/// ```
2020-07-11 15:47:28 +00:00
/// # use lang_evolve_core::settings::Settings;
2020-03-27 17:26:31 +00:00
/// use std::path::Path;
2020-07-11 15:47:28 +00:00
///
/// let s = Settings::new();
2020-03-27 17:26:31 +00:00
///
/// // Export to JSON
2020-07-12 10:24:00 +00:00
/// let path_json = Path::new("settings.json");
2020-03-27 17:26:31 +00:00
/// s.export(&path_json).unwrap();
///
/// // Export to Yaml, both ".yml" and ".yaml" work
2020-07-12 10:24:00 +00:00
/// let path_yaml = Path::new("settings.yaml");
2020-03-27 17:26:31 +00:00
/// s.export(&path_yaml).unwrap();
2020-07-12 10:24:00 +00:00
/// let path_yml = Path::new("settings.yml");
2020-03-27 17:26:31 +00:00
/// s.export(&path_yml).unwrap();
/// ```
///
/// [`SettingsType`]: ./utils/enum.SettingsType.html
2020-03-27 17:26:31 +00:00
pub fn export(&self, path: &std::path::Path) -> std::io::Result<()> {
let filetype = utils::get_file_type(&path);
2020-03-27 17:26:31 +00:00
let content = match filetype {
SettingsType::Yaml => encode_settings!(serde_yaml, &self),
SettingsType::Json => encode_settings!(serde_json, &self),
_ => {
error!("Unknown filetype {}", path.to_str().unwrap());
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
"Unknown file type",
));
}
2020-03-27 17:26:31 +00:00
};
2020-03-27 22:26:16 +00:00
info!("Successfuly exported settings to {}", path.display());
2020-03-27 17:26:31 +00:00
utils::write_file(&path, &content)
}
/// Get the current ruleset version of LangEvolve.
fn get_ruleset_version() -> String {
2020-03-27 22:26:16 +00:00
RULESET_CURRENT_VERSION.to_string()
2020-03-27 17:26:31 +00:00
}
/// Build the list of rules updated with the current phoneme categories.
///
/// Each rule of `self.rules` is passed through `Rule::update` together
/// with `self.categories`; the stored rules themselves are left untouched.
///
/// # Errors
///
/// Returns the message of the first rule that fails to update instead of
/// panicking.
fn update_rules(&self) -> std::result::Result<Vec<Rule>, String> {
    self.rules
        .iter()
        .map(|rule| {
            rule.update(&self.categories)
                // NOTE(review): assumes the error type of `Rule::update`
                // implements `Display` (true for `String`) - confirm.
                .map_err(|e| e.to_string())
        })
        // Collecting into a `Result` stops at the first failing rule.
        .collect()
}
/// Apply list of rules to input
///
2020-07-11 15:47:28 +00:00
/// The list of rules in the struct will be applied to the input `new`. If the
/// rule contains the `%` character followed by a capital letter, this marks
/// a category of phonemes and should be replaced by them. For instance, we
/// have:
/// - the category `C` defined as `bcdfg`
/// - the rule `%Ci` to `%Cj`
/// The rule should be rewritten as `[bcdfg]` to `[bcdfg]j`
///
/// # Arguments
///
2020-07-11 15:47:28 +00:00
/// * `new` - Input to modify
///
/// # Example
///
/// ```
/// # use lang_evolve_core::settings::Settings;
/// let settings = Settings::new();
/// // add some rules...
/// // set some input
2020-07-11 15:47:28 +00:00
/// let input = String::new();
/// let _output = settings.apply(input);
/// ```
pub fn apply(&self, s: String) -> std::result::Result<String, String> {
2020-04-04 16:54:46 +00:00
// TODO Add Error handling
let rules = self.update_rules().unwrap();
let mut s = s;
debug!("===============================================");
for rule in rules {
debug!(
"from: \"{}\"\tto: \"{}\"",
rule.get_from().to_string(),
rule.get_to()
);
let old = s.clone();
let new = rule
.get_from()
.replace_all(&s, rule.get_to().as_str())
.to_string();
if cfg!(debug_assertions) {
let diffs = diff_words(&old, &new);
if diffs.diff().len() > 1 {
debug!("diff:\n{}", diff_words(&old, &new));
} else {
debug!("diff: No changes");
}
}
s = new;
}
Ok(s)
}
}
2020-07-11 15:52:14 +00:00
impl Default for Settings {
/// Creates a new empty instance of [`Settings`]
///
/// # Example
///
/// ```
/// let s = lang_evolve_core::settings::Settings::default();
/// let content_yaml = r#"---
/// version: "1"
/// categories: {}
/// rules: []"#;
/// let content_json = r#"{"version":"1","categories":{},"rules":[]}"#;
/// assert_eq!(content_yaml, serde_yaml::to_string(&s).unwrap());
/// assert_eq!(content_json, serde_json::to_string(&s).unwrap());
/// ```
///
/// [`Settings`]: ./settings/struct.Settings.html
fn default() -> Self {
Self::new()
}
}
use std::str::FromStr;
impl FromStr for Settings {
type Err = serde_yaml::Error;
/// Decode a litteral string into a `Settings` struct. Works only for
/// supported file types described in `SettingsType`. It will try to decode
/// the input `s` by any mean known by `SettingsType`.
2020-03-27 22:25:16 +00:00
///
/// # Arguments
///
/// * `s` - litteral string to decode into a `Settings` struct
2020-03-27 22:25:16 +00:00
///
/// # Example
///
/// ```
/// # use std::str::FromStr;
/// let s = r#"{"version":"1","categories":{},"rules":[]}"#;
/// let settings = lang_evolve_core::settings::Settings::from_str(s).unwrap();
/// ```
fn from_str(s: &str) -> Result<Self, Self::Err> {
match serde_json::from_str::<Settings>(s) {
Ok(val) => Ok(val),
Err(_) => match serde_yaml::from_str::<Settings>(s) {
Ok(val) => Ok(val),
Err(e) => {
error!("Could not decode input {}: {}", s, e.to_string());
Err(e)
}
},
}
}
}
use std::fmt;
use std::fmt::Display;
impl Display for Settings {
    /// Writes the settings as their compact JSON representation.
    ///
    /// Panics if the settings cannot be serialized to JSON.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let json = serde_json::to_string(self).unwrap();
        f.write_str(&json)
    }
}
impl PartialEq for Settings {
fn eq(&self, other: &Self) -> bool {
self.version == other.version
&& self.categories == other.categories
&& self.rules == other.rules
2020-03-27 22:25:16 +00:00
}
}
// Marker impl: declares the equality defined by the `PartialEq` impl of
// `Settings` to be a full equivalence relation.
impl Eq for Settings {}