lang-evolve-core/src/settings/mod.rs

429 lines
14 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

extern crate serde;
extern crate serde_json;
extern crate serde_yaml;
use serde::{Deserialize, Serialize};
extern crate log;
use log::{debug, error, info};
pub mod utils;
use utils::SettingsType;
pub mod regex_wrapper;
use regex_wrapper::Regex;
/// Current version of the ruleset format.
///
/// It helps determine whether a loaded ruleset is outdated or comes from a
/// more recent version of the software than the one in use. Its decimal
/// string form is what `Settings::get_ruleset_version` returns, and therefore
/// what a freshly created `Settings` stores in its `version` field.
pub const RULESET_CURRENT_VERSION: i32 = 1;
/// Serialize a [`Settings`] value to text using the given `serde` backend.
///
/// The macro expands to an expression that evaluates to the serialized
/// `String`. If serialization fails, the error is logged and the macro
/// **returns from the enclosing function** with an `Err(std::io::Error)` of
/// kind `InvalidData`. It can therefore only be used inside a function whose
/// return type is a `Result` with `std::io::Error` as its error type.
///
/// # Arguments
///
/// * `funcrate` - `serde`-compatible crate to use, must provide `to_string`
/// * `content` - content to encode, typically a reference to a `Settings`
///
/// # Example
///
/// ```ignore
/// use lang_evolve_core::encode_settings;
/// use lang_evolve_core::settings::{utils, Settings};
/// use std::path::Path;
///
/// fn encode(path: &Path, s: &Settings) -> std::io::Result<String> {
///     let content = match utils::get_file_type(path) {
///         utils::SettingsType::Yaml => encode_settings!(serde_yaml, s),
///         utils::SettingsType::Json => encode_settings!(serde_json, s),
///         _ => panic!("Could not encode settings"),
///     };
///     Ok(content)
/// }
/// ```
///
/// [`Settings`]: ./settings/struct.Settings.html
#[macro_export(local_inner_macros)]
macro_rules! encode_settings {
    ($funcrate:ident, $content:expr) => {
        match $funcrate::to_string($content) {
            // Happy path: the macro evaluates to the encoded text.
            Ok(encoded) => encoded,
            // Failure: log, then bail out of the *calling* function.
            Err(err) => {
                log::error!("Could not serialize settings: {}", err.to_string());
                return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, err));
            }
        }
    };
}
/// Deserialize a [`Settings`] value from a `&str` using the given `serde`
/// backend.
///
/// The macro expands to an expression that evaluates to the decoded value.
/// If decoding fails, the error is logged and the macro **returns from the
/// enclosing function** with an `Err(std::io::Error)` of kind `InvalidInput`.
/// It can therefore only be used inside a function whose return type is a
/// `Result` with `std::io::Error` as its error type.
///
/// # Arguments
///
/// * `funcrate` - `serde`-compatible crate to use, must provide `from_str`
/// * `content` - `&str` to decode into a [`Settings`]
///
/// # Example
///
/// ```ignore
/// use lang_evolve_core::decode_settings;
/// use lang_evolve_core::settings::Settings;
///
/// fn decode(input: &str) -> std::io::Result<Settings> {
///     let settings: Settings = decode_settings!(serde_json, input);
///     Ok(settings)
/// }
///
/// let settings = decode(r#"{"version":"1","categories":{},"rules":[]}"#);
/// ```
///
/// [`Settings`]: ./settings/struct.Settings.html
#[macro_export(local_inner_macros)]
macro_rules! decode_settings {
    ($funcrate:ident, $content:expr) => {
        match $funcrate::from_str($content) {
            // Happy path: the macro evaluates to the decoded value.
            Ok(decoded) => decoded,
            // Failure: log, then bail out of the *calling* function.
            Err(err) => {
                log::error!("Could not import settings: {}", err.to_string());
                return Err(std::io::Error::new(std::io::ErrorKind::InvalidInput, err));
            }
        }
    };
}
use std::collections::HashMap;
/// Representation of the software's settings
///
/// This struct represents all the settings the software has to follow
/// while running, which includes the phoneme categories as well as
/// the soundchange rules to apply to the input text.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Settings {
/// Represents the version of the loaded ruleset.
///
/// It is used to detect obsolete ruleset representations or if a
/// loaded ruleset comes from a newer version of lang_evolve_core
/// than the one used by the user.
///
/// When the field is absent from the decoded input, it defaults to
/// the value returned by `Settings::get_ruleset_version`.
#[serde(default = "Settings::get_ruleset_version")]
pub version: String,
/// Categories of phonemes
///
/// This is a map of categories of phonemes. Each key is the name of
/// a category, which is generally represented by a single capital
/// letter. The associated value is a string where all its
/// characters represent phonemes. It is currently not possible to
/// have more than one character to be considered as one sound.
///
/// When the field is absent from the decoded input, it defaults to
/// an empty map.
#[serde(default)]
pub categories: HashMap<String, String>,
/// Soundchange rules
///
/// This is a vector of pairs: the first element is the regex to be
/// matched while the second is a string representing the change to
/// be made to the input data.
///
/// When the field is absent from the decoded input, it defaults to
/// an empty vector.
#[serde(default)]
pub rules: Vec<(Regex, String)>,
}
/// Representation inside the crate of LangEvolve's settings.
impl Settings {
/// Builds an empty [`Settings`]: no categories, no rules, and the current
/// ruleset version.
///
/// # Example
///
/// ```
/// let s = lang_evolve_core::settings::Settings::new();
/// let content_yaml = r#"---
/// version: "1"
/// categories: {}
/// rules: []"#;
/// let content_json = r#"{"version":"1","categories":{},"rules":[]}"#;
/// assert_eq!(content_yaml, serde_yaml::to_string(&s).unwrap());
/// assert_eq!(content_json, serde_json::to_string(&s).unwrap());
/// ```
///
/// [`Settings`]: ./settings/struct.Settings.html
pub fn new() -> Self {
    let version = Self::get_ruleset_version();
    Self {
        version,
        categories: HashMap::default(),
        rules: Vec::default(),
    }
}
/// Import settings from an imput file.
///
/// The currently allowed file formats are described in the
/// [`utils::SettingsType`] enum. If the ruleset version is higher than the
/// current version (see [`Settings.version`]), then an error is returned.
///
/// # Arguments
///
/// * `path` - File to open and load settings from
///
/// # Example
///
/// ```no_run
/// use std::path::Path;
/// use lang_evolve_core::settings::Settings;
/// let path_json = Path::new("settings.json");
/// let _s_json = Settings::import(&path_json).unwrap();
///
/// let path_yaml = Path::new("settings.yaml");
/// let _s_yaml = Settings::import(&path_yaml).unwrap();
///
/// let path_yml = Path::new("settings.yml");
/// let _s_yml = Settings::import(&path_yml).unwrap();
/// ```
///
/// [`utils::SettingsTYpe`]: ./utils/enum.SettingsType.html
/// [`Settings.version`]: ./struct.Settings.html#structfield.version
pub fn import(path: &std::path::Path) -> std::io::Result<Self> {
use utils::SettingsType::{Json, Yaml};
let file_type = utils::get_file_type(&path);
let content = utils::read_file(&path)?;
let settings: Settings = match file_type {
Yaml => decode_settings!(serde_yaml, &content),
Json => decode_settings!(serde_json, &content),
// Attempt to decode anyway
_ => match Settings::from_str(&content.as_str()) {
Ok(val) => val,
Err(e) => {
error!(
"Could not decode input {}: {}",
content,
e.to_string()
);
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
e,
));
}
},
};
if settings.version > Self::get_ruleset_version() {
error!("Ruleset version too high!");
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
"Ruleset version too high!",
));
}
info!("Successfuly imported {}", path.display());
Ok(settings)
}
/// Loads settings from the file whose path is the textual form of `s`.
///
/// This is a convenience wrapper around `Settings::import` for callers that
/// hold the path as a string-like value.
///
/// # Arguments
///
/// * `s` - path to the file from which settings should be imported
///
/// # Panics
///
/// Panics if `Settings::import` returns an error for that path.
///
/// # Example
///
/// ```no_run
/// let s = lang_evolve_core::settings::Settings::from("settings.yml");
/// ```
pub fn from<S>(s: S) -> Self
where
    S: ToString,
{
    let raw_path = s.to_string();
    Self::import(std::path::Path::new(&raw_path)).unwrap()
}
/// Export current settings to a file.
///
/// The allowed file formats are described in the [`SettingsType`] enum.
///
/// # Arguments
///
/// * `path` - Path to write and export settings to
///
/// # Example
///
/// ```
/// use std::path::Path;
/// let s = lang_evolve_core::settings::Settings::new();
///
/// // Export to JSON
/// let path_json = Path::new("./output.json");
/// s.export(&path_json).unwrap();
///
/// // Export to Yaml, both ".yml" and ".yaml" work
/// let path_yaml = Path::new("./output.yaml");
/// s.export(&path_yaml).unwrap();
/// let path_yml = Path::new("./output.yml");
/// s.export(&path_yml).unwrap();
/// ```
///
/// [`SettingsType`]: ./utils/enum.SettingsType.html
pub fn export(&self, path: &std::path::Path) -> std::io::Result<()> {
let filetype = utils::get_file_type(&path);
let content = match filetype {
SettingsType::Yaml => encode_settings!(serde_yaml, &self),
SettingsType::Json => encode_settings!(serde_json, &self),
_ => {
error!("Unknown filetype {}", path.to_str().unwrap());
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
"Unknown file type",
));
}
};
info!("Successfuly exported settings to {}", path.display());
utils::write_file(&path, &content)
}
/// Get the current ruleset version of LangEvolve, as text.
///
/// The returned value is the decimal representation of
/// `RULESET_CURRENT_VERSION`.
fn get_ruleset_version() -> String {
    ToString::to_string(&RULESET_CURRENT_VERSION)
}
/// Returns a copy of the rules in which category markers are expanded.
///
/// In both parts of a rule (the regex to match and the replacement text),
/// every occurrence of `%` followed by a category name is replaced by the
/// phonemes of that category wrapped in square brackets. For instance, with
/// the category `C` defined as `bcdfg`, `%Ci` becomes `[bcdfg]i`.
///
/// A part of a rule is only rewritten when it contains a `%` followed by a
/// non-digit character. Category names are substituted longest first, so the
/// result does not depend on the iteration order of the category map when
/// one name is a prefix of another.
///
/// # Errors
///
/// The current implementation always returns `Ok`.
fn update_rules(&self) -> std::result::Result<Vec<(Regex, String)>, String> {
    // Compiled once for all the rules rather than once per rule.
    let category_marker = Regex::new("%\\D");

    // Pre-compute each (marker, expansion) pair. `HashMap` iteration order is
    // arbitrary, so the pairs are sorted: longest marker first (so `%CV` is
    // expanded before `%C`), then alphabetically to be fully deterministic.
    let mut expansions: Vec<(String, String)> = self
        .categories
        .iter()
        .map(|(category, content)| {
            (format!("%{}", category), format!("[{}]", content))
        })
        .collect();
    expansions.sort_by(|a, b| {
        b.0.len().cmp(&a.0.len()).then_with(|| a.0.cmp(&b.0))
    });

    let mut rules = self.rules.clone();
    // TODO break categories in different rules
    for (from, to) in rules.iter_mut() {
        if category_marker.is_match(from.as_str()) {
            let mut pattern = from.to_string();
            for (marker, expansion) in &expansions {
                pattern = pattern.replace(marker.as_str(), expansion.as_str());
            }
            // The regex is rebuilt once, after all substitutions are done.
            *from = Regex::new(pattern.as_str());
        }
        if category_marker.is_match(to.as_str()) {
            for (marker, expansion) in &expansions {
                *to = to.replace(marker.as_str(), expansion.as_str());
            }
        }
    }
    Ok(rules)
}
/// Apply list of rules to input
///
/// The list of rules in the struct will be applied, in order, to the input
/// `s`. If a rule contains the `%` character followed by a category name
/// (generally a capital letter), this marks a category of phonemes and is
/// replaced by them. For instance, we have:
/// - the category `C` defined as `bcdfg`
/// - the rule `%Ci` to `%Cj`
///
/// The rule is rewritten as `[bcdfg]i` to `[bcdfg]j`.
///
/// # Arguments
///
/// * `s` - Input to modify
///
/// # Errors
///
/// Returns the error reported while expanding the categories in the rules,
/// if any.
///
/// # Example
///
/// ```
/// # use lang_evolve_core::settings::Settings;
/// let settings = Settings::new();
/// // add some rules...
/// let input = String::new();
/// // set some input
/// let _output = settings.apply(input);
/// ```
pub fn apply(&self, s: String) -> std::result::Result<String, String> {
    // Propagate the error instead of panicking: the return type already
    // allows reporting it to the caller.
    let rules = self.update_rules()?;
    // `s` is owned, it can be reused directly without being cloned.
    let mut s = s;
    debug!("===============================================");
    for (from, to) in rules {
        debug!("from: {}\tto: {}", from.to_string(), to);
        debug!("old: {}", s);
        s = from.replace_all(&s, to.as_str()).to_string();
        debug!("new: {}", s);
    }
    Ok(s)
}
}
use std::str::FromStr;
impl FromStr for Settings {
type Err = serde_yaml::Error;
/// Decode a litteral string into a `Settings` struct. Works only for
/// supported file types described in `SettingsType`. It will try to decode
/// the input `s` by any mean known by `SettingsType`.
///
/// # Arguments
///
/// * `s` - litteral string to decode into a `Settings` struct
///
/// # Example
///
/// ```
/// # use std::str::FromStr;
/// let s = r#"{"version":"1","categories":{},"rules":[]}"#;
/// let settings = lang_evolve_core::settings::Settings::from_str(s).unwrap();
/// ```
fn from_str(s: &str) -> Result<Self, Self::Err> {
match serde_json::from_str::<Settings>(s) {
Ok(val) => Ok(val),
Err(_) => match serde_yaml::from_str::<Settings>(s) {
Ok(val) => Ok(val),
Err(e) => {
error!("Could not decode input {}: {}", s, e.to_string());
return Err(e);
}
},
}
}
}
use std::fmt;
use std::fmt::Display;
impl Display for Settings {
    /// Writes the settings as their JSON serialization.
    ///
    /// # Panics
    ///
    /// Panics if the settings cannot be serialized to JSON.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let json = serde_json::to_string(&self).unwrap();
        write!(f, "{}", json)
    }
}
impl PartialEq for Settings {
    /// Two `Settings` are equal when their version, their categories and
    /// their rules are all equal. Fields are compared in that order and the
    /// comparison stops at the first difference.
    fn eq(&self, other: &Self) -> bool {
        if self.version != other.version {
            return false;
        }
        if self.categories != other.categories {
            return false;
        }
        self.rules == other.rules
    }
}

// Marker impl: declares the equality above to be a full equivalence relation.
impl Eq for Settings {}
/// Serializes empty settings to YAML, writes them to `test.yaml`, and checks
/// that the file content read back matches the expected document.
#[test]
fn write_settings() {
    let expected = r#"---
version: "1"
categories: {}
rules: []"#;
    let target = std::path::Path::new("test.yaml");
    let encoded = serde_yaml::to_string(&Settings::new()).unwrap();
    utils::write_file(&target, &encoded).unwrap();
    let written = utils::read_file(&target).unwrap();
    assert_eq!(expected, written);
}
/// Exports empty settings to `test.yml`, imports them back, and checks that
/// the round trip yields equal settings.
#[test]
fn read_settings() {
    let target = std::path::Path::new("test.yml");
    let original = Settings::new();
    original.export(target).unwrap();
    let reloaded = Settings::import(target).unwrap();
    assert_eq!(original, reloaded);
}