lang-evolve-core/src/settings/mod.rs

429 lines
14 KiB
Rust
Raw Normal View History

extern crate serde;
extern crate serde_json;
extern crate serde_yaml;
use serde::{Deserialize, Serialize};
extern crate log;
use log::{debug, error, info};
pub mod utils;
use utils::SettingsType;
pub mod regex_wrapper;
use regex_wrapper::Regex;
2020-03-27 22:31:24 +00:00
/// Current version of the ruleset format.
///
/// It helps determine whether a ruleset is outdated or comes from a more
/// recent version of the software than the one in use.
pub const RULESET_CURRENT_VERSION: i32 = 1;
/// Encode a [`Settings`] struct with a `serde`-compatible crate.
///
/// The macro evaluates to the value produced by `$funcrate::to_string` (the
/// encoded string). If encoding fails, the error is logged and the macro
/// **returns from the enclosing function** with an `Err(std::io::Error)` of
/// kind `InvalidData`. It can therefore only be used inside a function whose
/// return type is a `Result` with a `std::io::Error` error type.
///
/// # Arguments
///
/// * `funcrate` - `serde`-compatible crate to use, must provide `to_string`
/// * `content` - content to encode, a reference to a `Settings` struct
///
/// # Example
///
/// ```ignore
/// # use lang_evolve_core::settings::*;
/// # use lang_evolve_core::encode_settings;
/// use std::path::Path;
/// fn encode(path: &Path, s: &Settings) -> std::io::Result<String> {
///     let content = match utils::get_file_type(path) {
///         utils::SettingsType::Yaml => encode_settings!(serde_yaml, s),
///         utils::SettingsType::Json => encode_settings!(serde_json, s),
///         _ => panic!("Could not encode settings"),
///     };
///     Ok(content)
/// }
/// ```
///
/// [`Settings`]: ./settings/struct.Settings.html
#[macro_export(local_inner_macros)]
macro_rules! encode_settings {
    ($funcrate:ident, $content:expr) => {
        match $funcrate::to_string($content) {
            Ok(val) => val,
            Err(e) => {
                log::error!("Could not serialize settings: {}", e);
                // Absolute `::std` paths keep the expansion independent of
                // whatever `std` name is in scope at the call site.
                return Err(::std::io::Error::new(
                    ::std::io::ErrorKind::InvalidData,
                    e,
                ));
            }
        }
    };
}
/// Decode a [`Settings`] struct from a string with a `serde`-compatible
/// crate.
///
/// The macro evaluates to the value produced by `$funcrate::from_str`. If
/// decoding fails, the error is logged and the macro **returns from the
/// enclosing function** with an `Err(std::io::Error)` of kind `InvalidInput`.
/// It can therefore only be used inside a function whose return type is a
/// `Result` with a `std::io::Error` error type.
///
/// # Arguments
///
/// * `funcrate` - `serde`-compatible crate to use, must provide `from_str`
/// * `content` - `&str` to decode into a [`Settings`]
///
/// # Example
///
/// ```ignore
/// # use lang_evolve_core::decode_settings;
/// # use lang_evolve_core::settings::Settings;
/// fn decode(input: &str) -> std::io::Result<Settings> {
///     let settings: Settings = decode_settings!(serde_json, input);
///     Ok(settings)
/// }
/// let input = r#"{"version":"1","categories":{},"rules":[]}"#;
/// let settings = decode(input).unwrap();
/// ```
///
/// [`Settings`]: ./settings/struct.Settings.html
#[macro_export(local_inner_macros)]
macro_rules! decode_settings {
    ($funcrate:ident, $content:expr) => {
        match $funcrate::from_str($content) {
            Ok(val) => val,
            Err(e) => {
                log::error!("Could not import settings: {}", e);
                // Absolute `::std` paths keep the expansion independent of
                // whatever `std` name is in scope at the call site.
                return Err(::std::io::Error::new(
                    ::std::io::ErrorKind::InvalidInput,
                    e,
                ));
            }
        }
    };
}
use std::collections::HashMap;
/// Representation of the software's settings
///
/// This struct represents all the settings the software has to follow
/// while running, which includes the phoneme categories as well as
/// the soundchange rules to apply to the input text.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Settings {
/// Represents the version of the loaded ruleset.
///
/// It is used to detect obsolete ruleset representations or if a
/// loaded ruleset comes from a newer version of lang_evolve_core
/// than the one used by the user. When the field is absent from the
/// deserialized input, it is filled in by `Settings::get_ruleset_version`.
#[serde(default = "Settings::get_ruleset_version")]
pub version: String,
/// Categories of phonemes
///
/// This is a map of phoneme categories. Each key is the name of a
/// category, which is generally represented by a single capital
/// letter. The associated value is a string where all its characters
/// represent phonemes. It is currently not possible to have more than
/// one character to be considered as one sound. When the field is
/// absent from the deserialized input, the map is empty.
#[serde(default)]
pub categories: HashMap<String, String>,
/// Soundchange rules
///
/// This is a vector of pairs, the first element represents a regex to
/// be matched while the second is a string representing the change to
/// be made to the input data. When the field is absent from the
/// deserialized input, the vector is empty.
#[serde(default)]
pub rules: Vec<(Regex, String)>,
}
2020-03-27 17:26:31 +00:00
/// Representation inside the crate of LangEvolve's settings.
impl Settings {
/// Creates a new empty instance of [`Settings`]
2020-03-27 17:26:31 +00:00
///
/// # Example
///
/// ```
/// let s = lang_evolve_core::settings::Settings::new();
/// let content_yaml = r#"---
/// version: "1"
/// categories: {}
/// rules: []"#;
/// let content_json = r#"{"version":"1","categories":{},"rules":[]}"#;
2020-03-27 17:26:31 +00:00
/// assert_eq!(content_yaml, serde_yaml::to_string(&s).unwrap());
/// assert_eq!(content_json, serde_json::to_string(&s).unwrap());
/// ```
///
/// [`Settings`]: ./settings/struct.Settings.html
pub fn new() -> Self {
Self {
version: Self::get_ruleset_version(),
categories: HashMap::new(),
rules: Vec::new(),
}
}
/// Import settings from an imput file.
///
/// The currently allowed file formats are described in the
/// [`utils::SettingsType`] enum. If the ruleset version is higher than the
/// current version (see [`Settings.version`]), then an error is returned.
2020-03-27 17:26:31 +00:00
///
/// # Arguments
///
/// * `path` - File to open and load settings from
///
/// # Example
///
/// ```no_run
/// use std::path::Path;
/// use lang_evolve_core::settings::Settings;
/// let path_json = Path::new("settings.json");
/// let _s_json = Settings::import(&path_json).unwrap();
///
/// let path_yaml = Path::new("settings.yaml");
/// let _s_yaml = Settings::import(&path_yaml).unwrap();
///
/// let path_yml = Path::new("settings.yml");
/// let _s_yml = Settings::import(&path_yml).unwrap();
/// ```
///
/// [`utils::SettingsTYpe`]: ./utils/enum.SettingsType.html
/// [`Settings.version`]: ./struct.Settings.html#structfield.version
pub fn import(path: &std::path::Path) -> std::io::Result<Self> {
use utils::SettingsType::{Json, Yaml};
let file_type = utils::get_file_type(&path);
let content = utils::read_file(&path)?;
let settings: Settings = match file_type {
Yaml => decode_settings!(serde_yaml, &content),
Json => decode_settings!(serde_json, &content),
// Attempt to decode anyway
_ => match Settings::from_str(&content.as_str()) {
Ok(val) => val,
Err(e) => {
error!(
"Could not decode input {}: {}",
content,
e.to_string()
);
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
e,
));
}
},
};
if settings.version > Self::get_ruleset_version() {
error!("Ruleset version too high!");
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
"Ruleset version too high!",
));
}
2020-03-27 22:26:16 +00:00
info!("Successfuly imported {}", path.display());
Ok(settings)
}
2020-03-27 17:26:31 +00:00
/// Build a `Settings` from the file whose path is described by `s`.
///
/// `s` is converted to a string, interpreted as a file path and handed over
/// to `Settings::import`.
///
/// # Arguments
///
/// * `s` - path to the file from which settings should be imported
///
/// # Panics
///
/// Panics if `Settings::import` returns an error.
///
/// # Example
///
/// ```no_run
/// let s = lang_evolve_core::settings::Settings::from("settings.yml");
/// ```
pub fn from<S>(s: S) -> Self
where
    S: ToString,
{
    let location = s.to_string();
    Self::import(std::path::Path::new(&location)).unwrap()
}
/// Export current settings to a file.
///
/// The allowed file formats are described in the [`SettingsType`] enum.
2020-03-27 17:26:31 +00:00
///
/// # Arguments
///
/// * `path` - Path to write and export settings to
///
/// # Example
///
/// ```
/// use std::path::Path;
/// let s = lang_evolve_core::settings::Settings::new();
///
/// // Export to JSON
/// let path_json = Path::new("./output.json");
/// s.export(&path_json).unwrap();
///
/// // Export to Yaml, both ".yml" and ".yaml" work
/// let path_yaml = Path::new("./output.yaml");
/// s.export(&path_yaml).unwrap();
/// let path_yml = Path::new("./output.yml");
/// s.export(&path_yml).unwrap();
/// ```
///
/// [`SettingsType`]: ./utils/enum.SettingsType.html
2020-03-27 17:26:31 +00:00
pub fn export(&self, path: &std::path::Path) -> std::io::Result<()> {
let filetype = utils::get_file_type(&path);
2020-03-27 17:26:31 +00:00
let content = match filetype {
SettingsType::Yaml => encode_settings!(serde_yaml, &self),
SettingsType::Json => encode_settings!(serde_json, &self),
_ => {
error!("Unknown filetype {}", path.to_str().unwrap());
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
"Unknown file type",
));
}
2020-03-27 17:26:31 +00:00
};
2020-03-27 22:26:16 +00:00
info!("Successfuly exported settings to {}", path.display());
2020-03-27 17:26:31 +00:00
utils::write_file(&path, &content)
}
/// Get the current ruleset version of LangEvolve.
fn get_ruleset_version() -> String {
2020-03-27 22:26:16 +00:00
RULESET_CURRENT_VERSION.to_string()
2020-03-27 17:26:31 +00:00
}
2020-04-04 16:54:46 +00:00
fn update_rules(&self) -> std::result::Result<Vec<(Regex, String)>, String> {
let mut rules = self.rules.clone();
// TODO break categories in different rules
2020-04-04 16:54:46 +00:00
for (from, to) in rules.iter_mut() {
let re = Regex::new("%\\D");
let from_match = re.is_match(from.as_str());
let to_match = re.is_match(to);
if from_match || to_match {
for (category, content) in &self.categories {
if from_match {
*from = Regex::new(
from.to_string()
.replace(
format!("%{}", category).as_str(),
format!("[{}]", content).as_str(),
)
.as_str()
);
}
if to_match {
*to = to.to_string().replace(
format!("%{}", category).as_str(),
2020-04-04 16:54:46 +00:00
format!("[{}]", content).as_str()
);
}
}
}
}
2020-04-04 16:54:46 +00:00
Ok(rules)
}
/// Apply list of rules to input
///
/// The list of rules in the struct will be applied to the input `s`. If the
/// rule contains the `%` character followed by a capital letter, this marks
/// a category of phonemes and should be replaced by them. For instance, we
/// have:
/// - the category `C` defined as `bcdfg`
/// - the rule `%Ci` to `%Cj`
/// The rule should be rewritten as `[bcdfg]` to `[bcdfg]j`
///
/// # Arguments
///
/// * `s` - Input to modify
///
/// # Example
///
/// ```
/// # use lang_evolve_core::settings::Settings;
/// let settings = Settings::new();
/// // add some rules...
/// let input = String::new();
/// // set some input
/// let _output = settings.apply(input);
/// ```
pub fn apply(&self, s: String) -> std::result::Result<String, String> {
2020-04-04 16:54:46 +00:00
// TODO Add Error handling
let rules = self.update_rules().unwrap();
let mut s = s.clone();
debug!("===============================================");
for (from, to) in rules {
debug!("from: {}\tto: {}", from.to_string(), to);
debug!("old: {}", s);
s = from.replace_all(&s, to.as_str()).to_string();
debug!("new: {}", s);
}
Ok(s)
}
}
use std::str::FromStr;
impl FromStr for Settings {
type Err = serde_yaml::Error;
/// Decode a litteral string into a `Settings` struct. Works only for
/// supported file types described in `SettingsType`. It will try to decode
/// the input `s` by any mean known by `SettingsType`.
2020-03-27 22:25:16 +00:00
///
/// # Arguments
///
/// * `s` - litteral string to decode into a `Settings` struct
2020-03-27 22:25:16 +00:00
///
/// # Example
///
/// ```
/// # use std::str::FromStr;
/// let s = r#"{"version":"1","categories":{},"rules":[]}"#;
/// let settings = lang_evolve_core::settings::Settings::from_str(s).unwrap();
/// ```
fn from_str(s: &str) -> Result<Self, Self::Err> {
match serde_json::from_str::<Settings>(s) {
Ok(val) => Ok(val),
Err(_) => match serde_yaml::from_str::<Settings>(s) {
Ok(val) => Ok(val),
Err(e) => {
error!("Could not decode input {}: {}", s, e.to_string());
return Err(e);
}
},
}
}
}
use std::fmt;
use std::fmt::Display;
impl Display for Settings {
    /// Writes the settings serialized as JSON.
    ///
    /// # Panics
    ///
    /// Panics if the settings cannot be serialized to JSON.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let json = serde_json::to_string(&self).unwrap();
        f.write_str(&json)
    }
}
impl PartialEq for Settings {
fn eq(&self, other: &Self) -> bool {
self.version == other.version
&& self.categories == other.categories
&& self.rules == other.rules
2020-03-27 22:25:16 +00:00
}
}
// Marker impl: declares that the equality defined by `PartialEq` for
// `Settings` is a full equivalence relation.
impl Eq for Settings {}
/// Checks that an empty `Settings` serialized to YAML and written with
/// `utils::write_file` is read back unchanged by `utils::read_file`.
#[test]
fn write_settings() {
    let s = Settings::new();
    // Work in the temporary directory so that running the tests does not
    // leave files behind in the working directory.
    let file = std::env::temp_dir().join("lang_evolve_core_write_settings.yaml");
    let path = file.as_path();
    let settings = r#"---
version: "1"
categories: {}
rules: []"#;
    utils::write_file(&path, &serde_yaml::to_string(&s).unwrap()).unwrap();
    let written = utils::read_file(&path);
    // Clean up before asserting so that a failure does not skip the removal.
    let _ = std::fs::remove_file(path);
    assert_eq!(settings, written.unwrap());
}
2020-03-27 17:26:31 +00:00
/// Checks that settings exported to a `.yml` file are imported back equal
/// to the original.
#[test]
fn read_settings() {
    let s1 = Settings::new();
    // Work in the temporary directory so that running the tests does not
    // leave files behind in the working directory.
    let file = std::env::temp_dir().join("lang_evolve_core_read_settings.yml");
    let path = file.as_path();
    s1.export(path).unwrap();
    let imported = Settings::import(path);
    // Clean up before asserting so that a failure does not skip the removal.
    let _ = std::fs::remove_file(path);
    assert_eq!(s1, imported.unwrap());
}