Reorganized modules, made rules a separate struct
Rules are now a separate struct, and the `rules` member of `settings::Settings` is now a `Vec<settings::rule::Rule>`. Several elements were made private and can now be accessed through dedicated methods.
This commit is contained in:
@@ -6,16 +6,15 @@ use serde::{Deserialize, Serialize};
|
||||
extern crate log;
|
||||
use log::{debug, error, info};
|
||||
|
||||
pub mod utils;
|
||||
use utils::SettingsType;
|
||||
use crate::utils::{self, SettingsType};
|
||||
|
||||
pub mod regex_wrapper;
|
||||
use regex_wrapper::Regex;
|
||||
mod rule;
|
||||
use rule::Rule;
|
||||
|
||||
/// Current version of the ruleset. It will help determine if the ruleset is
|
||||
/// outdated or from a more recent version of the software than the one being in
|
||||
/// use.
|
||||
pub const RULESET_CURRENT_VERSION: i32 = 1;
|
||||
const RULESET_CURRENT_VERSION: i32 = 1;
|
||||
|
||||
/// Encode a [`Settings`] struct to a filetype, returns a
|
||||
/// `std::result::Result<std::string::String, std::io::Error>`
|
||||
@@ -42,7 +41,6 @@ pub const RULESET_CURRENT_VERSION: i32 = 1;
|
||||
/// ```
|
||||
///
|
||||
/// [`Settings`]: ./settings/struct.Settings.html
|
||||
#[macro_export(local_inner_macros)]
|
||||
macro_rules! encode_settings {
|
||||
($funcrate:ident, $content:expr) => {
|
||||
match $funcrate::to_string($content) {
|
||||
@@ -75,7 +73,6 @@ macro_rules! encode_settings {
|
||||
/// ```
|
||||
///
|
||||
/// [`Settings`]: ./settings/struct.Settings.html
|
||||
#[macro_export(local_inner_macros)]
|
||||
macro_rules! decode_settings {
|
||||
($funcrate:ident, $content:expr) => {
|
||||
match $funcrate::from_str($content) {
|
||||
@@ -105,7 +102,7 @@ pub struct Settings {
|
||||
/// loaded ruleset comes from a newer version of lang_evolve_core
|
||||
/// than the one used by the user.
|
||||
#[serde(default = "Settings::get_ruleset_version")]
|
||||
pub version: String,
|
||||
version: String,
|
||||
|
||||
/// Categories of phonemes
|
||||
///
|
||||
@@ -117,7 +114,7 @@ pub struct Settings {
|
||||
/// phonemes. It is currently not possible to have more than one
|
||||
/// character to be considered as one sound.
|
||||
#[serde(default)]
|
||||
pub categories: HashMap<String, String>,
|
||||
categories: HashMap<String, String>,
|
||||
|
||||
/// Soundchange rules
|
||||
///
|
||||
@@ -125,7 +122,7 @@ pub struct Settings {
|
||||
/// a regex to be matched while the second represents the change
|
||||
/// to be made to the input data.
|
||||
#[serde(default)]
|
||||
pub rules: Vec<(Regex, String)>,
|
||||
rules: Vec<Rule>,
|
||||
}
|
||||
|
||||
/// Representation inside the crate of LangEvolve’s settings.
|
||||
@@ -182,10 +179,9 @@ impl Settings {
|
||||
/// [`utils::SettingsTYpe`]: ./utils/enum.SettingsType.html
|
||||
/// [`Settings.version`]: ./struct.Settings.html#structfield.version
|
||||
pub fn import(path: &std::path::Path) -> std::io::Result<Self> {
|
||||
use utils::SettingsType::{Json, Yaml};
|
||||
use SettingsType::{Json, Yaml};
|
||||
let file_type = utils::get_file_type(&path);
|
||||
let content = utils::read_file(&path)?;
|
||||
|
||||
let settings: Settings = match file_type {
|
||||
Yaml => decode_settings!(serde_yaml, &content),
|
||||
Json => decode_settings!(serde_json, &content),
|
||||
@@ -236,6 +232,26 @@ impl Settings {
|
||||
Self::import(&path).unwrap()
|
||||
}
|
||||
|
||||
/// Add a new rule to the current settings
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `from` - Regex that should match the text to be replaced
|
||||
/// * `to` - Regex that should replace some text
|
||||
pub fn add_rule(&mut self, from: &str, to: &str) {
|
||||
self.rules.push(Rule::new(from, to))
|
||||
}
|
||||
|
||||
/// Add a new category of phonemes to the current settings
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `name` - Name of the category
|
||||
/// * `content` - Content of the category, phonemes
|
||||
pub fn add_category(&mut self, name: &str, content: &str) {
|
||||
self.categories.insert(String::from(name), String::from(content));
|
||||
}
|
||||
|
||||
/// Export current settings to a file.
|
||||
///
|
||||
/// The allowed file formats are described in the [`SettingsType`] enum.
|
||||
@@ -284,35 +300,12 @@ impl Settings {
|
||||
RULESET_CURRENT_VERSION.to_string()
|
||||
}
|
||||
|
||||
fn update_rules(&self) -> std::result::Result<Vec<(Regex, String)>, String> {
|
||||
let mut rules = self.rules.clone();
|
||||
|
||||
// TODO break categories in different rules
|
||||
for (from, to) in rules.iter_mut() {
|
||||
let re = Regex::new("%\\D");
|
||||
let from_match = re.is_match(from.as_str());
|
||||
let to_match = re.is_match(to);
|
||||
if from_match || to_match {
|
||||
for (category, content) in &self.categories {
|
||||
if from_match {
|
||||
*from = Regex::new(
|
||||
from.to_string()
|
||||
.replace(
|
||||
format!("%{}", category).as_str(),
|
||||
format!("[{}]", content).as_str(),
|
||||
)
|
||||
.as_str()
|
||||
);
|
||||
}
|
||||
if to_match {
|
||||
*to = to.to_string().replace(
|
||||
format!("%{}", category).as_str(),
|
||||
format!("[{}]", content).as_str()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Build an updated copy of every rule using the current categories.
///
/// Each rule is passed through `Rule::update` together with
/// `self.categories`; the stored rules themselves are left untouched.
///
/// # Errors
///
/// Returns the first error reported by `Rule::update`. Failures are
/// propagated to the caller instead of panicking through `unwrap()`.
fn update_rules(&self) -> std::result::Result<Vec<Rule>, String> {
    // Collecting into `Result<Vec<_>, _>` stops at the first `Err`, and
    // iterating by reference avoids cloning the whole rule list up front.
    self.rules
        .iter()
        .map(|rule| rule.update(&self.categories))
        .collect()
}
|
||||
|
||||
@@ -345,10 +338,17 @@ impl Settings {
|
||||
let rules = self.update_rules().unwrap();
|
||||
let mut s = s.clone();
|
||||
debug!("===============================================");
|
||||
for (from, to) in rules {
|
||||
debug!("from: {}\tto: {}", from.to_string(), to);
|
||||
for rule in rules {
|
||||
debug!(
|
||||
"from: {}\tto: {}",
|
||||
rule.get_from().to_string(),
|
||||
rule.get_to()
|
||||
);
|
||||
debug!("old: {}", s);
|
||||
s = from.replace_all(&s, to.as_str()).to_string();
|
||||
s = rule
|
||||
.get_from()
|
||||
.replace_all(&s, rule.get_to().as_str())
|
||||
.to_string();
|
||||
debug!("new: {}", s);
|
||||
}
|
||||
Ok(s)
|
||||
|
||||
94
src/settings/rule/mod.rs
Normal file
94
src/settings/rule/mod.rs
Normal file
@@ -0,0 +1,94 @@
|
||||
extern crate serde;
|
||||
extern crate serde_json;
|
||||
extern crate serde_yaml;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
mod regex_wrapper;
|
||||
use regex_wrapper::Regex;
|
||||
|
||||
/// Representation of a rule in LangEvolveRs
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct Rule {
|
||||
/// Regex that should match the input text
|
||||
from: Regex,
|
||||
/// Text to replace matched text
|
||||
to: String,
|
||||
}
|
||||
|
||||
impl Rule {
|
||||
/// Create new rule
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `from` - literal string that represents the regex that should match
|
||||
/// the input text
|
||||
/// * `to` - literal string that represents the regex text that should
|
||||
/// replaced the text matched by `from`
|
||||
pub fn new(from: &str, to: &str) -> Self {
|
||||
Rule {
|
||||
from: Regex::new(from),
|
||||
to: String::from(to),
|
||||
}
|
||||
}
|
||||
|
||||
// TODO break categories in different rules
|
||||
pub fn update(
|
||||
&self,
|
||||
categories: &std::collections::HashMap<String, String>,
|
||||
) -> std::result::Result<Rule, String> {
|
||||
let mut rule = self.clone();
|
||||
let re = Regex::new("%\\D");
|
||||
let from_match = re.is_match(&self.from.as_str());
|
||||
let to_match = re.is_match(&self.to.as_str());
|
||||
if from_match && !to_match {
|
||||
for (category, content) in categories {
|
||||
rule.from = Regex::new(
|
||||
rule.from
|
||||
.to_string()
|
||||
.replace(
|
||||
format!("%{}", category).as_str(),
|
||||
format!("[{}]", content).as_str(),
|
||||
)
|
||||
.as_str(),
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(rule)
|
||||
}
|
||||
|
||||
pub fn get_from(&self) -> &Regex {
|
||||
&self.from
|
||||
}
|
||||
|
||||
pub fn get_to(&self) -> String {
|
||||
self.to.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for Rule {
|
||||
fn from(source: String) -> Self {
|
||||
let components: Vec<&str> = source.split_terminator(">").collect();
|
||||
Self {
|
||||
from: Regex::new(components[0]),
|
||||
to: String::from(components[1]),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for Rule {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.from == other.from && self.to == other.to
|
||||
}
|
||||
}
|
||||
|
||||
// Marks `Rule` as `Eq`; the trait adds no methods, the comparison itself
// lives in the `PartialEq` implementation.
impl Eq for Rule {}
|
||||
|
||||
/// `Rule::new` must produce the same value as building the struct by hand.
#[test]
fn rule_new() {
    let expected = Rule {
        from: Regex::new("([ae]+)i"),
        to: String::from("${1}i"),
    };
    let built = Rule::new("([ae]+)i", "${1}i");
    assert_eq!(built, expected);
}
|
||||
@@ -1,17 +1,3 @@
|
||||
// extern crate serde;
|
||||
// extern crate regex;
|
||||
// use serde::{Deserialize, Serialize};
|
||||
|
||||
// #[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
// #[serde(transparent)]
|
||||
// pub struct Regex(regex::Regex);
|
||||
|
||||
// #[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
// #[serde(remote = "regex::Regex")]
|
||||
// pub struct RegexDef{
|
||||
// #[serde(getter = "regex::Regex::to_string")]
|
||||
// r: String
|
||||
// }
|
||||
use std::{fmt, ops};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
@@ -1,128 +0,0 @@
|
||||
extern crate log;
|
||||
use log::{info, error};
|
||||
|
||||
use std::fs::File;
|
||||
use std::io::{Read, Result};
|
||||
use std::path::Path;
|
||||
|
||||
/// Type of supported settings format: yaml or json
///
/// The enum carries no data, so it is `Copy` and can be compared with `Eq`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SettingsType {
    /// Files ending with the `yml` or `yaml` extension
    Yaml,

    /// Files ending with the `json` extension
    Json,

    /// Other file type, used to describe files without an extension or with an
    /// unsupported extension
    Other,
}
|
||||
|
||||
/// Read a file’s content into a `String`
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```no_run
|
||||
/// let path = std::path::Path::new("./some/path/to/my/file.json");
|
||||
/// let content = lang_evolve_core::settings::utils::read_file(&path).unwrap();
|
||||
/// ```
|
||||
pub fn read_file(path: &Path) -> Result<String> {
|
||||
let display = path.display();
|
||||
let mut file = match File::open(&path) {
|
||||
Err(why) => {
|
||||
error!("Could not read {}: {}", display, why.to_string());
|
||||
return Err(why);
|
||||
}
|
||||
Ok(file) => file,
|
||||
};
|
||||
let mut content = String::new();
|
||||
match file.read_to_string(&mut content) {
|
||||
Err(why) => {
|
||||
error!("Could not read {}: {}", display, why.to_string());
|
||||
return Err(why);
|
||||
}
|
||||
Ok(_) => {
|
||||
info!("Content of {} read", display);
|
||||
Ok(content)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Write a `String` into a file
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```no_run
|
||||
/// let content = String::from("This is my content");
|
||||
/// let path = std::path::Path::new("./path/to/my/file.txt");
|
||||
/// lang_evolve_core::settings::utils::write_file(&path, &content).unwrap();
|
||||
/// ```
|
||||
pub fn write_file<S>(path: &Path, content: &S) -> Result<()>
|
||||
where
|
||||
S: std::string::ToString,
|
||||
{
|
||||
use std::io::prelude::*;
|
||||
let mut file = match File::create(&path) {
|
||||
Err(e) => {
|
||||
error!("Could not open file {}: {}", path.display(), e.to_string());
|
||||
return Err(e);
|
||||
}
|
||||
Ok(file) => file,
|
||||
};
|
||||
match file.write_all(content.to_string().as_bytes()) {
|
||||
Err(e) => {
|
||||
error!(
|
||||
"Could not write settings to file {}: {}",
|
||||
path.display(),
|
||||
e.to_string()
|
||||
);
|
||||
return Err(e);
|
||||
}
|
||||
Ok(_) => {
|
||||
info!("Wrote settings to file {}", path.display());
|
||||
}
|
||||
};
|
||||
info!("Successfuly written {}", path.display());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get the type of file to be opened based on its extension. Currently
|
||||
/// supported file types are:
|
||||
/// * JSON - `.json` extension
|
||||
/// * Yaml - `.yml` or `.yaml` extensions
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// - `path` - Path of the file to be determined
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// let file_json = std::path::Path::new("file.json");
|
||||
/// assert_eq!(lang_evolve_core::settings::utils::SettingsType::Json,
|
||||
/// lang_evolve_core::settings::utils::get_file_type(&file_json));
|
||||
///
|
||||
/// let file_yaml = std::path::Path::new("file.yaml");
|
||||
/// assert_eq!(lang_evolve_core::settings::utils::SettingsType::Yaml,
|
||||
/// lang_evolve_core::settings::utils::get_file_type(&file_yaml));
|
||||
///
|
||||
/// let file_yml = std::path::Path::new("file.yml");
|
||||
/// assert_eq!(lang_evolve_core::settings::utils::SettingsType::Yaml,
|
||||
/// lang_evolve_core::settings::utils::get_file_type(&file_yml));
|
||||
/// ```
|
||||
pub fn get_file_type(path: &Path) -> SettingsType {
|
||||
let extension = match path.extension() {
|
||||
None => { return SettingsType::Other; }
|
||||
Some(val) => val,
|
||||
};
|
||||
let extension = extension
|
||||
.to_str()
|
||||
.expect("Could not get String out of extension")
|
||||
.to_lowercase();
|
||||
match extension.as_str() {
|
||||
"yml" | "yaml" => SettingsType::Yaml,
|
||||
"json" => SettingsType::Json,
|
||||
_ => SettingsType::Other,
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user