Good progress with applying rules, room for improvement
Rules are now applied, but some bugs remain:
- The `HashMap` holding the rules should be replaced by a `Vec` so that rules are stored, and applied, in a defined order rather than a random one.
- For some reason, the `%` is not removed from some rules in the private function `update_rules` of the `Settings` struct.
- Replacements between square brackets still need to be made to work correctly.
This commit is contained in:
parent
bae1d86544
commit
ef8c02fc97
@ -18,7 +18,6 @@ extern crate simplelog;
|
||||
use log::{info, warn};
|
||||
use simplelog::*;
|
||||
|
||||
|
||||
pub mod settings;
|
||||
|
||||
/// Initializes the crate
|
||||
@ -48,6 +47,11 @@ pub fn init() -> std::result::Result<(), log::SetLoggerError> {
|
||||
TerminalMode::Mixed,
|
||||
)
|
||||
.unwrap(),
|
||||
WriteLogger::new(
|
||||
LevelFilter::Debug,
|
||||
Config::default(),
|
||||
File::create("core.log").unwrap(),
|
||||
),
|
||||
WriteLogger::new(
|
||||
LevelFilter::Info,
|
||||
Config::default(),
|
||||
|
@ -4,7 +4,7 @@ extern crate serde_yaml;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
extern crate log;
|
||||
use log::{error, info};
|
||||
use log::{debug, error, info};
|
||||
|
||||
pub mod utils;
|
||||
use utils::SettingsType;
|
||||
@ -125,7 +125,7 @@ pub struct Settings {
|
||||
/// a regex to be matched while the second represents the change
|
||||
/// to be made to the input data.
|
||||
#[serde(default)]
|
||||
pub rules: HashMap<Regex, Regex>,
|
||||
pub rules: HashMap<Regex, String>,
|
||||
}
|
||||
|
||||
/// Representation inside the crate of LangEvolve’s settings.
|
||||
@ -138,9 +138,9 @@ impl Settings {
|
||||
/// let s = lang_evolve_core::settings::Settings::new();
|
||||
/// let content_yaml = r#"---
|
||||
/// version: "1"
|
||||
/// categories: []
|
||||
/// rules: []"#;
|
||||
/// let content_json = r#"{"version":"1","categories":[],"rules":[]}"#;
|
||||
/// categories: {}
|
||||
/// rules: {}"#;
|
||||
/// let content_json = r#"{"version":"1","categories":{},"rules":{}}"#;
|
||||
/// assert_eq!(content_yaml, serde_yaml::to_string(&s).unwrap());
|
||||
/// assert_eq!(content_json, serde_json::to_string(&s).unwrap());
|
||||
/// ```
|
||||
@ -283,6 +283,73 @@ impl Settings {
|
||||
fn get_ruleset_version() -> String {
|
||||
RULESET_CURRENT_VERSION.to_string()
|
||||
}
|
||||
|
||||
fn update_rules(&self) -> Vec<(Regex, String)> {
|
||||
let mut rules: Vec<(Regex, String)> = Vec::new();
|
||||
for (from, to) in &self.rules {
|
||||
rules.push((from.clone(), to.to_string()));
|
||||
}
|
||||
|
||||
for (category, content) in &self.categories {
|
||||
let mut temp_rules: Vec<(Regex, String)> = Vec::new();
|
||||
for (from, to) in &rules {
|
||||
let from = Regex::new(
|
||||
from.to_string()
|
||||
.replace(
|
||||
category.as_str(),
|
||||
format!("[{}]", content).as_str(),
|
||||
)
|
||||
.replace("%", "")
|
||||
.as_str(),
|
||||
);
|
||||
let to = to.to_string().replace(
|
||||
category,
|
||||
format!("[{}]", content).replace("%", "").as_str(),
|
||||
);
|
||||
temp_rules.push((from, to));
|
||||
}
|
||||
rules = temp_rules.clone();
|
||||
}
|
||||
rules
|
||||
}
|
||||
|
||||
/// Apply list of rules to input
|
||||
///
|
||||
/// The list of rules in the struct will be applied to the input `s`. If the
|
||||
/// rule contains the `%` character followed by a capital letter, this marks
|
||||
/// a category of phonemes and should be replaced by them. For instance, we
|
||||
/// have:
|
||||
/// - the category `C` defined as `bcdfg`
|
||||
/// - the rule `%Ci` to `%Cj`
|
||||
/// The rule should be rewritten as `[bcdfg]` to `[bcdfg]j`
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `s` - Input to modify
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// # use lang_evolve_core::settings::Settings;
|
||||
/// let settings = Settings::new();
|
||||
/// // add some rules...
|
||||
/// let input = String::new();
|
||||
/// // set some input
|
||||
/// let _output = settings.apply(input);
|
||||
/// ```
|
||||
pub fn apply(&self, s: String) -> String {
|
||||
// Replace all `%C`s by their equivalent
|
||||
let rules = self.update_rules();
|
||||
let mut s = s.clone();
|
||||
debug!("===============================================");
|
||||
for (from, to) in rules {
|
||||
debug!("from: {}\tto: {}", from.to_string(), to);
|
||||
debug!("old: {}", s);
|
||||
s = from.replace_all(&s, to.as_str()).to_string();
|
||||
debug!("new: {}", s);
|
||||
}
|
||||
s
|
||||
}
|
||||
}
|
||||
|
||||
use std::str::FromStr;
|
||||
@ -301,7 +368,7 @@ impl FromStr for Settings {
|
||||
///
|
||||
/// ```
|
||||
/// # use std::str::FromStr;
|
||||
/// let s = r#"{"version":"1","categories":[],"rules":[]}"#;
|
||||
/// let s = r#"{"version":"1","categories":{},"rules":{}}"#;
|
||||
/// let settings = lang_evolve_core::settings::Settings::from_str(s).unwrap();
|
||||
/// ```
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
@ -342,8 +409,8 @@ fn write_settings() {
|
||||
let path = std::path::Path::new("test.yaml");
|
||||
let settings = r#"---
|
||||
version: "1"
|
||||
categories: []
|
||||
rules: []"#;
|
||||
categories: {}
|
||||
rules: {}"#;
|
||||
utils::write_file(&path, &serde_yaml::to_string(&s).unwrap()).unwrap();
|
||||
assert_eq!(settings, utils::read_file(&path).unwrap());
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user