Compare commits

..

3 Commits

Author SHA1 Message Date
6fb1c287e0 Rules are now kept sorted
Changed the rules from a HashMap to a Vec
2020-04-04 17:08:26 +02:00
ef8c02fc97 Good progress with applying rules, room for improvement
Rules are applied, however some bugs remain:
- The HashMap for rules should be replaced by a Vec so they can be
  stored in order and not randomly
- For some reason, the `%` is not removed from some rules in the
  private function `update_rules` of the `Settings` struct.
- Make it so replacements between square brackets work correctly
2020-04-04 15:46:29 +02:00
bae1d86544 Switched from Vectors to HashMaps, need to update docs 2020-03-29 18:10:45 +02:00
3 changed files with 85 additions and 9 deletions

View File

@@ -18,7 +18,6 @@ extern crate simplelog;
use log::{info, warn};
use simplelog::*;
pub mod settings;
/// Initializes the crate
@@ -48,6 +47,11 @@ pub fn init() -> std::result::Result<(), log::SetLoggerError> {
TerminalMode::Mixed,
)
.unwrap(),
WriteLogger::new(
LevelFilter::Debug,
Config::default(),
File::create("core.log").unwrap(),
),
WriteLogger::new(
LevelFilter::Info,
Config::default(),

View File

@@ -4,7 +4,7 @@ extern crate serde_yaml;
use serde::{Deserialize, Serialize};
extern crate log;
use log::{error, info};
use log::{debug, error, info};
pub mod utils;
use utils::SettingsType;
@@ -91,6 +91,7 @@ macro_rules! decode_settings {
};
}
use std::collections::HashMap;
/// Representation of the software's settings
///
/// This struct represents all the settings the software has to follow
@@ -116,7 +117,7 @@ pub struct Settings {
/// phonemes. It is currently not possible to have more than one
/// character to be considered as one sound.
#[serde(default)]
pub categories: Vec<(String, String)>,
pub categories: HashMap<String, String>,
/// Soundchange rules
///
@@ -124,7 +125,7 @@ pub struct Settings {
/// a regex to be matched while the second represents the change
/// to be made to the input data.
#[serde(default)]
pub rules: Vec<(Regex, Regex)>,
pub rules: Vec<(Regex, String)>,
}
/// Representation inside the crate of LangEvolve's settings.
@@ -137,9 +138,9 @@ impl Settings {
/// let s = lang_evolve_core::settings::Settings::new();
/// let content_yaml = r#"---
/// version: "1"
/// categories: []
/// categories: {}
/// rules: []"#;
/// let content_json = r#"{"version":"1","categories":[],"rules":[]}"#;
/// let content_json = r#"{"version":"1","categories":{},"rules":[]}"#;
/// assert_eq!(content_yaml, serde_yaml::to_string(&s).unwrap());
/// assert_eq!(content_json, serde_json::to_string(&s).unwrap());
/// ```
@@ -148,7 +149,7 @@ impl Settings {
pub fn new() -> Self {
Self {
version: Self::get_ruleset_version(),
categories: Vec::new(),
categories: HashMap::new(),
rules: Vec::new(),
}
}
@@ -282,6 +283,70 @@ impl Settings {
/// Returns the current ruleset version as an owned `String`.
///
/// The value is `RULESET_CURRENT_VERSION` (defined outside this block)
/// converted with `to_string()`. `Settings::new` uses it to fill the
/// `version` field of freshly created settings.
fn get_ruleset_version() -> String {
RULESET_CURRENT_VERSION.to_string()
}
/// Expands category names in every rule into regex character classes.
///
/// Starting from a copy of `self.rules`, each `(category, content)` entry of
/// `self.categories` is applied in turn: every occurrence of `category` in
/// both the pattern and the replacement of a rule is substituted with
/// `[content]`, and then every `%` marker is stripped from the result.
///
/// With the category `C` defined as `bcdfg`, the rule `%Ci` -> `%Cj`
/// becomes `[bcdfg]i` -> `[bcdfg]j`.
///
/// NOTE(review): the substitution matches the bare category name, so an
/// occurrence of the name without a leading `%` is expanded as well, and the
/// order in which categories are visited is whatever iterating
/// `self.categories` yields — confirm both are intended.
///
/// # Returns
///
/// A new list of rules with the categories expanded; `self.rules` itself is
/// left untouched. When there are no categories the rules are returned as
/// they are, `%` markers included.
fn update_rules(&self) -> Vec<(Regex, String)> {
    let mut rules = self.rules.clone();
    for (category, content) in &self.categories {
        // The character class only depends on the category, so build it once
        // instead of once per rule and per side.
        let class = format!("[{}]", content);
        rules = rules
            .iter()
            .map(|(from, to)| {
                let from = Regex::new(
                    from.to_string()
                        .replace(category.as_str(), class.as_str())
                        .replace("%", "")
                        .as_str(),
                );
                // Strip `%` from the whole replacement, not only from the
                // inserted class, so that this side mirrors the pattern side
                // and no stray `%` is left in the output.
                let to = to
                    .replace(category.as_str(), class.as_str())
                    .replace("%", "");
                (from, to)
            })
            .collect();
    }
    rules
}
/// Apply list of rules to input
///
/// The list of rules in the struct is applied to the input `s`, one rule
/// after the other, each rule working on the output of the previous one. If
/// a rule contains the `%` character followed by a capital letter, this
/// marks a category of phonemes and should be replaced by them. For
/// instance, we have:
/// - the category `C` defined as `bcdfg`
/// - the rule `%Ci` to `%Cj`
///
/// The rule should be rewritten as `[bcdfg]i` to `[bcdfg]j`.
///
/// Every step is traced through `debug!`: the rule being applied, the text
/// before the rule and the text after it.
///
/// # Arguments
///
/// * `s` - Input to modify
///
/// # Returns
///
/// The input after every rule has been applied to it.
///
/// # Example
///
/// ```
/// # use lang_evolve_core::settings::Settings;
/// let settings = Settings::new();
/// // add some rules...
/// let input = String::new();
/// // set some input
/// let _output = settings.apply(input);
/// ```
pub fn apply(&self, s: String) -> String {
    // Replace all `%C`s by their equivalent
    let rules = self.update_rules();
    // `s` is already owned: rebind it as mutable instead of cloning it.
    let mut s = s;
    debug!("===============================================");
    for (from, to) in rules {
        debug!("from: {}\tto: {}", from.to_string(), to);
        debug!("old: {}", s);
        s = from.replace_all(&s, to.as_str()).to_string();
        debug!("new: {}", s);
    }
    s
}
}
use std::str::FromStr;
@@ -300,7 +365,7 @@ impl FromStr for Settings {
///
/// ```
/// # use std::str::FromStr;
/// let s = r#"{"version":"1","categories":[],"rules":[]}"#;
/// let s = r#"{"version":"1","categories":{},"rules":[]}"#;
/// let settings = lang_evolve_core::settings::Settings::from_str(s).unwrap();
/// ```
fn from_str(s: &str) -> Result<Self, Self::Err> {
@@ -341,7 +406,7 @@ fn write_settings() {
let path = std::path::Path::new("test.yaml");
let settings = r#"---
version: "1"
categories: []
categories: {}
rules: []"#;
utils::write_file(&path, &serde_yaml::to_string(&s).unwrap()).unwrap();
assert_eq!(settings, utils::read_file(&path).unwrap());

View File

@@ -31,6 +31,13 @@ impl Regex {
}
}
use std::hash::{Hash, Hasher};
/// Hashes a `Regex` through the text returned by the wrapped value's
/// `as_str()`, so the hash depends only on that string.
impl Hash for Regex {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Feed the `&str` into the hasher exactly as `str`'s own `Hash`
        // implementation would.
        let pattern: &str = self.0.as_str();
        pattern.hash(state);
    }
}
impl ops::Deref for Regex {
type Target = regex::Regex;
fn deref(&self) -> &regex::Regex {