Compare commits

..

12 Commits

Author SHA1 Message Date
6be0f7e8f6 Switched rules to Regexes, added Regex wrapper for serde 2020-03-29 03:10:19 +02:00
d13836e433 Removed comments, made Settings members public, doc 2020-03-29 03:09:48 +02:00
f3e672e29c Organized imports 2020-03-29 03:07:26 +02:00
c418323b5c Removed unused uses 2020-03-29 03:06:46 +02:00
c6baa46aca Removed import_input
This should be left to the client crate, `settings::utils::read_file`
is already here for that
2020-03-28 22:38:28 +01:00
18eb16e777 Rewritten some parts as macros, updated extension handling
Encoding and decoding of `settings::Settings` now handled in macros to
avoid code repetition. I also wish to rewrite them both in a combined
macro that would either encode or decode the struct depending on how
it was called.

Replaced some log warnings with errors

Now if type extension is not valid, the code will still attempt to
decode it, first as JSON data, then as Yaml data. If both fail, then
an error is returned. For this, I added the Display trait to the
`settings::Settings` struct which allows the FromStr trait, and due
to conflicting implementation, I removed the From trait and
implemented manually the `from` method with the same signature -- the
struct just lacks the trait that comes with it.
2020-03-28 22:03:21 +01:00
d200367ee0 Updated logger options, changed filetype behavior
Replaced lots of warns by errors

Filetype detection won’t return an error on unknown extension anymore,
but will return an `Other` value so it can be handled by other
functions not as an error already. See upcoming commits.
2020-03-28 21:59:23 +01:00
e9a161f526 Updated import_input signature, documentation 2020-03-28 21:58:27 +01:00
a7ef031090 Final doc before I go to sleep 2020-03-27 23:31:24 +01:00
9f5c040893 Added documentation 2020-03-27 23:27:00 +01:00
7d0a371311 Changed ruleset variable type to i32 2020-03-27 23:26:16 +01:00
e00f489f55 Added From trait to Settings 2020-03-27 23:25:16 +01:00
5 changed files with 354 additions and 97 deletions

View File

@@ -7,8 +7,12 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
# Logger
log = "0.4"
simplelog = "0.7"
# Struct serializing and deserializing
serde = {version = "1.0", features = ["derive"]} serde = {version = "1.0", features = ["derive"]}
serde_yaml = "0.7" serde_yaml = "0.7"
serde_json = "1.0" serde_json = "1.0"
log = "0.4" # Regex support
simplelog = "0.7" regex = "1.3"

View File

@@ -13,16 +13,13 @@
//! user-defined sound changes to words and texts based on regex expressions. //! user-defined sound changes to words and texts based on regex expressions.
use std::fs::File; use std::fs::File;
use std::io::Result;
use std::path::PathBuf;
extern crate log; extern crate log;
extern crate simplelog; extern crate simplelog;
use log::{info, warn}; use log::{info, warn};
use simplelog::*; use simplelog::*;
pub mod settings; pub mod settings;
use settings::utils;
/// Initializes the crate /// Initializes the crate
/// ///
@@ -67,8 +64,3 @@ pub fn init() -> std::result::Result<(), log::SetLoggerError> {
} }
} }
} }
/// Import user input from a text file and return them as a String
pub fn import_input(path: PathBuf) -> Result<String> {
utils::read_file(&path)
}

View File

@@ -4,28 +4,132 @@ extern crate serde_yaml;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
extern crate log; extern crate log;
use log::warn; use log::{error, info};
// mod utils;
pub mod utils; pub mod utils;
use utils::SettingsType; use utils::SettingsType;
#[allow(dead_code)] pub mod regex_wrapper;
const RULESET_CURRENT_VERSION: &'static str = "1"; use regex_wrapper::Regex;
#[derive(Debug, Deserialize, Serialize)] /// Current version of the ruleset. It will help determine if the ruleset is
/// outdated or from a more recent version of the software than the one being in
/// use.
pub const RULESET_CURRENT_VERSION: i32 = 1;
/// Encode a [`Settings`] struct with a `serde`-compatible crate.
///
/// The macro expands to a `match` expression. On success it evaluates to the
/// encoded `String`. On failure it logs the error and **returns early from
/// the enclosing function** with a `std::io::Error` of kind `InvalidData`
/// wrapping the serializer error. It must therefore be invoked inside a
/// function returning `std::io::Result<_>`, and the `log` crate is expected
/// to be reachable as `log` at the call site.
///
/// # Arguments
///
/// * `funcrate` - `serde`-compatible crate to use, must provide `to_string`
/// * `content` - content to encode, expected to be a reference to a
///   `Settings` struct
///
/// # Example
///
/// ```ignore
/// # use lang_evolve_core::settings::*;
/// # use lang_evolve_core::encode_settings;
/// use std::path::Path;
///
/// fn encode(s: &Settings, path: &Path) -> std::io::Result<String> {
///     let content = match utils::get_file_type(path) {
///         utils::SettingsType::Yaml => encode_settings!(serde_yaml, s),
///         utils::SettingsType::Json => encode_settings!(serde_json, s),
///         _ => {
///             return Err(std::io::Error::new(
///                 std::io::ErrorKind::InvalidData,
///                 "Unknown file type",
///             ))
///         }
///     };
///     Ok(content)
/// }
/// ```
///
/// [`Settings`]: ./settings/struct.Settings.html
#[macro_export(local_inner_macros)]
macro_rules! encode_settings {
    ($funcrate:ident, $content:expr) => {
        match $funcrate::to_string($content) {
            Ok(val) => val,
            Err(e) => {
                log::error!("Could not serialize settings: {}", e);
                // Absolute paths keep the expansion independent of whatever
                // `std` may be shadowed by at the call site.
                return Err(::std::io::Error::new(
                    ::std::io::ErrorKind::InvalidData,
                    e,
                ));
            }
        }
    };
}
/// Decode a [`Settings`] struct from a `&str` with a `serde`-compatible
/// crate.
///
/// The macro expands to a `match` expression. On success it evaluates to the
/// decoded value. On failure it logs the error and **returns early from the
/// enclosing function** with a `std::io::Error` of kind `InvalidInput`
/// wrapping the deserializer error. It must therefore be invoked inside a
/// function returning `std::io::Result<_>`, and the `log` crate is expected
/// to be reachable as `log` at the call site.
///
/// # Arguments
///
/// * `funcrate` - `serde`-compatible crate to use, must provide `from_str`
/// * `content` - `&str` to decode into a [`Settings`]
///
/// # Example
///
/// ```ignore
/// # use lang_evolve_core::decode_settings;
/// # use lang_evolve_core::settings::Settings;
/// fn decode(input: &str) -> std::io::Result<Settings> {
///     let settings: Settings = decode_settings!(serde_json, input);
///     Ok(settings)
/// }
///
/// let settings = decode(r#"{"version":"1","categories":[],"rules":[]}"#);
/// ```
///
/// [`Settings`]: ./settings/struct.Settings.html
#[macro_export(local_inner_macros)]
macro_rules! decode_settings {
    ($funcrate:ident, $content:expr) => {
        match $funcrate::from_str($content) {
            Ok(val) => val,
            Err(e) => {
                log::error!("Could not import settings: {}", e);
                // Absolute paths keep the expansion independent of whatever
                // `std` may be shadowed by at the call site.
                return Err(::std::io::Error::new(
                    ::std::io::ErrorKind::InvalidInput,
                    e,
                ));
            }
        }
    };
}
/// Representation of the software's settings
///
/// This struct represents all the settings the software has to follow
/// while running, which includes the phoneme categories as well as
/// the soundchange rules to apply to the input text.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Settings { pub struct Settings {
/// Represents the version of the loaded ruleset.
///
/// It is used to detect obsolete ruleset representations or if a
/// loaded ruleset comes from a newer version of lang_evolve_core
/// than the one used by the user.
#[serde(default = "Settings::get_ruleset_version")] #[serde(default = "Settings::get_ruleset_version")]
version: String, pub version: String,
/// Categories of phonemes
///
/// This is a vector of categories of phonemes, represented
/// themselves as pairs of strings. Each pair of strings has its
/// first element represent the name of the category, which is
/// generally represented by a single capital letter. The second
/// element is a string where all its characters represent
/// phonemes. It is currently not possible to have more than one
/// character to be considered as one sound.
#[serde(default)] #[serde(default)]
categories: Vec<(String, String)>, pub categories: Vec<(String, String)>,
/// Soundchange rules
///
/// This is a vector of pairs of strings, the first one represents
/// a regex to be matched while the second represents the change
/// to be made to the input data.
#[serde(default)] #[serde(default)]
rules: Vec<(String, String)>, pub rules: Vec<(Regex, Regex)>,
} }
/// Representation inside the crate of LangEvolves settings. /// Representation inside the crate of LangEvolves settings.
impl Settings { impl Settings {
/// Creates a new empty instance of `Settings` /// Creates a new empty instance of [`Settings`]
/// ///
/// # Example /// # Example
/// ///
@@ -39,6 +143,8 @@ impl Settings {
/// assert_eq!(content_yaml, serde_yaml::to_string(&s).unwrap()); /// assert_eq!(content_yaml, serde_yaml::to_string(&s).unwrap());
/// assert_eq!(content_json, serde_json::to_string(&s).unwrap()); /// assert_eq!(content_json, serde_json::to_string(&s).unwrap());
/// ``` /// ```
///
/// [`Settings`]: ./settings/struct.Settings.html
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
version: Self::get_ruleset_version(), version: Self::get_ruleset_version(),
@@ -47,12 +153,11 @@ impl Settings {
} }
} }
/// Import settings from an imput file. The currently allowed file formats /// Import settings from an imput file.
/// are: ///
/// - JSON - with the `.json` extension /// The currently allowed file formats are described in the
/// - Yaml - with the `.yaml` or `.yml` extension /// [`utils::SettingsType`] enum. If the ruleset version is higher than the
/// The format will be automatically detected based on the filename /// current version (see [`Settings.version`]), then an error is returned.
/// extension.
/// ///
/// # Arguments /// # Arguments
/// ///
@@ -72,50 +177,67 @@ impl Settings {
/// let path_yml = Path::new("settings.yml"); /// let path_yml = Path::new("settings.yml");
/// let _s_yml = Settings::import(&path_yml).unwrap(); /// let _s_yml = Settings::import(&path_yml).unwrap();
/// ``` /// ```
///
/// [`utils::SettingsType`]: ./utils/enum.SettingsType.html
/// [`Settings.version`]: ./struct.Settings.html#structfield.version
pub fn import(path: &std::path::Path) -> std::io::Result<Self> { pub fn import(path: &std::path::Path) -> std::io::Result<Self> {
use SettingsType::*; use utils::SettingsType::{Json, Yaml};
let display = path.display(); let file_type = utils::get_file_type(&path);
let file_type = utils::get_file_type(&path).unwrap(); let content = utils::read_file(&path)?;
let content = match utils::read_file(&path) {
Err(e) => {
warn!("Could not read file {}: {}", display, e.to_string());
return Err(e);
}
Ok(content) => content,
};
let settings: Settings = match file_type { let settings: Settings = match file_type {
Yaml => match serde_yaml::from_str(&content) { Yaml => decode_settings!(serde_yaml, &content),
Json => decode_settings!(serde_json, &content),
// Attempt to decode anyway
_ => match Settings::from_str(&content.as_str()) {
Ok(val) => val,
Err(e) => { Err(e) => {
warn!("Could not import settings: {}", e.to_string()); error!(
"Could not decode input {}: {}",
content,
e.to_string()
);
return Err(std::io::Error::new( return Err(std::io::Error::new(
std::io::ErrorKind::InvalidInput, std::io::ErrorKind::InvalidData,
e, e,
)); ));
} }
Ok(val) => val,
},
Json => match serde_json::from_str(&content) {
Err(e) => {
warn!("Could not import settings: {}", e.to_string());
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
e,
));
}
Ok(val) => val,
}, },
}; };
if settings.version > Self::get_ruleset_version() {
error!("Ruleset version too high!");
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
"Ruleset version too high!",
));
}
info!("Successfuly imported {}", path.display());
Ok(settings) Ok(settings)
} }
/// Export the current rules to a file. The allowed file formats are either /// Import settings from file path described by the argument `path`
/// a YAML file or a Json file, hence the allowed filename extension are: ///
/// * "yml" or "yaml" for Yaml files /// # Arguments
/// * "json" for Json files ///
/// The format is detected automatically depending on the extension of the /// * `path` - path to the file from which settings should be imported
/// filename. ///
/// # Example
///
/// ```no_run
/// let s = lang_evolve_core::settings::Settings::from("settings.yml");
/// ```
///
/// # Panics
///
/// Panics if `Settings::import` returns an error for the given path, since
/// the result is unwrapped.
pub fn from<S>(s: S) -> Self
where
    S: ToString,
{
    // Materialize the argument as an owned string so a `Path` can borrow it.
    let location = s.to_string();
    Self::import(std::path::Path::new(&location)).unwrap()
}
/// Export current settings to a file.
///
/// The allowed file formats are described in the [`SettingsType`] enum.
/// ///
/// # Arguments /// # Arguments
/// ///
@@ -137,36 +259,69 @@ impl Settings {
/// let path_yml = Path::new("./output.yml"); /// let path_yml = Path::new("./output.yml");
/// s.export(&path_yml).unwrap(); /// s.export(&path_yml).unwrap();
/// ``` /// ```
///
/// [`SettingsType`]: ./utils/enum.SettingsType.html
pub fn export(&self, path: &std::path::Path) -> std::io::Result<()> { pub fn export(&self, path: &std::path::Path) -> std::io::Result<()> {
let filetype = utils::get_file_type(&path).unwrap(); let filetype = utils::get_file_type(&path);
let content = match filetype { let content = match filetype {
SettingsType::Yaml => match serde_yaml::to_string(&self) { SettingsType::Yaml => encode_settings!(serde_yaml, &self),
Err(e) => { SettingsType::Json => encode_settings!(serde_json, &self),
warn!("Could not serialize settings: {}", e.to_string()); _ => {
error!("Unknown filetype {}", path.to_str().unwrap());
return Err(std::io::Error::new( return Err(std::io::Error::new(
std::io::ErrorKind::InvalidData, std::io::ErrorKind::InvalidData,
e, "Unknown file type",
)); ));
} }
Ok(val) => val,
},
SettingsType::Json => match serde_json::to_string(&self) {
Err(e) => {
warn!("Could not serialize settings: {}", e.to_string());
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
e,
));
}
Ok(val) => val,
},
}; };
info!("Successfuly exported settings to {}", path.display());
utils::write_file(&path, &content) utils::write_file(&path, &content)
} }
/// Get the current ruleset version of LangEvolve. /// Get the current ruleset version of LangEvolve.
fn get_ruleset_version() -> String { fn get_ruleset_version() -> String {
String::from(RULESET_CURRENT_VERSION) RULESET_CURRENT_VERSION.to_string()
}
}
use std::str::FromStr;
impl FromStr for Settings {
type Err = serde_yaml::Error;
/// Decode a litteral string into a `Settings` struct. Works only for
/// supported file types described in `SettingsType`. It will try to decode
/// the input `s` by any mean known by `SettingsType`.
///
/// # Arguments
///
/// * `s` - litteral string to decode into a `Settings` struct
///
/// # Example
///
/// ```
/// # use std::str::FromStr;
/// let s = r#"{"version":"1","categories":[],"rules":[]}"#;
/// let settings = lang_evolve_core::settings::Settings::from_str(s).unwrap();
/// ```
fn from_str(s: &str) -> Result<Self, Self::Err> {
match serde_json::from_str::<Settings>(s) {
Ok(val) => Ok(val),
Err(_) => match serde_yaml::from_str::<Settings>(s) {
Ok(val) => Ok(val),
Err(e) => {
error!("Could not decode input {}: {}", s, e.to_string());
return Err(e);
}
},
}
}
}
use std::fmt;
use std::fmt::Display;
impl Display for Settings {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", serde_json::to_string(&self).unwrap())
} }
} }

View File

@@ -0,0 +1,84 @@
// extern crate serde;
// extern crate regex;
// use serde::{Deserialize, Serialize};
// #[derive(Clone, Debug, Deserialize, Serialize)]
// #[serde(transparent)]
// pub struct Regex(regex::Regex);
// #[derive(Clone, Debug, Deserialize, Serialize)]
// #[serde(remote = "regex::Regex")]
// pub struct RegexDef{
// #[serde(getter = "regex::Regex::to_string")]
// r: String
// }
use std::{fmt, ops};
/// Wrapper around `regex::Regex`.
///
/// The newtype exists so that traits the wrapped type does not provide
/// (serde (de)serialization, equality) can be implemented in this module.
#[derive(Clone, Debug)]
pub struct Regex(regex::Regex);

impl Regex {
    /// Compile `s` into a `Regex`.
    ///
    /// # Panics
    ///
    /// Panics if `s` is not a valid regular expression, since the
    /// compilation result is unwrapped.
    pub fn new(s: &str) -> Self {
        Self(regex::Regex::new(s).unwrap())
    }

    /// Return the pattern text of the wrapped regular expression.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}

/// Formats the wrapper exactly like the wrapped `regex::Regex`.
///
/// This replaces a former inherent `to_string` method: `.to_string()` keeps
/// working through the blanket `ToString` implementation, and the wrapper can
/// now also be used directly in format strings.
impl fmt::Display for Regex {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}
impl ops::Deref for Regex {
type Target = regex::Regex;
fn deref(&self) -> &regex::Regex {
&self.0
}
}
impl<'de> serde::Deserialize<'de> for Regex {
fn deserialize<D>(de: D) -> Result<Regex, D::Error>
where
D: serde::Deserializer<'de>,
{
use serde::de::{Error, Visitor};
struct RegexVisitor;
impl<'de> Visitor<'de> for RegexVisitor {
type Value = Regex;
fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str("a regular expression pattern")
}
fn visit_str<E: Error>(self, v: &str) -> Result<Regex, E> {
regex::Regex::new(v)
.map(Regex)
.map_err(|err| E::custom(err.to_string()))
}
}
de.deserialize_str(RegexVisitor)
}
}
use serde::{Serialize, Serializer};
impl Serialize for Regex {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_str(self.0.as_str())
}
}
/// Two `Regex` values are equal when their pattern texts are identical.
///
/// The comparison is purely textual: patterns that match the same inputs but
/// are written differently are not considered equal.
impl PartialEq for Regex {
    fn eq(&self, other: &Self) -> bool {
        // Compare the borrowed pattern text directly instead of allocating a
        // `String` for each side.
        self.0.as_str() == other.0.as_str()
    }
}

impl Eq for Regex {}

View File

@@ -1,17 +1,22 @@
extern crate log; extern crate log;
use log::{info, warn}; use log::{info, error};
use std::fs::File; use std::fs::File;
use std::io::{Read, Result}; use std::io::{Read, Result};
use std::path::Path; use std::path::Path;
/// Type of supported settings format: yaml or json /// Type of supported settings format: yaml or json
#[derive(Debug, PartialEq)]
pub enum SettingsType { pub enum SettingsType {
/// Files ending with the `yml` or `yaml` extension /// Files ending with the `yml` or `yaml` extension
Yaml, Yaml,
/// Files ending with the `json` extension /// Files ending with the `json` extension
Json, Json,
/// Other file type, used to describe files without an extension or with an
/// unsupported extension
Other,
} }
/// Read a files content into a `String` /// Read a files content into a `String`
@@ -26,7 +31,7 @@ pub fn read_file(path: &Path) -> Result<String> {
let display = path.display(); let display = path.display();
let mut file = match File::open(&path) { let mut file = match File::open(&path) {
Err(why) => { Err(why) => {
warn!("Could not read {}: {}", display, why.to_string()); error!("Could not read {}: {}", display, why.to_string());
return Err(why); return Err(why);
} }
Ok(file) => file, Ok(file) => file,
@@ -34,7 +39,7 @@ pub fn read_file(path: &Path) -> Result<String> {
let mut content = String::new(); let mut content = String::new();
match file.read_to_string(&mut content) { match file.read_to_string(&mut content) {
Err(why) => { Err(why) => {
warn!("Could not read {}: {}", display, why.to_string()); error!("Could not read {}: {}", display, why.to_string());
return Err(why); return Err(why);
} }
Ok(_) => { Ok(_) => {
@@ -60,14 +65,14 @@ where
use std::io::prelude::*; use std::io::prelude::*;
let mut file = match File::create(&path) { let mut file = match File::create(&path) {
Err(e) => { Err(e) => {
warn!("Could not open file {}: {}", path.display(), e.to_string()); error!("Could not open file {}: {}", path.display(), e.to_string());
return Err(e); return Err(e);
} }
Ok(file) => file, Ok(file) => file,
}; };
match file.write_all(content.to_string().as_bytes()) { match file.write_all(content.to_string().as_bytes()) {
Err(e) => { Err(e) => {
warn!( error!(
"Could not write settings to file {}: {}", "Could not write settings to file {}: {}",
path.display(), path.display(),
e.to_string() e.to_string()
@@ -78,17 +83,37 @@ where
info!("Wrote settings to file {}", path.display()); info!("Wrote settings to file {}", path.display());
} }
}; };
info!("Successfuly written {}", path.display());
Ok(()) Ok(())
} }
pub fn get_file_type(path: &Path) -> Result<SettingsType> { /// Get the type of file to be opened based on its extension. Currently
/// supported file types are:
/// * JSON - `.json` extension
/// * Yaml - `.yml` or `.yaml` extensions
///
/// # Arguments
///
/// - `path` - Path of the file to be determined
///
/// # Example
///
/// ```
/// let file_json = std::path::Path::new("file.json");
/// assert_eq!(lang_evolve_core::settings::utils::SettingsType::Json,
/// lang_evolve_core::settings::utils::get_file_type(&file_json));
///
/// let file_yaml = std::path::Path::new("file.yaml");
/// assert_eq!(lang_evolve_core::settings::utils::SettingsType::Yaml,
/// lang_evolve_core::settings::utils::get_file_type(&file_yaml));
///
/// let file_yml = std::path::Path::new("file.yml");
/// assert_eq!(lang_evolve_core::settings::utils::SettingsType::Yaml,
/// lang_evolve_core::settings::utils::get_file_type(&file_yml));
/// ```
pub fn get_file_type(path: &Path) -> SettingsType {
let extension = match path.extension() { let extension = match path.extension() {
None => { None => { return SettingsType::Other; }
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"File has no extension",
));
}
Some(val) => val, Some(val) => val,
}; };
let extension = extension let extension = extension
@@ -96,11 +121,8 @@ pub fn get_file_type(path: &Path) -> Result<SettingsType> {
.expect("Could not get String out of extension") .expect("Could not get String out of extension")
.to_lowercase(); .to_lowercase();
match extension.as_str() { match extension.as_str() {
"yml" | "yaml" => Ok(SettingsType::Yaml), "yml" | "yaml" => SettingsType::Yaml,
"json" => Ok(SettingsType::Json), "json" => SettingsType::Json,
_ => Err(std::io::Error::new( _ => SettingsType::Other,
std::io::ErrorKind::InvalidInput,
"Invalid extension",
)),
} }
} }