summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Janito Vaqueiro Ferreira Filho <janito@mullvad.net> 2021-05-26 18:37:41 +0000
committer Janito Vaqueiro Ferreira Filho <janito@mullvad.net> 2021-05-28 11:54:59 +0000
commit 5ea54894c4b69cff326048976b94769f6e02b8c2 (patch)
tree 7f7dfe56a336e0893df42061a85043eb7bd44f0f
parent 459777052f4f60d71fee43f03848346a5fe615d6 (diff)
download mullvadvpn-5ea54894c4b69cff326048976b94769f6e02b8c2.tar.xz
mullvadvpn-5ea54894c4b69cff326048976b94769f6e02b8c2.zip
Move `Parser` into a separate module
Prepare for refactoring the parsing logic to improve readability and maintainability.
-rw-r--r-- android/translations-converter/src/gettext/messages.rs 115
-rw-r--r-- android/translations-converter/src/gettext/mod.rs 1
-rw-r--r-- android/translations-converter/src/gettext/parser.rs 138
3 files changed, 145 insertions, 109 deletions
diff --git a/android/translations-converter/src/gettext/messages.rs b/android/translations-converter/src/gettext/messages.rs
index de6c18e11a..d6d8a10b64 100644
--- a/android/translations-converter/src/gettext/messages.rs
+++ b/android/translations-converter/src/gettext/messages.rs
@@ -1,10 +1,8 @@
-use super::{msg_string::MsgString, parse_line, plural_form::PluralForm};
+use super::{msg_string::MsgString, parser::Parser, plural_form::PluralForm};
use derive_more::{Display, Error, From};
use std::{
- collections::BTreeMap,
fs::File,
io::{BufRead, BufReader},
- mem,
path::Path,
};
@@ -35,117 +33,16 @@ pub enum MsgValue {
impl Messages {
/// Load message entries from a gettext translation file.
///
- /// The only metadata that is parsed from the file is the "Plural-Form" header. It is assumed
- /// that the header value is one of some hard-coded values, so if new languages that have new
- /// plurals are added, the code will have to be updated.
- ///
- /// An gettext translation file has the format in the example below:
- ///
- /// ```
- /// # The start of the file can contain empty entries to include some header with meta
- /// # information. Below is the header indicating the plural format.
- /// msgid ""
- /// msgstr ""
- /// "Plural-Forms: nplurals=2; plural=(n != 1);"
- ///
- /// # Simple translated messages
- /// msgid "Message in original language"
- /// msgstr "Mesaĝo en tradukita lingvo"
- ///
- /// # Plural translated messages (with two forms)
- /// msgid "One translated message"
- /// msgid_plural "%d translated messages"
- /// msgstr[0] "Unu tradukita mesaĝo"
- /// msgstr[1] "%d tradukitaj mesaĝoj"
- /// ```
+ /// See [`Parser`] for more information.
pub fn from_file(file_path: impl AsRef<Path>) -> Result<Self, Error> {
- let mut parsing_header = false;
- let mut entries = Vec::new();
- let mut current_id = None;
- let mut current_plural_id = None;
- let mut plural_form = None;
- let mut variants = BTreeMap::new();
-
let file = BufReader::new(File::open(file_path).expect("Failed to open gettext file"));
- // Ensure there's an empty line at the end so that the "else" part of the string matching
- // code will run for the last message in the file.
- let lines = file.lines().chain(Some(Ok(String::new())));
-
- for line_result in lines {
- let line = line_result?;
-
- match_str! { (line.trim())
- ["msgid \"", msg_id, "\""] => {
- current_id = Some(MsgString::from_escaped(msg_id));
- },
- ["msgstr \"", translation, "\""] => {
- if let Some(id) = current_id.take() {
- let value = MsgValue::Invariant(MsgString::from_escaped(translation));
-
- parsing_header = id.is_empty() && translation.is_empty();
-
- entries.push(MsgEntry { id, value });
- }
-
- current_id = None;
- current_plural_id = None;
- },
- ["msgid_plural \"", plural_id, "\""] => {
- current_plural_id = Some(MsgString::from_escaped(plural_id));
- parsing_header = false;
- },
- ["msgstr[", plural_translation, "\""] => {
- let variant_id_end = plural_translation
- .chars()
- .position(|character| character == ']')
- .expect("Invalid plural msgstr");
- let variant_id: usize = plural_translation[..variant_id_end]
- .parse()
- .expect("Invalid variant index");
- let variant_msg = parse_line(&plural_translation[variant_id_end..], "] \"", "")
- .expect("Invalid plural msgstr");
-
- variants.insert(variant_id, MsgString::from_escaped(variant_msg));
- parsing_header = false;
- },
- ["\"", header, "\\n\""] => {
- if parsing_header {
- if let Some(plural_formula) = parse_line(header, "Plural-Forms: ", ";") {
- plural_form = PluralForm::from_formula(plural_formula);
- }
- }
- },
- _ => {
- if let Some(plural_id) = current_plural_id.take() {
- let id = current_id.take().expect("Missing msgid for plural message");
- let values = mem::replace(&mut variants, BTreeMap::new())
- .into_iter()
- .enumerate()
- .inspect(|(index, (variant_id, _))| {
- assert_eq!(
- index, variant_id,
- "Unexpected variant ID for plural msgstr"
- )
- })
- .map(|(_, (_, value))| value)
- .collect();
- let value = MsgValue::Plural { plural_id, values };
-
- entries.push(MsgEntry { id, value });
- }
+ let mut parser = Parser::new();
- current_id = None;
- current_plural_id = None;
- variants.clear();
- parsing_header = false;
- },
- }
+ for line in file.lines() {
+ parser.parse_line(&line?);
}
- Ok(Messages {
- entries,
- plural_form,
- })
+ Ok(parser.finish())
}
/// Construct an empty messages list configured with the specified plural form.
diff --git a/android/translations-converter/src/gettext/mod.rs b/android/translations-converter/src/gettext/mod.rs
index 59fcfe0b70..cbf81d5717 100644
--- a/android/translations-converter/src/gettext/mod.rs
+++ b/android/translations-converter/src/gettext/mod.rs
@@ -2,6 +2,7 @@
mod match_str;
mod messages;
mod msg_string;
+mod parser;
mod plural_form;
use std::{
diff --git a/android/translations-converter/src/gettext/parser.rs b/android/translations-converter/src/gettext/parser.rs
new file mode 100644
index 0000000000..38d2c53a37
--- /dev/null
+++ b/android/translations-converter/src/gettext/parser.rs
@@ -0,0 +1,138 @@
+use super::{messages::Messages, msg_string::MsgString, parse_line, PluralForm};
+use std::{collections::BTreeMap, mem};
+
+/// A gettext messages file parser.
+///
+/// Can parse both translations files and template files.
+///
+/// # Usage
+///
+/// The parser works by parsing individual lines. After creating a [`Parser`] instance, the input
+/// lines should be sent to it through repeated calls to [`Parser::parse_line`], and afterwards
+/// calling [`Parser::finish`] to finish parsing and obtain the parsed result.
+///
+/// The only metadata that is parsed from the file is the "Plural-Forms" header. It is assumed
+/// that the header value is one of some hard-coded values, so if new languages that have new
+/// plurals are added, the code will have to be updated.
+///
+/// # Input example
+///
+/// A gettext translation file has the format in the example below:
+///
+/// ```
+/// # The start of the file can contain empty entries to include some header with meta
+/// # information. Below is the header indicating the plural format.
+/// msgid ""
+/// msgstr ""
+/// "Plural-Forms: nplurals=2; plural=(n != 1);"
+///
+/// # Simple translated messages
+/// msgid "Message in original language"
+/// msgstr "Mesaĝo en tradukita lingvo"
+///
+/// # Plural translated messages (with two forms)
+/// msgid "One translated message"
+/// msgid_plural "%d translated messages"
+/// msgstr[0] "Unu tradukita mesaĝo"
+/// msgstr[1] "%d tradukitaj mesaĝoj"
+/// ```
+#[derive(Debug)]
+pub struct Parser {
+ parsing_header: bool,
+ messages: Messages,
+ current_id: Option<MsgString>,
+ current_plural_id: Option<MsgString>,
+ variants: BTreeMap<usize, MsgString>,
+}
+
+impl Parser {
+ /// Create a new [`Parser`] instance.
+ ///
+ /// Parsing can then be done by feeding lines to the instance using [`Parser::parse_line`] and
+ /// finishing with a call to [`Parser::finish`] to obtain the parsed result.
+ pub fn new() -> Self {
+ Parser {
+ parsing_header: false,
+ messages: Messages::default(),
+ current_id: None,
+ current_plural_id: None,
+ variants: BTreeMap::new(),
+ }
+ }
+
+ /// Parse an input line.
+ ///
+ /// # Panics
+ ///
+ /// The method will panic if the line can not be parsed.
+ pub fn parse_line(&mut self, line: &str) {
+ match_str! { (line.trim())
+ ["msgid \"", msg_id, "\""] => {
+ self.current_id = Some(MsgString::from_escaped(msg_id));
+ },
+ ["msgstr \"", translation, "\""] => {
+ if let Some(id) = self.current_id.take() {
+ self.parsing_header = id.is_empty() && translation.is_empty();
+ self.messages.add(id, MsgString::from_escaped(translation));
+ }
+
+ self.current_id = None;
+ self.current_plural_id = None;
+ },
+ ["msgid_plural \"", plural_id, "\""] => {
+ self.current_plural_id = Some(MsgString::from_escaped(plural_id));
+ self.parsing_header = false;
+ },
+ ["msgstr[", plural_translation, "\""] => {
+ let variant_id_end = plural_translation
+ .chars()
+ .position(|character| character == ']')
+ .expect("Invalid plural msgstr");
+ let variant_id: usize = plural_translation[..variant_id_end]
+ .parse()
+ .expect("Invalid variant index");
+ let variant_msg = parse_line(&plural_translation[variant_id_end..], "] \"", "")
+ .expect("Invalid plural msgstr");
+
+ self.variants.insert(variant_id, MsgString::from_escaped(variant_msg));
+ self.parsing_header = false;
+ },
+ ["\"", header, "\\n\""] => {
+ if self.parsing_header {
+ if let Some(plural_formula) = parse_line(header, "Plural-Forms: ", ";") {
+ self.messages.plural_form = PluralForm::from_formula(plural_formula);
+ }
+ }
+ },
+ _ => {
+ if let Some(plural_id) = self.current_plural_id.take() {
+ let id = self.current_id.take().expect("Missing msgid for plural message");
+ let values = mem::replace(&mut self.variants, BTreeMap::new())
+ .into_iter()
+ .enumerate()
+ .inspect(|(index, (variant_id, _))| {
+ assert_eq!(
+ index, variant_id,
+ "Unexpected variant ID for plural msgstr"
+ )
+ })
+ .map(|(_, (_, value))| value)
+ .collect();
+
+ self.messages.add_plural(id, plural_id, values);
+ }
+
+ self.current_id = None;
+ self.current_plural_id = None;
+ self.variants.clear();
+ self.parsing_header = false;
+ },
+ }
+ }
+
+ /// Finish parsing and obtain the parsed [`Messages`].
+ pub fn finish(mut self) -> Messages {
+ self.parse_line("");
+ self.messages
+ }
+}