diff options
| author | Janito Vaqueiro Ferreira Filho <janito@mullvad.net> | 2021-05-26 18:37:41 +0000 |
|---|---|---|
| committer | Janito Vaqueiro Ferreira Filho <janito@mullvad.net> | 2021-05-28 11:54:59 +0000 |
| commit | 5ea54894c4b69cff326048976b94769f6e02b8c2 (patch) | |
| tree | 7f7dfe56a336e0893df42061a85043eb7bd44f0f | |
| parent | 459777052f4f60d71fee43f03848346a5fe615d6 (diff) | |
| download | mullvadvpn-5ea54894c4b69cff326048976b94769f6e02b8c2.tar.xz mullvadvpn-5ea54894c4b69cff326048976b94769f6e02b8c2.zip | |
Move `Parser` into a separate module
Prepare for refactoring the parsing logic to improve readability and
maintainability.
| -rw-r--r-- | android/translations-converter/src/gettext/messages.rs | 115 | ||||
| -rw-r--r-- | android/translations-converter/src/gettext/mod.rs | 1 | ||||
| -rw-r--r-- | android/translations-converter/src/gettext/parser.rs | 138 |
3 files changed, 145 insertions, 109 deletions
diff --git a/android/translations-converter/src/gettext/messages.rs b/android/translations-converter/src/gettext/messages.rs index de6c18e11a..d6d8a10b64 100644 --- a/android/translations-converter/src/gettext/messages.rs +++ b/android/translations-converter/src/gettext/messages.rs @@ -1,10 +1,8 @@ -use super::{msg_string::MsgString, parse_line, plural_form::PluralForm}; +use super::{msg_string::MsgString, parser::Parser, plural_form::PluralForm}; use derive_more::{Display, Error, From}; use std::{ - collections::BTreeMap, fs::File, io::{BufRead, BufReader}, - mem, path::Path, }; @@ -35,117 +33,16 @@ pub enum MsgValue { impl Messages { /// Load message entries from a gettext translation file. /// - /// The only metadata that is parsed from the file is the "Plural-Form" header. It is assumed - /// that the header value is one of some hard-coded values, so if new languages that have new - /// plurals are added, the code will have to be updated. - /// - /// An gettext translation file has the format in the example below: - /// - /// ``` - /// # The start of the file can contain empty entries to include some header with meta - /// # information. Below is the header indicating the plural format. - /// msgid "" - /// msgstr "" - /// "Plural-Forms: nplurals=2; plural=(n != 1);" - /// - /// # Simple translated messages - /// msgid "Message in original language" - /// msgstr "Mesaĝo en tradukita lingvo" - /// - /// # Plural translated messages (with two forms) - /// msgid "One translated message" - /// msgid_plural "%d translated messages" - /// msgstr[0] "Unu tradukita mesaĝo" - /// msgstr[1] "%d tradukitaj mesaĝoj" - /// ``` + /// See [`Parser`] for more information. pub fn from_file(file_path: impl AsRef<Path>) -> Result<Self, Error> { - let mut parsing_header = false; - let mut entries = Vec::new(); - let mut current_id = None; - let mut current_plural_id = None; - let mut plural_form = None; - let mut variants = BTreeMap::new(); - let file = BufReader::new(File::open(file_path).expect("Failed to open gettext file")); - // Ensure there's an empty line at the end so that the "else" part of the string matching - // code will run for the last message in the file. - let lines = file.lines().chain(Some(Ok(String::new()))); - - for line_result in lines { - let line = line_result?; - - match_str! { (line.trim()) - ["msgid \"", msg_id, "\""] => { - current_id = Some(MsgString::from_escaped(msg_id)); - }, - ["msgstr \"", translation, "\""] => { - if let Some(id) = current_id.take() { - let value = MsgValue::Invariant(MsgString::from_escaped(translation)); - - parsing_header = id.is_empty() && translation.is_empty(); - - entries.push(MsgEntry { id, value }); - } - - current_id = None; - current_plural_id = None; - }, - ["msgid_plural \"", plural_id, "\""] => { - current_plural_id = Some(MsgString::from_escaped(plural_id)); - parsing_header = false; - }, - ["msgstr[", plural_translation, "\""] => { - let variant_id_end = plural_translation - .chars() - .position(|character| character == ']') - .expect("Invalid plural msgstr"); - let variant_id: usize = plural_translation[..variant_id_end] - .parse() - .expect("Invalid variant index"); - let variant_msg = parse_line(&plural_translation[variant_id_end..], "] \"", "") - .expect("Invalid plural msgstr"); - - variants.insert(variant_id, MsgString::from_escaped(variant_msg)); - parsing_header = false; - }, - ["\"", header, "\\n\""] => { - if parsing_header { - if let Some(plural_formula) = parse_line(header, "Plural-Forms: ", ";") { - plural_form = PluralForm::from_formula(plural_formula); - } - } - }, - _ => { - if let Some(plural_id) = current_plural_id.take() { - let id = current_id.take().expect("Missing msgid for plural message"); - let values = mem::replace(&mut variants, BTreeMap::new()) - .into_iter() - .enumerate() - .inspect(|(index, (variant_id, _))| { - assert_eq!( - index, variant_id, - "Unexpected variant ID for plural msgstr" - ) - }) - .map(|(_, (_, value))| value) - .collect(); - let value = MsgValue::Plural { plural_id, values }; - - entries.push(MsgEntry { id, value }); - } + let mut parser = Parser::new(); - current_id = None; - current_plural_id = None; - variants.clear(); - parsing_header = false; - }, - } + for line in file.lines() { + parser.parse_line(&line?); } - Ok(Messages { - entries, - plural_form, - }) + Ok(parser.finish()) } /// Construct an empty messages list configured with the specified plural form. diff --git a/android/translations-converter/src/gettext/mod.rs b/android/translations-converter/src/gettext/mod.rs index 59fcfe0b70..cbf81d5717 100644 --- a/android/translations-converter/src/gettext/mod.rs +++ b/android/translations-converter/src/gettext/mod.rs @@ -2,6 +2,7 @@ mod match_str; mod messages; mod msg_string; +mod parser; mod plural_form; use std::{ diff --git a/android/translations-converter/src/gettext/parser.rs b/android/translations-converter/src/gettext/parser.rs new file mode 100644 index 0000000000..38d2c53a37 --- /dev/null +++ b/android/translations-converter/src/gettext/parser.rs @@ -0,0 +1,138 @@ +use super::{messages::Messages, msg_string::MsgString, parse_line, PluralForm}; +use std::{collections::BTreeMap, mem}; + +/// A gettext messages file parser. +/// +/// Can parse both translations files and template files. +/// +/// # Usage +/// +/// The parser works by parsing individual lines. After creating a [`Parser`] instance, the input +/// lines should be sent to it through repeated calls to [`Parser::parse_line`], and afterwards +/// calling [`Parser::finish`] to finish parsing and obtain the parsed result. +/// +/// The only metadata that is parsed from the file is the "Plural-Form" header. It is assumed +/// that the header value is one of some hard-coded values, so if new languages that have new +/// plurals are added, the code will have to be updated. +/// +/// # Input example +/// +/// A gettext translation file has the format in the example below: +/// +/// ``` +/// # The start of the file can contain empty entries to include some header with meta +/// # information. Below is the header indicating the plural format. +/// msgid "" +/// msgstr "" +/// "Plural-Forms: nplurals=2; plural=(n != 1);" +/// +/// # Simple translated messages +/// msgid "Message in original language" +/// msgstr "Mesaĝo en tradukita lingvo" +/// +/// # Plural translated messages (with two forms) +/// msgid "One translated message" +/// msgid_plural "%d translated messages" +/// msgstr[0] "Unu tradukita mesaĝo" +/// msgstr[1] "%d tradukitaj mesaĝoj" +/// ``` +#[derive(Debug)] +pub struct Parser { + parsing_header: bool, + messages: Messages, + current_id: Option<MsgString>, + current_plural_id: Option<MsgString>, + variants: BTreeMap<usize, MsgString>, +} + +impl Parser { + /// Create a new [`Parser`] instance. + /// + /// Parsing can then be done by feeding lines to the instance using [`Parser::parse_line`] and + /// finishing with a call to [`Parser::finish`] to obtain the parsed result. + pub fn new() -> Self { + Parser { + parsing_header: false, + messages: Messages::default(), + current_id: None, + current_plural_id: None, + variants: BTreeMap::new(), + } + } + + /// Parse an input line. + /// + /// # Panics + /// + /// The method will panic if the line can not be parsed. + pub fn parse_line(&mut self, line: &str) { + match_str! { (line.trim()) + ["msgid \"", msg_id, "\""] => { + self.current_id = Some(MsgString::from_escaped(msg_id)); + }, + ["msgstr \"", translation, "\""] => { + if let Some(id) = self.current_id.take() { + self.parsing_header = id.is_empty() && translation.is_empty(); + self.messages.add(id, MsgString::from_escaped(translation)); + } + + self.current_id = None; + self.current_plural_id = None; + }, + ["msgid_plural \"", plural_id, "\""] => { + self.current_plural_id = Some(MsgString::from_escaped(plural_id)); + self.parsing_header = false; + }, + ["msgstr[", plural_translation, "\""] => { + let variant_id_end = plural_translation + .chars() + .position(|character| character == ']') + .expect("Invalid plural msgstr"); + let variant_id: usize = plural_translation[..variant_id_end] + .parse() + .expect("Invalid variant index"); + let variant_msg = parse_line(&plural_translation[variant_id_end..], "] \"", "") + .expect("Invalid plural msgstr"); + + self.variants.insert(variant_id, MsgString::from_escaped(variant_msg)); + self.parsing_header = false; + }, + ["\"", header, "\\n\""] => { + if self.parsing_header { + if let Some(plural_formula) = parse_line(header, "Plural-Forms: ", ";") { + self.messages.plural_form = PluralForm::from_formula(plural_formula); + } + } + }, + _ => { + if let Some(plural_id) = self.current_plural_id.take() { + let id = self.current_id.take().expect("Missing msgid for plural message"); + let values = mem::replace(&mut self.variants, BTreeMap::new()) + .into_iter() + .enumerate() + .inspect(|(index, (variant_id, _))| { + assert_eq!( + index, variant_id, + "Unexpected variant ID for plural msgstr" + ) + }) + .map(|(_, (_, value))| value) + .collect(); + + self.messages.add_plural(id, plural_id, values); + } + + self.current_id = None; + self.current_plural_id = None; + self.variants.clear(); + self.parsing_header = false; + }, + } + } + + /// Finish parsing and obtain the parsed [`Messages]. + pub fn finish(mut self) -> Messages { + self.parse_line(""); + self.messages + } +} |
