diff options
| author | Janito Vaqueiro Ferreira Filho <janito@mullvad.net> | 2021-05-28 09:12:47 -0300 |
|---|---|---|
| committer | Janito Vaqueiro Ferreira Filho <janito@mullvad.net> | 2021-05-28 09:12:47 -0300 |
| commit | d157ceec4d136bddfc44c42c8b88de3bc0a8a1e8 (patch) | |
| tree | 2165384e8c8d27e5ac2b8cba32bd7e66a4001c0f /android | |
| parent | c10d566a7bd5c784bbfc1aec414638ff149d2284 (diff) | |
| parent | 7615ff8cc0b449b9d18eedda565c1f90029829a7 (diff) | |
| download | mullvadvpn-d157ceec4d136bddfc44c42c8b88de3bc0a8a1e8.tar.xz mullvadvpn-d157ceec4d136bddfc44c42c8b88de3bc0a8a1e8.zip | |
Merge branch 'refactor-gettext-parser'
Diffstat (limited to 'android')
| -rw-r--r-- | android/src/main/res/values-ja/plurals.xml | 20 | ||||
| -rw-r--r-- | android/translations-converter/Cargo.toml | 1 | ||||
| -rw-r--r-- | android/translations-converter/src/gettext/match_str.rs | 211 | ||||
| -rw-r--r-- | android/translations-converter/src/gettext/messages.rs | 116 | ||||
| -rw-r--r-- | android/translations-converter/src/gettext/mod.rs | 201 | ||||
| -rw-r--r-- | android/translations-converter/src/gettext/msg_string.rs | 75 | ||||
| -rw-r--r-- | android/translations-converter/src/gettext/parser.rs | 558 | ||||
| -rw-r--r-- | android/translations-converter/src/gettext/plural_form.rs | 19 | ||||
| -rw-r--r-- | android/translations-converter/src/main.rs | 10 |
9 files changed, 1006 insertions, 205 deletions
diff --git a/android/src/main/res/values-ja/plurals.xml b/android/src/main/res/values-ja/plurals.xml index 12a276f4b6..01cee2edcb 100644 --- a/android/src/main/res/values-ja/plurals.xml +++ b/android/src/main/res/values-ja/plurals.xml @@ -1,33 +1,33 @@ <?xml version="1.0" encoding="utf-8"?> <resources> <plurals name="months_left"> - <item quantity="other">残り1ヶ月\\n\\n</item> + <item quantity="other">残り1ヶ月\\n\\n残り%1$d ヶ月</item> </plurals> <plurals name="days_ago"> - <item quantity="other">1日前\\n\\n</item> + <item quantity="other">1日前\\n\\n%1$d 日前</item> </plurals> <plurals name="days_left"> - <item quantity="other">残り1日\\n\\n</item> + <item quantity="other">残り1日\\n\\n残り%1$d日</item> </plurals> <plurals name="years_left"> - <item quantity="other">残り1年\\n\\n</item> + <item quantity="other">残り1年\\n\\n残り%1$d年</item> </plurals> <plurals name="account_credit_expires_in_days"> - <item quantity="other">アカウントのクレジットが1日後に無効になります\\n\\n</item> + <item quantity="other">アカウントのクレジットが1日後に無効になります\\n\\nアカウントのクレジットが%1$d日後に無効になります</item> </plurals> <plurals name="account_credit_expires_in_hours"> - <item quantity="other">アカウントのクレジットが1時間後に無効になります\\n\\n</item> + <item quantity="other">アカウントのクレジットが1時間後に無効になります\\n\\nアカウントのクレジットが%1$d時間後に無効になります</item> </plurals> <plurals name="minutes_ago"> - <item quantity="other">残り1分\\n\\n</item> + <item quantity="other">残り1分\\n\\n残り%1$d 分</item> </plurals> <plurals name="hours_ago"> - <item quantity="other">1時間前\\n\\n</item> + <item quantity="other">1時間前\\n\\n%1$d時間前</item> </plurals> <plurals name="months_ago"> - <item quantity="other">1ヶ月前\\n\\n</item> + <item quantity="other">1ヶ月前\\n\\n%1$d ヶ月前</item> </plurals> <plurals name="years_ago"> - <item quantity="other">1年前\\n\\n</item> + <item quantity="other">1年前\\n\\n%1$d 年前</item> </plurals> </resources> diff --git a/android/translations-converter/Cargo.toml b/android/translations-converter/Cargo.toml index a5f01aa386..50b410de29 100644 --- a/android/translations-converter/Cargo.toml +++ 
b/android/translations-converter/Cargo.toml @@ -8,6 +8,7 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +derive_more = "0.99" htmlize = "0.5" lazy_static = "1" regex = "1" diff --git a/android/translations-converter/src/gettext/match_str.rs b/android/translations-converter/src/gettext/match_str.rs new file mode 100644 index 0000000000..d09a639fb3 --- /dev/null +++ b/android/translations-converter/src/gettext/match_str.rs @@ -0,0 +1,211 @@ +/// A helper macro to match a string to various prefix and suffix combinations. +/// +/// This macro can be used in a way that's similar to matching slices. It's possible to match an +/// input string to: +/// +/// - a specified string; +/// - a string with a specified prefix; +/// - a string with a specified suffix; +/// - a string with a specified prefix and a specified suffix. +/// +/// Multiple match patterns can be specified for the same match arm, as long as there are no +/// bindings for that match arm. When matching with prefixes and/or suffixes, the known parts of the +/// string can be removed, and the rest of the string is bound to a binding with a specified name. +/// +/// The macro has a limitation where all match arm bodies must be separated by commas, even if the +/// body is inside braces (`{}`). +/// +/// # Examples +/// +/// When not using any bindings, multiple match patterns can be on the same match arm. +/// +/// ``` +/// # let input_string = ""; +/// +/// match_str! { (input_string.trim()) +/// ["exact_string"] => { +/// println!("Exact match") +/// }, // Note: even though the body is enclosed by braces, a comma is still necessary +/// ["prefix", ..] | [.., "suffix"] => println!("Partial match"), +/// no_match => println!("Input {:?} did not match", no_match), +/// } +/// ``` +/// +/// If a match arm uses a binding, it must only have one match pattern. +/// +/// ``` +/// # let input_string = ""; +/// +/// match_str! 
{ (input_string.trim()) +/// ["prefix", string] => println!("Prefixed: {:?}", string), +/// [string, "suffix"] => println!("Suffixed: {:?}", string), +/// ["prefix", string, "suffix"] => println!("Prefixed and Suffixed: {:?}", string), +/// // The following does not work because the match arm has a binding and therefore can't have +/// // more than one pattern: +/// // ["prefix", string] | [string, "suffix"] => { +/// // println!("Prefixed or Suffixed: {:?}", string) +/// // } +/// } +/// ``` +/// +/// # Implementation details +/// +/// The macro starts by extracting the matched expression and binding it to a local variable. It +/// will then call itself recursively to build an `if`-`else` chain to match that variable +/// according to the desired prefix/suffix patterns. +/// +/// When calling itself recursively, a `@match_str` marker is used to mark that the macro is +/// inside an inner call. The marker is followed by an initial state, which consists of three parts. +/// The first part is the condition expression, which is built by all the match patterns of that +/// arm. The second part is the binding for the input string. The third part is the binding used +/// for that match arm. +/// +/// The third part of the state initially starts out empty, but is later replaced by either a +/// binding expression or a `@no_bindings` marker. The marker allows the condition to grow with +/// other patterns in the same match arm. +macro_rules! 
match_str { + // Start of matching + ( ($string:expr) $(|)* $( $match_body:tt )* ) => { + { + let string_to_match = $string; + + match_str!(@match_str((false), string_to_match) | $( $match_body )*) + } + }; + + // Match a whole string + ( + @match_str($conditions:tt, $input:ident $(, @no_bindings)*) + | [$string:literal] $( $rest:tt )* + ) => { + match_str!(@match_str(($conditions || $input == $string), $input, @no_bindings) $( $rest )*) + }; + + // Match a string with a given prefix + ( + @match_str($conditions:tt, $input:ident $(, @no_bindings)*) + | [$prefix:literal, ..] $( $rest:tt )* + ) => { + match_str!( + @match_str(($conditions || $input.starts_with($prefix)), $input, @no_bindings) + $( $rest )* + ) + }; + + // Match a string with a given suffix + ( + @match_str($conditions:tt, $input:ident $(, @no_bindings)*) + | [.., $suffix:literal] $( $rest:tt )* ) => { + match_str!( + @match_str(($conditions || $input.ends_with($suffix)), $input, @no_bindings) + $( $rest )* + ) + }; + + // Match a string with a given prefix and suffix + ( + @match_str($conditions:tt, $input:ident $(, @no_bindings)*) + | [$prefix:literal, .., $suffix:literal] + $( $rest:tt )* + ) => { + match_str!( + @match_str( + ($conditions || ($input.starts_with($prefix) && $input.ends_with($suffix))), + $input, + @no_bindings + ) + $( $rest )* + ) + }; + + // Match a string with a given prefix, binding the rest of the string after the prefix + ( + @match_str($conditions:tt, $input:ident) + | [$prefix:literal, $binding:ident] $( $rest:tt )* + ) => { + match_str!( + @match_str( + ($conditions || $input.starts_with($prefix)), + $input, + @binding $binding = &$input[$prefix.len()..] 
+ ) + $( $rest )* + ) + }; + + // Match a string with a given suffix, binding the start of the string up to before the suffix + ( + @match_str($conditions:tt, $input:ident) + | [$binding:ident, $suffix:literal] $( $rest:tt )* + ) => { + match_str!( + @match_str( + ($conditions || $input.ends_with($suffix)), + $input, + @binding $binding = &$input[..($input.len()-$suffix.len())] + ) + $( $rest )* + ) + }; + + // Match a string with a given prefix and suffix, binding the middle of the string, starting + // after the prefix and ending before the suffix + ( + @match_str($conditions:tt, $input:ident) + | [$prefix:literal, $binding:ident, $suffix:literal] $( $rest:tt )* + ) => { + match_str!( + @match_str( + ($conditions || ($input.starts_with($prefix) && $input.ends_with($suffix))), + $input, + @binding $binding = &$input[$prefix.len()..($input.len()-$suffix.len())] + ) + $( $rest )* + ) + }; + + // Final empty `else` body + ( @match_str((false), $input:ident) |) => { {} }; + + // Final empty `else` body + ( @match_str((false), $input:ident) | _ => $body:expr $(,)*) => { + { + $body + } + }; + + // Final `else` body with a catch-all binding + ( @match_str((false), $input:ident) | $binding:ident => $body:expr $(,)* ) => { + { + let $binding = $input; + + $body + } + }; + + // Build `if` body + ( + @match_str($conditions:tt, $input:ident, @no_bindings) + => $body:expr , $(,)* $(|)* $( $rest:tt )* + ) => { + if $conditions { + $body + } else { + match_str!(@match_str((false), $input) | $( $rest )*) + } + }; + + // Build `if` body with a specified binding + ( + @match_str($conditions:tt, $input:ident, @binding $binding:ident = $binding_expr:expr) + => $body:expr , $(,)* $(|)* $( $rest:tt )* + ) => { + if $conditions { + let $binding = $binding_expr; + + $body + } else { + match_str!(@match_str((false), $input) | $( $rest )*) + } + }; +} diff --git a/android/translations-converter/src/gettext/messages.rs b/android/translations-converter/src/gettext/messages.rs new file mode 
100644 index 0000000000..c8a29bd734 --- /dev/null +++ b/android/translations-converter/src/gettext/messages.rs @@ -0,0 +1,116 @@ +use super::{msg_string::MsgString, parser::Parser, plural_form::PluralForm}; +use derive_more::{Display, Error, From}; +use std::{ + fs::File, + io::{BufRead, BufReader}, + path::Path, +}; + +/// A parsed gettext messages file. +#[derive(Clone, Debug, Default)] +pub struct Messages { + pub plural_form: Option<PluralForm>, + entries: Vec<MsgEntry>, +} + +/// A message entry in a gettext translation file. +#[derive(Clone, Debug)] +pub struct MsgEntry { + pub id: MsgString, + pub value: MsgValue, +} + +/// A message string or plural set in a gettext translation file. +#[derive(Clone, Debug)] +pub enum MsgValue { + Invariant(MsgString), + Plural { + plural_id: MsgString, + values: Vec<MsgString>, + }, +} + +impl Messages { + /// Load message entries from a gettext translation file. + /// + /// See [`Parser`] for more information. + pub fn from_file(file_path: impl AsRef<Path>) -> Result<Self, Error> { + let file = BufReader::new(File::open(file_path).expect("Failed to open gettext file")); + let mut parser = Parser::new(); + + for line in file.lines() { + parser.parse_line(&line?)?; + } + + Ok(parser.finish()?) + } + + /// Construct an empty messages list configured with the specified plural form. + pub fn with_plural_form(plural_form: PluralForm) -> Self { + Messages { + plural_form: Some(plural_form), + entries: Vec::new(), + } + } + + /// Create a messages list with a single non-plural entry. + /// + /// The plural form for the messages is left unconfigured. + pub fn starting_with(id: MsgString, msg_str: MsgString) -> Self { + let first_entry = MsgEntry { + id, + value: MsgValue::Invariant(msg_str), + }; + + Messages { + plural_form: None, + entries: vec![first_entry], + } + } + + /// Add a non-plural entry. 
+ pub fn add(&mut self, id: MsgString, msg_str: MsgString) { + let entry = MsgEntry { + id, + value: MsgValue::Invariant(msg_str), + }; + + self.entries.push(entry); + } + + /// Add a plural entry. + pub fn add_plural(&mut self, id: MsgString, plural_id: MsgString, values: Vec<MsgString>) { + let entry = MsgEntry { + id, + value: MsgValue::Plural { plural_id, values }, + }; + + self.entries.push(entry); + } +} + +impl IntoIterator for Messages { + type Item = MsgEntry; + type IntoIter = std::vec::IntoIter<Self::Item>; + + fn into_iter(self) -> Self::IntoIter { + self.entries.into_iter() + } +} + +impl From<MsgString> for MsgValue { + fn from(string: MsgString) -> Self { + MsgValue::Invariant(string) + } +} + +#[derive(Debug, Display, Error, From)] +pub enum Error { + /// Parser error while parsing file + #[display(fmt = "Failed to parse input file")] + Parse(super::parser::Error), + + /// IO error while reading input file. + #[display(fmt = "Failed to read from the input file")] + Io(std::io::Error), +} diff --git a/android/translations-converter/src/gettext/mod.rs b/android/translations-converter/src/gettext/mod.rs index 1bed501b72..72601abf0e 100644 --- a/android/translations-converter/src/gettext/mod.rs +++ b/android/translations-converter/src/gettext/mod.rs @@ -1,189 +1,21 @@ +#[macro_use] +mod match_str; +mod messages; mod msg_string; +mod parser; mod plural_form; use std::{ - collections::BTreeMap, - fs::{File, OpenOptions}, - io::{self, BufRead, BufReader, BufWriter, Write}, - mem, + fs::OpenOptions, + io::{self, BufWriter, Write}, path::Path, }; -pub use self::{msg_string::MsgString, plural_form::PluralForm}; - -/// A parsed gettext translation file. -#[derive(Clone, Debug)] -pub struct Translation { - pub plural_form: Option<PluralForm>, - entries: Vec<MsgEntry>, -} - -/// A message entry in a gettext translation file. 
-#[derive(Clone, Debug)] -pub struct MsgEntry { - pub id: MsgString, - pub value: MsgValue, -} - -/// A message string or plural set in a gettext translation file. -#[derive(Clone, Debug)] -pub enum MsgValue { - Invariant(MsgString), - Plural { - plural_id: MsgString, - values: Vec<MsgString>, - }, -} - -/// A helper macro to match a string to various prefix and suffix combinations. -macro_rules! match_str { - ( - ( $string:expr ) - $( [$start:expr, $middle:ident, $end:expr] => $body:tt )* - _ => $else:expr $(,)* - ) => { - $( - if let Some($middle) = parse_line($string, $start, $end) { - $body - } else - )* { - $else - } - }; -} - -impl Translation { - /// Load message entries from a gettext translation file. - /// - /// The only metadata that is parsed from the file is the "Plural-Form" header. It is assumed - /// that the header value is one of some hard-coded values, so if new languages that have new - /// plurals are added, the code will have to be updated. - /// - /// An gettext translation file has the format in the example below: - /// - /// ``` - /// # The start of the file can contain empty entries to include some header with meta - /// # information. Below is the header indicating the plural format. 
- /// msgid "" - /// msgstr "" - /// "Plural-Forms: nplurals=2; plural=(n != 1);" - /// - /// # Simple translated messages - /// msgid "Message in original language" - /// msgstr "Mesaĝo en tradukita lingvo" - /// - /// # Plural translated messages (with two forms) - /// msgid "One translated message" - /// msgid_plural "%d translated messages" - /// msgstr[0] "Unu tradukita mesaĝo" - /// msgstr[1] "%d tradukitaj mesaĝoj" - /// ``` - pub fn from_file(file_path: impl AsRef<Path>) -> Self { - let mut parsing_header = false; - let mut entries = Vec::new(); - let mut current_id = None; - let mut current_plural_id = None; - let mut plural_form = None; - let mut variants = BTreeMap::new(); - - let file = BufReader::new(File::open(file_path).expect("Failed to open gettext file")); - // Ensure there's an empty line at the end so that the "else" part of the string matching - // code will run for the last message in the file. - let lines = file - .lines() - .map(|line_result| line_result.expect("Failed to read from gettext file")) - .chain(Some(String::new())); - - for line in lines { - match_str! 
{ (line.trim()) - ["msgid \"", msg_id, "\""] => { - current_id = Some(MsgString::from_escaped(msg_id)); - } - ["msgstr \"", translation, "\""] => { - if let Some(id) = current_id.take() { - let value = MsgValue::Invariant(MsgString::from_escaped(translation)); - - parsing_header = id.is_empty() && translation.is_empty(); - - entries.push(MsgEntry { id, value }); - } - - current_id = None; - current_plural_id = None; - } - ["msgid_plural \"", plural_id, "\""] => { - current_plural_id = Some(MsgString::from_escaped(plural_id)); - parsing_header = false; - } - ["msgstr[", plural_translation, "\""] => { - let variant_id_end = plural_translation - .chars() - .position(|character| character == ']') - .expect("Invalid plural msgstr"); - let variant_id: usize = plural_translation[..variant_id_end] - .parse() - .expect("Invalid variant index"); - let variant_msg = parse_line(&plural_translation[variant_id_end..], "] \"", "") - .expect("Invalid plural msgstr"); - - variants.insert(variant_id, MsgString::from_escaped(variant_msg)); - parsing_header = false; - } - ["\"", header, "\\n\""] => { - if parsing_header { - if let Some(plural_formula) = parse_line(header, "Plural-Forms: ", ";") { - plural_form = PluralForm::from_formula(plural_formula); - } - } - } - _ => { - if let Some(plural_id) = current_plural_id.take() { - let id = current_id.take().expect("Missing msgid for plural message"); - let values = mem::replace(&mut variants, BTreeMap::new()) - .into_iter() - .enumerate() - .inspect(|(index, (variant_id, _))| { - assert_eq!( - index, variant_id, - "Unexpected variant ID for plural msgstr" - ) - }) - .map(|(_, (_, value))| value) - .collect(); - let value = MsgValue::Plural { plural_id, values }; - - entries.push(MsgEntry { id, value }); - } - - current_id = None; - current_plural_id = None; - variants.clear(); - parsing_header = false; - } - } - } - - Self { - entries, - plural_form, - } - } -} - -impl IntoIterator for Translation { - type Item = MsgEntry; - type 
IntoIter = std::vec::IntoIter<Self::Item>; - - fn into_iter(self) -> Self::IntoIter { - self.entries.into_iter() - } -} - -impl From<MsgString> for MsgValue { - fn from(string: MsgString) -> Self { - MsgValue::Invariant(string) - } -} +pub use self::{ + messages::{Messages, MsgEntry, MsgValue}, + msg_string::MsgString, + plural_form::PluralForm, +}; /// Append message entries to a translation file. /// @@ -219,14 +51,3 @@ pub fn append_to_template( Ok(()) } - -fn parse_line<'l>(line: &'l str, prefix: &str, suffix: &str) -> Option<&'l str> { - if line.starts_with(prefix) && line.ends_with(suffix) { - let start = prefix.len(); - let end = line.len() - suffix.len(); - - Some(&line[start..end]) - } else { - None - } -} diff --git a/android/translations-converter/src/gettext/msg_string.rs b/android/translations-converter/src/gettext/msg_string.rs index b6ae21aadf..83bcd90c30 100644 --- a/android/translations-converter/src/gettext/msg_string.rs +++ b/android/translations-converter/src/gettext/msg_string.rs @@ -1,10 +1,10 @@ use std::{ fmt::{self, Display, Formatter}, - ops::Deref, + ops::{Add, AddAssign, Deref}, }; /// A message string in a gettext translation file. 
-#[derive(Clone, Debug)] +#[derive(Clone, Debug, Eq, PartialEq)] pub struct MsgString(String); impl MsgString { @@ -51,6 +51,41 @@ impl Deref for MsgString { } } +impl AsRef<MsgString> for MsgString { + fn as_ref(&self) -> &Self { + self + } +} + +impl<M> AddAssign<M> for MsgString +where + M: AsRef<MsgString>, +{ + fn add_assign(&mut self, other: M) { + self.0 += &other.as_ref().0; + } +} + +impl<M> Add<M> for MsgString +where + M: AsRef<MsgString>, +{ + type Output = MsgString; + + fn add(mut self, other: M) -> Self::Output { + self += other; + self + } +} + +impl<'l, 'r> Add<&'r MsgString> for &'l MsgString { + type Output = MsgString; + + fn add(self, other: &'r MsgString) -> Self::Output { + MsgString(self.0.clone() + &other.0) + } +} + #[cfg(test)] mod tests { use super::MsgString; @@ -89,4 +124,40 @@ mod tests { assert_eq!(input.to_string(), original); } + + #[test] + fn appending() { + let mut target = MsgString::from_unescaped(r#""Initial""#); + let extra = MsgString::from_escaped(r#"\"Extra\""#); + + target += extra; + + let expected = concat!(r#"\"Initial\"#, r#""\"Extra\""#); + + assert_eq!(target.to_string(), expected); + } + + #[test] + fn concatenating_by_moving() { + let start = MsgString::from_unescaped(r#""Start""#); + let end = MsgString::from_escaped(r#"\"End\""#); + + let result = start + end; + + let expected = concat!(r#"\"Start\"#, r#""\"End\""#); + + assert_eq!(result.to_string(), expected); + } + + #[test] + fn concatenating_by_borrowing() { + let start = MsgString::from_escaped(r#"\"Start\""#); + let end = MsgString::from_unescaped(r#""End""#); + + let result = &start + &end; + + let expected = concat!(r#"\"Start\"#, r#""\"End\""#); + + assert_eq!(result.to_string(), expected); + } } diff --git a/android/translations-converter/src/gettext/parser.rs b/android/translations-converter/src/gettext/parser.rs new file mode 100644 index 0000000000..749922a7aa --- /dev/null +++ b/android/translations-converter/src/gettext/parser.rs @@ -0,0 +1,558 
@@ +use super::{Messages, MsgString, PluralForm}; +use derive_more::{Display, Error}; +use std::{collections::BTreeMap, mem}; + +/// A gettext messages file parser. +/// +/// Can parse both translation files and template files. +/// +/// # Usage +/// +/// The parser works by parsing individual lines. After creating a [`Parser`] instance, the input +/// lines should be sent to it through repeated calls to [`Parser::parse_line`], and afterwards +/// calling [`Parser::finish`] to finish parsing and obtain the parsed result. +/// +/// The only metadata that is parsed from the file is the "Plural-Forms" header. It is assumed +/// that the header value is one of some hard-coded values, so if new languages that have new +/// plurals are added, the code will have to be updated. +/// +/// # Input example +/// +/// A gettext translation file has the format in the example below: +/// +/// ```text +/// # The start of the file can contain empty entries to include some header with meta +/// # information. Below is the header indicating the plural format. +/// msgid "" +/// msgstr "" +/// "Plural-Forms: nplurals=2; plural=(n != 1);\n" +/// +/// # Simple translated messages +/// msgid "Message in original language" +/// msgstr "Mesaĝo en tradukita lingvo" +/// +/// # Plural translated messages (with two forms) +/// msgid "One translated message" +/// msgid_plural "%d translated messages" +/// msgstr[0] "Unu tradukita mesaĝo" +/// msgstr[1] "%d tradukitaj mesaĝoj" +/// ``` +#[derive(Debug)] +pub enum Parser { + /// Initial state. + /// + /// No useful information has been extracted yet. + Start, + + /// Possible start of file header. + /// + /// Found an empty message ID, if the next line is an empty message string the header of the + /// file has been found. + HeaderStart, + + /// Start of file header found. + Header, + + /// Skipping to the end of the header. + /// + /// The useful information has already been extracted. 
+ HeaderEnd(Messages), + + /// Waiting for a next message section. + /// + /// Parser has completed parsing either at least one valid entry or the file header. + Idle(Messages), + + /// New message entry. + /// + /// Parsed a new message ID. + NewEntry { id: MsgString, messages: Messages }, + + /// Parsing a message entry. + /// + /// Parsed a message ID and a message string, but the string could be incomplete with the rest + /// of it spread among more lines. + InvariantEntry { + id: MsgString, + message: MsgString, + messages: Messages, + }, + + /// Detected that entry is for a plural. + /// + /// Found a plural ID, may have parsed variants. + NewPluralEntry { + id: MsgString, + plural_id: MsgString, + variants: BTreeMap<usize, MsgString>, + messages: Messages, + }, + + /// Parsing a plural entry variant. + /// + /// Parsed the start of a plural variant string, but the string could be incomplete with the + /// rest of it spread among more lines. + PluralEntry { + id: MsgString, + plural_id: MsgString, + index: usize, + variant: MsgString, + variants: BTreeMap<usize, MsgString>, + messages: Messages, + }, + + /// Internal transition state. + /// + /// Used while a line is being parsed. + Parsing, +} + +impl Parser { + /// Create a new [`Parser`] instance. + /// + /// Parsing can then be done by feeding lines to the instance using [`Parser::parse_line`] and + /// finishing with a call to [`Parser::finish`] to obtain the parsed result. + pub fn new() -> Self { + Parser::Start + } + + /// Parse an input line. 
+ pub fn parse_line(&mut self, line: &str) -> Result<(), Error> { + let state = mem::replace(self, Parser::Parsing); + + *self = match state { + Parser::Start => Self::parse_start(line)?, + Parser::HeaderStart => Self::parse_header_start(line)?, + Parser::Header => Self::parse_header(line)?, + Parser::HeaderEnd(messages) => Self::parse_header_end(line, messages)?, + Parser::Idle(messages) => Self::parse_idle(line, messages)?, + Parser::NewEntry { id, messages } => Self::parse_new_entry(line, id, messages)?, + Parser::InvariantEntry { + id, + message, + messages, + } => Self::parse_invariant_entry(line, id, message, messages)?, + Parser::NewPluralEntry { + id, + plural_id, + variants, + messages, + } => Self::parse_new_plural_entry(line, id, plural_id, variants, messages)?, + Parser::PluralEntry { + id, + plural_id, + index, + variant, + variants, + messages, + } => Self::parse_plural_entry(line, id, plural_id, index, variant, variants, messages)?, + Parser::Parsing => unreachable!("Parser should never stop on the Parsing state"), + }; + + Ok(()) + } + + /// Finish parsing and obtain the parsed [`Messages`]. + pub fn finish(self) -> Result<Messages, Error> { + match self { + // Input file is empty + Parser::Start => Ok(Messages::default()), + + // A single empty msgid was parsed, but no msgstr for that entry (or header) + Parser::HeaderStart => Err(Error::IncompleteEntry(MsgString::empty())), + + // Input file only contains headers that were ignored + Parser::Header => Ok(Messages::default()), + + // Input file only contains headers, but the plural form was successfully parsed + Parser::HeaderEnd(messages) => Ok(messages), + + // Parsing successful + Parser::Idle(messages) => Ok(messages), + + // Input file ends on an incomplete entry + Parser::NewEntry { id, .. 
} => Err(Error::IncompleteEntry(id)), + + // Input file ends on an invariant entry + Parser::InvariantEntry { + id, + message, + mut messages, + } => { + messages.add(id, message); + + Ok(messages) + } + + // Input file ends with an empty plural entry + Parser::NewPluralEntry { id, .. } => Err(Error::IncompletePluralEntry(id)), + + // Input file ends with a plural entry (it might be missing variants) + Parser::PluralEntry { + id, + plural_id, + index, + variant, + mut variants, + mut messages, + } => { + variants.insert(index, variant); + + let variants = collect_variants(&id, variants)?; + + messages.add_plural(id, plural_id, variants); + + Ok(messages) + } + + Parser::Parsing => unreachable!("Parser should never stop on the Parsing state"), + } + } + + fn parse_start(line: &str) -> Result<Parser, Error> { + let next_state = match_str! { (line) + // Ignore empty lines and comment lines + [""] | ["#", ..] => Parser::Start, + + // An empty message ID may indicate the start of the header + ["msgid \"\""] => Parser::HeaderStart, + + // Headers don't have context, so skip it and get ready to parse entries + ["msgctxt ", ..] => Parser::Idle(Messages::default()), + + // File has no header, went directly to the first entry + ["msgid \"", msg_id, "\""] => Parser::NewEntry { + id: MsgString::from_escaped(msg_id), + messages: Messages::default() + }, + + other => return Err(Error::UnexpectedLine(other.to_owned())), + }; + + Ok(next_state) + } + + fn parse_header_start(line: &str) -> Result<Parser, Error> { + let next_state = match_str! { (line) + // Ignore comment lines + ["#", ..] 
=> Parser::HeaderStart, + + // An empty message string confirms the start of the header + ["msgstr \"\""] => Parser::Header, + + // A non-empty message string means an entry with an empty ID has been parsed + ["msgstr \"", string, "\""] => Parser::Idle( + Messages::starting_with(MsgString::empty(), MsgString::from_escaped(string)) + ), + + // A plural ID means this is the start of a plural entry with an empty ID + ["msgid_plural \"", plural_id, "\""] => Parser::NewPluralEntry { + id: MsgString::empty(), + plural_id: MsgString::from_escaped(plural_id), + variants: BTreeMap::new(), + messages: Messages::default(), + }, + + other => return Err(Error::UnexpectedLine(other.to_owned())), + }; + + Ok(next_state) + } + + fn parse_header(line: &str) -> Result<Parser, Error> { + let next_state = match_str! { (line) + // Ignore comment lines; the parser stays inside the header so that the header + // lines that follow the comment are still accepted + ["#", ..] => Parser::Header, + + // An empty line marks the end of the header + [""] => Parser::Idle(Messages::default()), + + // The Plural-Forms header is the only header that's currently used, so after finding + // it the parser can skip to the end of the headers + ["\"Plural-Forms: ", plural_formula, ";\\n\""] => { + let plural_form = PluralForm::from_formula(plural_formula) + .ok_or_else(|| Error::UnrecognizedPluralFormula(plural_formula.to_owned()))?; + + Parser::HeaderEnd(Messages::with_plural_form(plural_form)) + }, + + // Skip other headers + ["\"", .., "\\n\""] => Parser::Header, + + other => return Err(Error::UnexpectedLine(other.to_owned())), + }; + + Ok(next_state) + } + + fn parse_header_end(line: &str, messages: Messages) -> Result<Parser, Error> { + let next_state = match_str! { (line) + // An empty line marks the end of the header + [""] => Parser::Idle(messages), + + // Ignore comment lines + ["#", ..] 
=> Parser::HeaderEnd(messages), + + // Skip any other headers + ["\"", .., "\\n\""] => Parser::HeaderEnd(messages), + + other => return Err(Error::UnexpectedLine(other.to_owned())), + }; + + Ok(next_state) + } + + fn parse_idle(line: &str, messages: Messages) -> Result<Parser, Error> { + let next_state = match_str! { (line) + // Ignore empty lines, comment lines and message context lines + [""] | ["#", ..] | ["msgctxt ", ..] => Parser::Idle(messages), + + // Start of a new message entry + ["msgid \"", msg_id, "\""] => Parser::NewEntry { + id: MsgString::from_escaped(msg_id), + messages, + }, + + other => return Err(Error::UnexpectedLine(other.to_owned())), + }; + + Ok(next_state) + } + + fn parse_new_entry(line: &str, id: MsgString, messages: Messages) -> Result<Parser, Error> { + let next_state = match_str! { (line) + // Ignore comment lines + ["#", ..] => Parser::NewEntry { id, messages }, + + // A message string for an invariant entry + ["msgstr \"", string, "\""] => Parser::InvariantEntry { + id, + message: MsgString::from_escaped(string), + messages, + }, + + // A plural ID means this is the start of a plural entry + ["msgid_plural \"", plural_id, "\""] => Parser::NewPluralEntry { + id, + plural_id: MsgString::from_escaped(plural_id), + variants: BTreeMap::new(), + messages, + }, + + other => return Err(Error::UnexpectedLine(other.to_owned())), + }; + + Ok(next_state) + } + + fn parse_invariant_entry( + line: &str, + id: MsgString, + mut message: MsgString, + mut messages: Messages, + ) -> Result<Parser, Error> { + let next_state = match_str! { (line) + // Ignore comment lines + ["#", ..] 
=> Parser::InvariantEntry { id, message, messages }, + + // The entry message string continues on this line + ["\"", string, "\""] => { + message += MsgString::from_escaped(string); + + Parser::InvariantEntry { id, message, messages } + }, + + // End of the entry + [""] => { + messages.add(id, message); + + Parser::Idle(messages) + }, + + other => return Err(Error::UnexpectedLine(other.to_owned())), + }; + + Ok(next_state) + } + + fn parse_new_plural_entry( + line: &str, + id: MsgString, + plural_id: MsgString, + variants: BTreeMap<usize, MsgString>, + mut messages: Messages, + ) -> Result<Parser, Error> { + let next_state = match_str! { (line) + // Ignore comment lines + ["#", ..] => Parser::NewPluralEntry { id, plural_id, variants, messages }, + + // A message string for a plural variant + ["msgstr[", index_and_string, "\""] => { + let (index, variant) = extract_plural_variant(index_and_string)?; + + Parser::PluralEntry { + id, + plural_id, + index, + variant, + variants, + messages, + } + }, + + // An empty line marks the end of the plural entry + [""] => { + let variants = collect_variants(&id, variants)?; + + messages.add_plural(id, plural_id, variants); + + Parser::Idle(messages) + }, + + other => return Err(Error::UnexpectedLine(other.to_owned())), + }; + + Ok(next_state) + } + + fn parse_plural_entry( + line: &str, + id: MsgString, + plural_id: MsgString, + index: usize, + mut variant: MsgString, + mut variants: BTreeMap<usize, MsgString>, + mut messages: Messages, + ) -> Result<Parser, Error> { + let next_state = match_str! { (line) + // Ignore comment lines + ["#", ..] 
=> { + Parser::PluralEntry { id, plural_id, index, variant, variants, messages } + }, + + // The variant message string continues on this line + ["\"", string, "\""] => { + variant += MsgString::from_escaped(string); + + Parser::PluralEntry { + id, + plural_id, + index, + variant, + variants, + messages + } + }, + + // A message string indicating the end of the current variant and the start of another + ["msgstr[", index_and_string, "\""] => { + let (new_index, new_variant) = extract_plural_variant(index_and_string)?; + + variants.insert(index, variant); + + Parser::PluralEntry { + id, + plural_id, + index: new_index, + variant: new_variant, + variants, + messages, + } + }, + + // An empty line marks the end of the plural entry (and hence the current variant as + // well) + [""] => { + variants.insert(index, variant); + + let variants = collect_variants(&id, variants)?; + + messages.add_plural(id, plural_id, variants); + + Parser::Idle(messages) + }, + + other => return Err(Error::UnexpectedLine(other.to_owned())), + }; + + Ok(next_state) + } +} + +/// Helper function to extract the plural variant index and message. +/// +/// The parser will try to parse a plural line of the form `msgstr[1] "%d tradukitaj mesaĝoj"`. +/// When matching the line to the expected template, it will remove the `msgstr[` prefix and the +/// `"` suffix. This function will then parse the rest of the string (`1] "%d tradukitaj mesaĝoj`) +/// by extracting the index (1), and then extracting the message string by skipping the separator +/// (`] "`). 
+fn extract_plural_variant(index_and_string: &str) -> Result<(usize, MsgString), Error> { + let recreate_line = || format!("msgstr[{}\"", index_and_string); + + let parts: Vec<_> = index_and_string.splitn(2, "] \"").collect(); + + if parts.len() != 2 { + return Err(Error::InvalidPluralVariant(recreate_line())); + } + + let index_string = parts[0]; + let message_string = parts[1]; + + let index = index_string + .parse() + .map_err(|_| Error::InvalidPluralIndex(recreate_line()))?; + + let variant_message = MsgString::from_escaped(message_string); + + Ok((index, variant_message)) +} + +/// Helper function to collect parsed variants. +/// +/// This will return only the variant messages in index order. The function will return an error if +/// any variant index is missing. +fn collect_variants( + id: &MsgString, + variant_map: BTreeMap<usize, MsgString>, +) -> Result<Vec<MsgString>, Error> { + let index_count = variant_map.len(); + + for index in 0..index_count { + if !variant_map.contains_key(&index) { + return Err(Error::IncompletePluralEntry(id.clone())); + } + } + + Ok(variant_map + .into_iter() + .map(|(_, variant)| variant) + .collect()) +} + +/// Parsing errors. +#[derive(Clone, Debug, Display, Error, Eq, PartialEq)] +pub enum Error { + /// An unexpected line was read while parsing. + #[display(fmt = "Unexpected line parsing gettext messages: {}", _0)] + UnexpectedLine(#[error(not(source))] String), + + /// Input uses an unrecognized plural formula. + #[display(fmt = "Input uses an unrecognized formula for the plural form: {}", _0)] + UnrecognizedPluralFormula(#[error(not(source))] String), + + /// Input ended with an incomplete entry. + #[display(fmt = "Input ended with an incomplete gettext entry with ID: {}", _0)] + IncompleteEntry(#[error(not(source))] MsgString), + + /// Plural entry definition is missing a plural variant. 
+ #[display(fmt = "Plural entry is missing a plural variant: {}", _0)] + IncompletePluralEntry(#[error(not(source))] MsgString), + + /// Plural variant is invalid. + #[display(fmt = "Plural variant line is invalid: {}", _0)] + InvalidPluralVariant(#[error(not(source))] String), + + /// Plural variant index was not parsable. + #[display(fmt = "Plural variant line contains an invalid index: {}", _0)] + InvalidPluralIndex(#[error(not(source))] String), +} diff --git a/android/translations-converter/src/gettext/plural_form.rs b/android/translations-converter/src/gettext/plural_form.rs index c55066c7b7..e09e9cfd04 100644 --- a/android/translations-converter/src/gettext/plural_form.rs +++ b/android/translations-converter/src/gettext/plural_form.rs @@ -1,3 +1,6 @@ +use derive_more::{Display, Error}; +use std::str::FromStr; + /// Known plural forms. #[derive(Clone, Copy, Debug)] pub enum PluralForm { @@ -29,3 +32,19 @@ impl PluralForm { } } } + +impl FromStr for PluralForm { + type Err = UnsupportedPluralFormulaError; + + fn from_str(string: &str) -> Result<Self, Self::Err> { + PluralForm::from_formula(string) + .ok_or_else(|| UnsupportedPluralFormulaError(string.to_owned())) + } +} + +/// Failed to create [`PluralForm`] from specified plural formula. +/// +/// The formula could be an invalid formula, or support for it hasn't been added yet. 
+#[derive(Clone, Debug, Display, Error)] +#[display(fmt = "Unsupported plural formula: {}", _0)] +pub struct UnsupportedPluralFormulaError(#[error(not(source))] String); diff --git a/android/translations-converter/src/main.rs b/android/translations-converter/src/main.rs index d11320b070..8444470cd3 100644 --- a/android/translations-converter/src/main.rs +++ b/android/translations-converter/src/main.rs @@ -98,19 +98,23 @@ fn main() { fs::create_dir(&destination_dir).expect("Failed to create Android locale directory"); } + let translations = gettext::Messages::from_file(&locale_file) + .expect("Failed to load translations for a locale"); + generate_translations( locale, known_urls.clone(), known_strings.clone(), known_plurals.clone(), - gettext::Translation::from_file(&locale_file), + translations, destination_dir.join("strings.xml"), destination_dir.join("plurals.xml"), ); } let template_path = locale_dir.join("messages.pot"); - let template = gettext::Translation::from_file(&template_path); + let template = gettext::Messages::from_file(&template_path) + .expect("Failed to load messages template file"); let mut missing_translations = known_strings; let mut missing_plurals: HashMap<_, _> = known_plurals; @@ -228,7 +232,7 @@ fn generate_translations( known_urls: HashMap<String, String>, mut known_strings: HashMap<String, String>, mut known_plurals: HashMap<String, String>, - translations: gettext::Translation, + translations: gettext::Messages, strings_output_path: impl AsRef<Path>, plurals_output_path: impl AsRef<Path>, ) { |
