summaryrefslogtreecommitdiffhomepage
path: root/android
diff options
context:
space:
mode:
authorJanito Vaqueiro Ferreira Filho <janito@mullvad.net>2021-05-28 09:12:47 -0300
committerJanito Vaqueiro Ferreira Filho <janito@mullvad.net>2021-05-28 09:12:47 -0300
commitd157ceec4d136bddfc44c42c8b88de3bc0a8a1e8 (patch)
tree2165384e8c8d27e5ac2b8cba32bd7e66a4001c0f /android
parentc10d566a7bd5c784bbfc1aec414638ff149d2284 (diff)
parent7615ff8cc0b449b9d18eedda565c1f90029829a7 (diff)
downloadmullvadvpn-d157ceec4d136bddfc44c42c8b88de3bc0a8a1e8.tar.xz
mullvadvpn-d157ceec4d136bddfc44c42c8b88de3bc0a8a1e8.zip
Merge branch 'refactor-gettext-parser'
Diffstat (limited to 'android')
-rw-r--r--android/src/main/res/values-ja/plurals.xml20
-rw-r--r--android/translations-converter/Cargo.toml1
-rw-r--r--android/translations-converter/src/gettext/match_str.rs211
-rw-r--r--android/translations-converter/src/gettext/messages.rs116
-rw-r--r--android/translations-converter/src/gettext/mod.rs201
-rw-r--r--android/translations-converter/src/gettext/msg_string.rs75
-rw-r--r--android/translations-converter/src/gettext/parser.rs558
-rw-r--r--android/translations-converter/src/gettext/plural_form.rs19
-rw-r--r--android/translations-converter/src/main.rs10
9 files changed, 1006 insertions, 205 deletions
diff --git a/android/src/main/res/values-ja/plurals.xml b/android/src/main/res/values-ja/plurals.xml
index 12a276f4b6..01cee2edcb 100644
--- a/android/src/main/res/values-ja/plurals.xml
+++ b/android/src/main/res/values-ja/plurals.xml
@@ -1,33 +1,33 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<plurals name="months_left">
- <item quantity="other">残り1ヶ月\\n\\n</item>
+ <item quantity="other">残り1ヶ月\\n\\n残り%1$d ヶ月</item>
</plurals>
<plurals name="days_ago">
- <item quantity="other">1日前\\n\\n</item>
+ <item quantity="other">1日前\\n\\n%1$d 日前</item>
</plurals>
<plurals name="days_left">
- <item quantity="other">残り1日\\n\\n</item>
+ <item quantity="other">残り1日\\n\\n残り%1$d日</item>
</plurals>
<plurals name="years_left">
- <item quantity="other">残り1年\\n\\n</item>
+ <item quantity="other">残り1年\\n\\n残り%1$d年</item>
</plurals>
<plurals name="account_credit_expires_in_days">
- <item quantity="other">アカウントのクレジットが1日後に無効になります\\n\\n</item>
+ <item quantity="other">アカウントのクレジットが1日後に無効になります\\n\\nアカウントのクレジットが%1$d日後に無効になります</item>
</plurals>
<plurals name="account_credit_expires_in_hours">
- <item quantity="other">アカウントのクレジットが1時間後に無効になります\\n\\n</item>
+ <item quantity="other">アカウントのクレジットが1時間後に無効になります\\n\\nアカウントのクレジットが%1$d時間後に無効になります</item>
</plurals>
<plurals name="minutes_ago">
- <item quantity="other">残り1分\\n\\n</item>
+ <item quantity="other">残り1分\\n\\n残り%1$d 分</item>
</plurals>
<plurals name="hours_ago">
- <item quantity="other">1時間前\\n\\n</item>
+ <item quantity="other">1時間前\\n\\n%1$d時間前</item>
</plurals>
<plurals name="months_ago">
- <item quantity="other">1ヶ月前\\n\\n</item>
+ <item quantity="other">1ヶ月前\\n\\n%1$d ヶ月前</item>
</plurals>
<plurals name="years_ago">
- <item quantity="other">1年前\\n\\n</item>
+ <item quantity="other">1年前\\n\\n%1$d 年前</item>
</plurals>
</resources>
diff --git a/android/translations-converter/Cargo.toml b/android/translations-converter/Cargo.toml
index a5f01aa386..50b410de29 100644
--- a/android/translations-converter/Cargo.toml
+++ b/android/translations-converter/Cargo.toml
@@ -8,6 +8,7 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
+derive_more = "0.99"
htmlize = "0.5"
lazy_static = "1"
regex = "1"
diff --git a/android/translations-converter/src/gettext/match_str.rs b/android/translations-converter/src/gettext/match_str.rs
new file mode 100644
index 0000000000..d09a639fb3
--- /dev/null
+++ b/android/translations-converter/src/gettext/match_str.rs
@@ -0,0 +1,211 @@
+/// A helper macro to match a string to various prefix and suffix combinations.
+///
+/// This macro can be used in a way that's similar to matching slices. It's possible to match an
+/// input string to:
+///
+/// - a specified string;
+/// - a string with a specified prefix;
+/// - a string with a specified suffix;
+/// - a string with a specified prefix and a specified suffix.
+///
+/// Multiple match patterns can be specified for the same match arm, as long as there are no
+/// bindings for that match arm. When matching with prefixes and/or suffixes, the known parts of the
+/// string can be removed, and the rest of the string is bound to a binding with a specified name.
+///
+/// When a pattern has both a prefix and a suffix, they must not overlap in the input string, i.e.,
+/// the input must be at least as long as the prefix and the suffix put together. For example, a
+/// pattern with the prefix `"ab"` and the suffix `"ba"` matches `"abba"` but not `"aba"`.
+///
+/// The macro has a limitation where all match arm bodies must be separated by commas, even if the
+/// body is inside braces (`{}`).
+///
+/// # Examples
+///
+/// When not using any bindings, multiple match patterns can be on the same match arm.
+///
+/// ```
+/// # let input_string = "";
+///
+/// match_str! { (input_string.trim())
+///     ["exact_string"] => {
+///         println!("Exact match")
+///     }, // Note: even though the body is enclosed by braces, a comma is still necessary
+///     ["prefix", ..] | [.., "suffix"] => println!("Partial match"),
+///     no_match => println!("Input {:?} did not match", no_match),
+/// }
+/// ```
+///
+/// If a match arm uses a binding, it must only have one match pattern.
+///
+/// ```
+/// # let input_string = "";
+///
+/// match_str! { (input_string.trim())
+///     ["prefix", string] => println!("Prefixed: {:?}", string),
+///     [string, "suffix"] => println!("Suffixed: {:?}", string),
+///     ["prefix", string, "suffix"] => println!("Prefixed and Suffixed: {:?}", string),
+///     // The following does not work because the match arm has a binding and therefore can't have
+///     // more than one pattern:
+///     // ["prefix", string] | [string, "suffix"] => {
+///     //     println!("Prefixed or Suffixed: {:?}", string)
+///     // }
+/// }
+/// ```
+///
+/// # Implementation details
+///
+/// The macro starts by extracting the matched expression and binding it to a local variable. It
+/// will then call itself recursively to build an `if`-`else` chain to match that variable
+/// according to the desired prefix/suffix patterns.
+///
+/// When calling itself recursively, a `@match_str` marker is used to mark that the macro is
+/// inside an inner call. The marker is followed by an initial state, which consists of three parts.
+/// The first part is the condition expression, which is built by all the match patterns of that
+/// arm. The second part is the binding for the input string. The third part is the binding used
+/// for that match arm.
+///
+/// The third part of the state initially starts out empty, but is later replaced by either a
+/// binding expression or a `@no_bindings` marker. The marker allows the condition to grow with
+/// other patterns in the same match arm.
+macro_rules! match_str {
+    // Start of matching
+    ( ($string:expr) $(|)* $( $match_body:tt )* ) => {
+        {
+            let string_to_match = $string;
+
+            match_str!(@match_str((false), string_to_match) | $( $match_body )*)
+        }
+    };
+
+    // Match a whole string
+    (
+        @match_str($conditions:tt, $input:ident $(, @no_bindings)*)
+        | [$string:literal] $( $rest:tt )*
+    ) => {
+        match_str!(@match_str(($conditions || $input == $string), $input, @no_bindings) $( $rest )*)
+    };
+
+    // Match a string with a given prefix
+    (
+        @match_str($conditions:tt, $input:ident $(, @no_bindings)*)
+        | [$prefix:literal, ..] $( $rest:tt )*
+    ) => {
+        match_str!(
+            @match_str(($conditions || $input.starts_with($prefix)), $input, @no_bindings)
+            $( $rest )*
+        )
+    };
+
+    // Match a string with a given suffix
+    (
+        @match_str($conditions:tt, $input:ident $(, @no_bindings)*)
+        | [.., $suffix:literal] $( $rest:tt )*
+    ) => {
+        match_str!(
+            @match_str(($conditions || $input.ends_with($suffix)), $input, @no_bindings)
+            $( $rest )*
+        )
+    };
+
+    // Match a string with a given prefix and suffix
+    //
+    // The length check rejects inputs where the prefix and the suffix would overlap, keeping this
+    // rule consistent with the binding variant further below.
+    (
+        @match_str($conditions:tt, $input:ident $(, @no_bindings)*)
+        | [$prefix:literal, .., $suffix:literal]
+        $( $rest:tt )*
+    ) => {
+        match_str!(
+            @match_str(
+                (
+                    $conditions
+                    || (
+                        $input.len() >= $prefix.len() + $suffix.len()
+                        && $input.starts_with($prefix)
+                        && $input.ends_with($suffix)
+                    )
+                ),
+                $input,
+                @no_bindings
+            )
+            $( $rest )*
+        )
+    };
+
+    // Match a string with a given prefix, binding the rest of the string after the prefix
+    (
+        @match_str($conditions:tt, $input:ident)
+        | [$prefix:literal, $binding:ident] $( $rest:tt )*
+    ) => {
+        match_str!(
+            @match_str(
+                ($conditions || $input.starts_with($prefix)),
+                $input,
+                @binding $binding = &$input[$prefix.len()..]
+            )
+            $( $rest )*
+        )
+    };
+
+    // Match a string with a given suffix, binding the start of the string up to before the suffix
+    (
+        @match_str($conditions:tt, $input:ident)
+        | [$binding:ident, $suffix:literal] $( $rest:tt )*
+    ) => {
+        match_str!(
+            @match_str(
+                ($conditions || $input.ends_with($suffix)),
+                $input,
+                @binding $binding = &$input[..($input.len()-$suffix.len())]
+            )
+            $( $rest )*
+        )
+    };
+
+    // Match a string with a given prefix and suffix, binding the middle of the string, starting
+    // after the prefix and ending before the suffix
+    //
+    // The length check is required for the binding expression to be valid: an input such as a
+    // single `"` both starts and ends with `"`, but slicing it from after the prefix to before the
+    // suffix would use a start index greater than the end index and panic.
+    (
+        @match_str($conditions:tt, $input:ident)
+        | [$prefix:literal, $binding:ident, $suffix:literal] $( $rest:tt )*
+    ) => {
+        match_str!(
+            @match_str(
+                (
+                    $conditions
+                    || (
+                        $input.len() >= $prefix.len() + $suffix.len()
+                        && $input.starts_with($prefix)
+                        && $input.ends_with($suffix)
+                    )
+                ),
+                $input,
+                @binding $binding = &$input[$prefix.len()..($input.len()-$suffix.len())]
+            )
+            $( $rest )*
+        )
+    };
+
+    // Final empty `else` body
+    ( @match_str((false), $input:ident) |) => { {} };
+
+    // Final `else` body for a `_` catch-all pattern
+    ( @match_str((false), $input:ident) | _ => $body:expr $(,)*) => {
+        {
+            $body
+        }
+    };
+
+    // Final `else` body with a catch-all binding
+    ( @match_str((false), $input:ident) | $binding:ident => $body:expr $(,)* ) => {
+        {
+            let $binding = $input;
+
+            $body
+        }
+    };
+
+    // Build `if` body
+    (
+        @match_str($conditions:tt, $input:ident, @no_bindings)
+        => $body:expr , $(,)* $(|)* $( $rest:tt )*
+    ) => {
+        if $conditions {
+            $body
+        } else {
+            match_str!(@match_str((false), $input) | $( $rest )*)
+        }
+    };
+
+    // Build `if` body with a specified binding
+    (
+        @match_str($conditions:tt, $input:ident, @binding $binding:ident = $binding_expr:expr)
+        => $body:expr , $(,)* $(|)* $( $rest:tt )*
+    ) => {
+        if $conditions {
+            let $binding = $binding_expr;
+
+            $body
+        } else {
+            match_str!(@match_str((false), $input) | $( $rest )*)
+        }
+    };
+}
diff --git a/android/translations-converter/src/gettext/messages.rs b/android/translations-converter/src/gettext/messages.rs
new file mode 100644
index 0000000000..c8a29bd734
--- /dev/null
+++ b/android/translations-converter/src/gettext/messages.rs
@@ -0,0 +1,116 @@
+use super::{msg_string::MsgString, parser::Parser, plural_form::PluralForm};
+use derive_more::{Display, Error, From};
+use std::{
+ fs::File,
+ io::{BufRead, BufReader},
+ path::Path,
+};
+
+/// A parsed gettext messages file.
+#[derive(Clone, Debug, Default)]
+pub struct Messages {
+ pub plural_form: Option<PluralForm>,
+ entries: Vec<MsgEntry>,
+}
+
+/// A message entry in a gettext translation file.
+#[derive(Clone, Debug)]
+pub struct MsgEntry {
+ pub id: MsgString,
+ pub value: MsgValue,
+}
+
+/// A message string or plural set in a gettext translation file.
+#[derive(Clone, Debug)]
+pub enum MsgValue {
+ Invariant(MsgString),
+ Plural {
+ plural_id: MsgString,
+ values: Vec<MsgString>,
+ },
+}
+
+impl Messages {
+    /// Load message entries from a gettext translation file.
+    ///
+    /// See [`Parser`] for more information.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::Io`] if the file can't be opened or if reading a line from it fails, and
+    /// [`Error::Parse`] if the parser rejects a line or the input ends on an incomplete entry.
+    pub fn from_file(file_path: impl AsRef<Path>) -> Result<Self, Error> {
+        // Failing to open the file is reported through the returned `Result`, like any other I/O
+        // error, instead of panicking
+        let file = BufReader::new(File::open(file_path)?);
+        let mut parser = Parser::new();
+
+        for line in file.lines() {
+            parser.parse_line(&line?)?;
+        }
+
+        Ok(parser.finish()?)
+    }
+
+    /// Construct an empty messages list configured with the specified plural form.
+    pub fn with_plural_form(plural_form: PluralForm) -> Self {
+        Messages {
+            plural_form: Some(plural_form),
+            entries: Vec::new(),
+        }
+    }
+
+    /// Create a messages list with a single non-plural entry.
+    ///
+    /// The plural form for the messages is left unconfigured.
+    pub fn starting_with(id: MsgString, msg_str: MsgString) -> Self {
+        let first_entry = MsgEntry {
+            id,
+            value: MsgValue::Invariant(msg_str),
+        };
+
+        Messages {
+            plural_form: None,
+            entries: vec![first_entry],
+        }
+    }
+
+    /// Add a non-plural entry.
+    pub fn add(&mut self, id: MsgString, msg_str: MsgString) {
+        let entry = MsgEntry {
+            id,
+            value: MsgValue::Invariant(msg_str),
+        };
+
+        self.entries.push(entry);
+    }
+
+    /// Add a plural entry.
+    pub fn add_plural(&mut self, id: MsgString, plural_id: MsgString, values: Vec<MsgString>) {
+        let entry = MsgEntry {
+            id,
+            value: MsgValue::Plural { plural_id, values },
+        };
+
+        self.entries.push(entry);
+    }
+}
+
+impl IntoIterator for Messages {
+ type Item = MsgEntry;
+ type IntoIter = std::vec::IntoIter<Self::Item>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ self.entries.into_iter()
+ }
+}
+
+impl From<MsgString> for MsgValue {
+ fn from(string: MsgString) -> Self {
+ MsgValue::Invariant(string)
+ }
+}
+
+#[derive(Debug, Display, Error, From)]
+pub enum Error {
+ /// Parser error while parsing the input file.
+ #[display(fmt = "Failed to parse input file")]
+ Parse(super::parser::Error),
+
+ /// I/O error while reading the input file.
+ #[display(fmt = "Failed to read from the input file")]
+ Io(std::io::Error),
+}
diff --git a/android/translations-converter/src/gettext/mod.rs b/android/translations-converter/src/gettext/mod.rs
index 1bed501b72..72601abf0e 100644
--- a/android/translations-converter/src/gettext/mod.rs
+++ b/android/translations-converter/src/gettext/mod.rs
@@ -1,189 +1,21 @@
+#[macro_use]
+mod match_str;
+mod messages;
mod msg_string;
+mod parser;
mod plural_form;
use std::{
- collections::BTreeMap,
- fs::{File, OpenOptions},
- io::{self, BufRead, BufReader, BufWriter, Write},
- mem,
+ fs::OpenOptions,
+ io::{self, BufWriter, Write},
path::Path,
};
-pub use self::{msg_string::MsgString, plural_form::PluralForm};
-
-/// A parsed gettext translation file.
-#[derive(Clone, Debug)]
-pub struct Translation {
- pub plural_form: Option<PluralForm>,
- entries: Vec<MsgEntry>,
-}
-
-/// A message entry in a gettext translation file.
-#[derive(Clone, Debug)]
-pub struct MsgEntry {
- pub id: MsgString,
- pub value: MsgValue,
-}
-
-/// A message string or plural set in a gettext translation file.
-#[derive(Clone, Debug)]
-pub enum MsgValue {
- Invariant(MsgString),
- Plural {
- plural_id: MsgString,
- values: Vec<MsgString>,
- },
-}
-
-/// A helper macro to match a string to various prefix and suffix combinations.
-macro_rules! match_str {
- (
- ( $string:expr )
- $( [$start:expr, $middle:ident, $end:expr] => $body:tt )*
- _ => $else:expr $(,)*
- ) => {
- $(
- if let Some($middle) = parse_line($string, $start, $end) {
- $body
- } else
- )* {
- $else
- }
- };
-}
-
-impl Translation {
- /// Load message entries from a gettext translation file.
- ///
- /// The only metadata that is parsed from the file is the "Plural-Form" header. It is assumed
- /// that the header value is one of some hard-coded values, so if new languages that have new
- /// plurals are added, the code will have to be updated.
- ///
- /// An gettext translation file has the format in the example below:
- ///
- /// ```
- /// # The start of the file can contain empty entries to include some header with meta
- /// # information. Below is the header indicating the plural format.
- /// msgid ""
- /// msgstr ""
- /// "Plural-Forms: nplurals=2; plural=(n != 1);"
- ///
- /// # Simple translated messages
- /// msgid "Message in original language"
- /// msgstr "Mesaĝo en tradukita lingvo"
- ///
- /// # Plural translated messages (with two forms)
- /// msgid "One translated message"
- /// msgid_plural "%d translated messages"
- /// msgstr[0] "Unu tradukita mesaĝo"
- /// msgstr[1] "%d tradukitaj mesaĝoj"
- /// ```
- pub fn from_file(file_path: impl AsRef<Path>) -> Self {
- let mut parsing_header = false;
- let mut entries = Vec::new();
- let mut current_id = None;
- let mut current_plural_id = None;
- let mut plural_form = None;
- let mut variants = BTreeMap::new();
-
- let file = BufReader::new(File::open(file_path).expect("Failed to open gettext file"));
- // Ensure there's an empty line at the end so that the "else" part of the string matching
- // code will run for the last message in the file.
- let lines = file
- .lines()
- .map(|line_result| line_result.expect("Failed to read from gettext file"))
- .chain(Some(String::new()));
-
- for line in lines {
- match_str! { (line.trim())
- ["msgid \"", msg_id, "\""] => {
- current_id = Some(MsgString::from_escaped(msg_id));
- }
- ["msgstr \"", translation, "\""] => {
- if let Some(id) = current_id.take() {
- let value = MsgValue::Invariant(MsgString::from_escaped(translation));
-
- parsing_header = id.is_empty() && translation.is_empty();
-
- entries.push(MsgEntry { id, value });
- }
-
- current_id = None;
- current_plural_id = None;
- }
- ["msgid_plural \"", plural_id, "\""] => {
- current_plural_id = Some(MsgString::from_escaped(plural_id));
- parsing_header = false;
- }
- ["msgstr[", plural_translation, "\""] => {
- let variant_id_end = plural_translation
- .chars()
- .position(|character| character == ']')
- .expect("Invalid plural msgstr");
- let variant_id: usize = plural_translation[..variant_id_end]
- .parse()
- .expect("Invalid variant index");
- let variant_msg = parse_line(&plural_translation[variant_id_end..], "] \"", "")
- .expect("Invalid plural msgstr");
-
- variants.insert(variant_id, MsgString::from_escaped(variant_msg));
- parsing_header = false;
- }
- ["\"", header, "\\n\""] => {
- if parsing_header {
- if let Some(plural_formula) = parse_line(header, "Plural-Forms: ", ";") {
- plural_form = PluralForm::from_formula(plural_formula);
- }
- }
- }
- _ => {
- if let Some(plural_id) = current_plural_id.take() {
- let id = current_id.take().expect("Missing msgid for plural message");
- let values = mem::replace(&mut variants, BTreeMap::new())
- .into_iter()
- .enumerate()
- .inspect(|(index, (variant_id, _))| {
- assert_eq!(
- index, variant_id,
- "Unexpected variant ID for plural msgstr"
- )
- })
- .map(|(_, (_, value))| value)
- .collect();
- let value = MsgValue::Plural { plural_id, values };
-
- entries.push(MsgEntry { id, value });
- }
-
- current_id = None;
- current_plural_id = None;
- variants.clear();
- parsing_header = false;
- }
- }
- }
-
- Self {
- entries,
- plural_form,
- }
- }
-}
-
-impl IntoIterator for Translation {
- type Item = MsgEntry;
- type IntoIter = std::vec::IntoIter<Self::Item>;
-
- fn into_iter(self) -> Self::IntoIter {
- self.entries.into_iter()
- }
-}
-
-impl From<MsgString> for MsgValue {
- fn from(string: MsgString) -> Self {
- MsgValue::Invariant(string)
- }
-}
+pub use self::{
+ messages::{Messages, MsgEntry, MsgValue},
+ msg_string::MsgString,
+ plural_form::PluralForm,
+};
/// Append message entries to a translation file.
///
@@ -219,14 +51,3 @@ pub fn append_to_template(
Ok(())
}
-
-fn parse_line<'l>(line: &'l str, prefix: &str, suffix: &str) -> Option<&'l str> {
- if line.starts_with(prefix) && line.ends_with(suffix) {
- let start = prefix.len();
- let end = line.len() - suffix.len();
-
- Some(&line[start..end])
- } else {
- None
- }
-}
diff --git a/android/translations-converter/src/gettext/msg_string.rs b/android/translations-converter/src/gettext/msg_string.rs
index b6ae21aadf..83bcd90c30 100644
--- a/android/translations-converter/src/gettext/msg_string.rs
+++ b/android/translations-converter/src/gettext/msg_string.rs
@@ -1,10 +1,10 @@
use std::{
fmt::{self, Display, Formatter},
- ops::Deref,
+ ops::{Add, AddAssign, Deref},
};
/// A message string in a gettext translation file.
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Eq, PartialEq)]
pub struct MsgString(String);
impl MsgString {
@@ -51,6 +51,41 @@ impl Deref for MsgString {
}
}
+impl AsRef<MsgString> for MsgString {
+ fn as_ref(&self) -> &Self {
+ self
+ }
+}
+
+impl<M> AddAssign<M> for MsgString
+where
+    M: AsRef<MsgString>,
+{
+    /// Append the contents of `other` to the end of this message string.
+    fn add_assign(&mut self, other: M) {
+        let suffix: &MsgString = other.as_ref();
+
+        self.0.push_str(&suffix.0);
+    }
+}
+
+impl<M> Add<M> for MsgString
+where
+    M: AsRef<MsgString>,
+{
+    type Output = MsgString;
+
+    /// Consume this message string and return it with the contents of `other` appended.
+    fn add(self, other: M) -> Self::Output {
+        let mut combined = self;
+
+        combined += other;
+        combined
+    }
+}
+
+impl<'l, 'r> Add<&'r MsgString> for &'l MsgString {
+    type Output = MsgString;
+
+    /// Build a new message string with the contents of `self` followed by those of `other`.
+    fn add(self, other: &'r MsgString) -> Self::Output {
+        let mut combined = self.0.clone();
+
+        combined.push_str(&other.0);
+
+        MsgString(combined)
+    }
+}
+
#[cfg(test)]
mod tests {
use super::MsgString;
@@ -89,4 +124,40 @@ mod tests {
assert_eq!(input.to_string(), original);
}
+
+ #[test]
+ fn appending() {
+ let mut target = MsgString::from_unescaped(r#""Initial""#);
+ let extra = MsgString::from_escaped(r#"\"Extra\""#);
+
+ target += extra;
+
+ let expected = concat!(r#"\"Initial\"#, r#""\"Extra\""#);
+
+ assert_eq!(target.to_string(), expected);
+ }
+
+ #[test]
+ fn concatenating_by_moving() {
+ let start = MsgString::from_unescaped(r#""Start""#);
+ let end = MsgString::from_escaped(r#"\"End\""#);
+
+ let result = start + end;
+
+ let expected = concat!(r#"\"Start\"#, r#""\"End\""#);
+
+ assert_eq!(result.to_string(), expected);
+ }
+
+ #[test]
+ fn concatenating_by_borrowing() {
+ let start = MsgString::from_escaped(r#"\"Start\""#);
+ let end = MsgString::from_unescaped(r#""End""#);
+
+ let result = &start + &end;
+
+ let expected = concat!(r#"\"Start\"#, r#""\"End\""#);
+
+ assert_eq!(result.to_string(), expected);
+ }
}
diff --git a/android/translations-converter/src/gettext/parser.rs b/android/translations-converter/src/gettext/parser.rs
new file mode 100644
index 0000000000..749922a7aa
--- /dev/null
+++ b/android/translations-converter/src/gettext/parser.rs
@@ -0,0 +1,558 @@
+use super::{Messages, MsgString, PluralForm};
+use derive_more::{Display, Error};
+use std::{collections::BTreeMap, mem};
+
+/// A gettext messages file parser.
+///
+/// Can parse both translations files and template files.
+///
+/// # Usage
+///
+/// The parser works by parsing individual lines. After creating a [`Parser`] instance, the input
+/// lines should be sent to it through repeated calls to [`Parser::parse_line`], and afterwards
+/// calling [`Parser::finish`] to finish parsing and obtain the parsed result.
+///
+/// The only metadata that is parsed from the file is the "Plural-Forms" header. It is assumed
+/// that the header value is one of some hard-coded values, so if new languages that have new
+/// plurals are added, the code will have to be updated.
+///
+/// # Input example
+///
+/// A gettext translation file has the format in the example below:
+///
+/// ```text
+/// # The start of the file can contain empty entries to include some header with meta
+/// # information. Below is the header indicating the plural format.
+/// msgid ""
+/// msgstr ""
+/// "Plural-Forms: nplurals=2; plural=(n != 1);\n"
+///
+/// # Simple translated messages
+/// msgid "Message in original language"
+/// msgstr "Mesaĝo en tradukita lingvo"
+///
+/// # Plural translated messages (with two forms)
+/// msgid "One translated message"
+/// msgid_plural "%d translated messages"
+/// msgstr[0] "Unu tradukita mesaĝo"
+/// msgstr[1] "%d tradukitaj mesaĝoj"
+/// ```
+#[derive(Debug)]
+pub enum Parser {
+ /// Initial state.
+ ///
+ /// No useful information has been extracted yet.
+ Start,
+
+ /// Possible start of file header.
+ ///
+ /// Found an empty message ID, if the next line is an empty message string the header of the
+ /// file has been found.
+ HeaderStart,
+
+ /// Start of file header found.
+ Header,
+
+ /// Skipping to the end of the header.
+ ///
+ /// The useful information has already been extracted.
+ HeaderEnd(Messages),
+
+ /// Waiting for a next message section.
+ ///
+ /// Parser has completed parsing either at least one valid entry or the file header.
+ Idle(Messages),
+
+ /// New message entry.
+ ///
+ /// Parsed a new message ID.
+ NewEntry { id: MsgString, messages: Messages },
+
+ /// Parsing a message entry.
+ ///
+ /// Parsed a message ID and a message string, but the string could be incomplete with the rest
+ /// of it spread among more lines.
+ InvariantEntry {
+ id: MsgString,
+ message: MsgString,
+ messages: Messages,
+ },
+
+ /// Detected that entry is for a plural.
+ ///
+ /// Found a plural ID, may have parsed variants.
+ NewPluralEntry {
+ id: MsgString,
+ plural_id: MsgString,
+ variants: BTreeMap<usize, MsgString>,
+ messages: Messages,
+ },
+
+ /// Parsing a plural entry variant.
+ ///
+ /// Parsed the start of a plural variant string, but the string could be incomplete with the
+ /// rest of it spread among more lines.
+ PluralEntry {
+ id: MsgString,
+ plural_id: MsgString,
+ index: usize,
+ variant: MsgString,
+ variants: BTreeMap<usize, MsgString>,
+ messages: Messages,
+ },
+
+ /// Internal transition state.
+ ///
+ /// Used while a line is being parsed.
+ Parsing,
+}
+
+impl Parser {
+    /// Create a [`Parser`] in its initial state.
+    ///
+    /// Input lines are then fed to the instance with [`Parser::parse_line`], and the parsed result
+    /// is obtained with a final call to [`Parser::finish`].
+    pub fn new() -> Self {
+        Self::Start
+    }
+
+    /// Parse an input line.
+    ///
+    /// The current state is taken out of `self`, leaving the transitional [`Parser::Parsing`]
+    /// state in its place, and is handed together with `line` to the handler for that state. The
+    /// state returned by the handler becomes the new state of the parser. If the handler returns
+    /// an error, it is propagated and the parser is left in the [`Parser::Parsing`] state.
+    pub fn parse_line(&mut self, line: &str) -> Result<(), Error> {
+        let next_state = match mem::replace(self, Parser::Parsing) {
+            Parser::Start => Self::parse_start(line),
+            Parser::HeaderStart => Self::parse_header_start(line),
+            Parser::Header => Self::parse_header(line),
+            Parser::HeaderEnd(messages) => Self::parse_header_end(line, messages),
+            Parser::Idle(messages) => Self::parse_idle(line, messages),
+            Parser::NewEntry { id, messages } => Self::parse_new_entry(line, id, messages),
+            Parser::InvariantEntry {
+                id,
+                message,
+                messages,
+            } => Self::parse_invariant_entry(line, id, message, messages),
+            Parser::NewPluralEntry {
+                id,
+                plural_id,
+                variants,
+                messages,
+            } => Self::parse_new_plural_entry(line, id, plural_id, variants, messages),
+            Parser::PluralEntry {
+                id,
+                plural_id,
+                index,
+                variant,
+                variants,
+                messages,
+            } => Self::parse_plural_entry(line, id, plural_id, index, variant, variants, messages),
+            Parser::Parsing => unreachable!("Parser should never stop on the Parsing state"),
+        }?;
+
+        *self = next_state;
+
+        Ok(())
+    }
+
+ /// Finish parsing and obtain the parsed [`Messages`].
+ pub fn finish(self) -> Result<Messages, Error> {
+ match self {
+ // Input file is empty
+ Parser::Start => Ok(Messages::default()),
+
+ // A single empty msgid was parsed, but no msgstr for that entry (or header)
+ Parser::HeaderStart => Err(Error::IncompleteEntry(MsgString::empty())),
+
+ // Input file only contains headers that were ignored
+ Parser::Header => Ok(Messages::default()),
+
+ // Input file only contains headers, but the plural form was successfully parsed
+ Parser::HeaderEnd(messages) => Ok(messages),
+
+ // Parsing successful
+ Parser::Idle(messages) => Ok(messages),
+
+ // Input file ends on an incomplete entry
+ Parser::NewEntry { id, .. } => Err(Error::IncompleteEntry(id)),
+
+ // Input file ends on an invariant entry
+ Parser::InvariantEntry {
+ id,
+ message,
+ mut messages,
+ } => {
+ messages.add(id, message);
+
+ Ok(messages)
+ }
+
+ // Input file ends with an empty plural entry
+ Parser::NewPluralEntry { id, .. } => Err(Error::IncompletePluralEntry(id)),
+
+ // Input file ends with a plural entry (it might be missing variants)
+ Parser::PluralEntry {
+ id,
+ plural_id,
+ index,
+ variant,
+ mut variants,
+ mut messages,
+ } => {
+ variants.insert(index, variant);
+
+ let variants = collect_variants(&id, variants)?;
+
+ messages.add_plural(id, plural_id, variants);
+
+ Ok(messages)
+ }
+
+ Parser::Parsing => unreachable!("Parser should never stop on the Parsing state"),
+ }
+ }
+
+    /// Handle a line while in the [`Parser::Start`] state.
+    ///
+    /// Any line that isn't handled by one of the patterns below results in an
+    /// [`Error::UnexpectedLine`].
+    fn parse_start(line: &str) -> Result<Parser, Error> {
+        Ok(match_str! { (line)
+            // Nothing to extract from empty lines and comment lines
+            [""] | ["#", ..] => Parser::Start,
+
+            // `msgid ""` could be the first line of the header
+            ["msgid \"\""] => Parser::HeaderStart,
+
+            // A message context can't belong to the header, so entries are expected from here on
+            ["msgctxt ", ..] => Parser::Idle(Messages::default()),
+
+            // The file has no header and starts directly with an entry
+            ["msgid \"", escaped_id, "\""] => Parser::NewEntry {
+                id: MsgString::from_escaped(escaped_id),
+                messages: Messages::default()
+            },
+
+            unexpected => return Err(Error::UnexpectedLine(unexpected.to_owned())),
+        })
+    }
+
+    /// Handle a line while in the [`Parser::HeaderStart`] state.
+    ///
+    /// Any line that isn't handled by one of the patterns below results in an
+    /// [`Error::UnexpectedLine`].
+    fn parse_header_start(line: &str) -> Result<Parser, Error> {
+        Ok(match_str! { (line)
+            // Comment lines don't change the state
+            ["#", ..] => Parser::HeaderStart,
+
+            // `msgstr ""` right after `msgid ""` means that this is indeed the header
+            ["msgstr \"\""] => Parser::Header,
+
+            // With a non-empty message string, this was a regular entry that has an empty ID
+            ["msgstr \"", escaped_message, "\""] => Parser::Idle(
+                Messages::starting_with(
+                    MsgString::empty(),
+                    MsgString::from_escaped(escaped_message),
+                )
+            ),
+
+            // With a plural ID, this was the start of a plural entry that has an empty ID
+            ["msgid_plural \"", escaped_plural_id, "\""] => Parser::NewPluralEntry {
+                id: MsgString::empty(),
+                plural_id: MsgString::from_escaped(escaped_plural_id),
+                variants: BTreeMap::new(),
+                messages: Messages::default(),
+            },
+
+            unexpected => return Err(Error::UnexpectedLine(unexpected.to_owned())),
+        })
+    }
+
+    /// Handle a line while in the [`Parser::Header`] state.
+    ///
+    /// Comment lines and header lines other than `Plural-Forms` keep the parser in the
+    /// [`Parser::Header`] state, an empty line ends the header, and a `Plural-Forms` line is
+    /// converted into a [`PluralForm`] before moving to the [`Parser::HeaderEnd`] state.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::UnrecognizedPluralFormula`] if [`PluralForm::from_formula`] doesn't
+    /// recognize the formula, and [`Error::UnexpectedLine`] for any other kind of line.
+    fn parse_header(line: &str) -> Result<Parser, Error> {
+        let next_state = match_str! { (line)
+            // Ignore comment lines, staying in the header so that the lines after the comment are
+            // still handled as header lines
+            ["#", ..] => Parser::Header,
+
+            // An empty line marks the end of the header
+            [""] => Parser::Idle(Messages::default()),
+
+            // The Plural-Forms header is the only header that's currently used, so after finding
+            // it the parser can skip to the end of the headers
+            ["\"Plural-Forms: ", plural_formula, ";\\n\""] => {
+                let plural_form = PluralForm::from_formula(plural_formula)
+                    .ok_or_else(|| Error::UnrecognizedPluralFormula(plural_formula.to_owned()))?;
+
+                Parser::HeaderEnd(Messages::with_plural_form(plural_form))
+            },
+
+            // Skip other headers
+            ["\"", .., "\\n\""] => Parser::Header,
+
+            other => return Err(Error::UnexpectedLine(other.to_owned())),
+        };
+
+        Ok(next_state)
+    }
+
+    /// Handle a line while in the [`Parser::HeaderEnd`] state.
+    ///
+    /// Any line that isn't handled by one of the patterns below results in an
+    /// [`Error::UnexpectedLine`].
+    fn parse_header_end(line: &str, messages: Messages) -> Result<Parser, Error> {
+        Ok(match_str! { (line)
+            // Comment lines don't change the state
+            ["#", ..] => Parser::HeaderEnd(messages),
+
+            // The header ends at the first empty line
+            [""] => Parser::Idle(messages),
+
+            // The remaining header lines aren't used
+            ["\"", .., "\\n\""] => Parser::HeaderEnd(messages),
+
+            unexpected => return Err(Error::UnexpectedLine(unexpected.to_owned())),
+        })
+    }
+
+    /// Handle a line while in the [`Parser::Idle`] state.
+    ///
+    /// Any line that isn't handled by one of the patterns below results in an
+    /// [`Error::UnexpectedLine`].
+    fn parse_idle(line: &str, messages: Messages) -> Result<Parser, Error> {
+        Ok(match_str! { (line)
+            // A message ID starts a new entry
+            ["msgid \"", escaped_id, "\""] => Parser::NewEntry {
+                id: MsgString::from_escaped(escaped_id),
+                messages,
+            },
+
+            // Empty lines, comment lines and message context lines don't change the state
+            [""] | ["#", ..] | ["msgctxt ", ..] => Parser::Idle(messages),
+
+            unexpected => return Err(Error::UnexpectedLine(unexpected.to_owned())),
+        })
+    }
+
+    /// Handle a line while in the [`Parser::NewEntry`] state.
+    ///
+    /// Any line that isn't handled by one of the patterns below results in an
+    /// [`Error::UnexpectedLine`].
+    fn parse_new_entry(line: &str, id: MsgString, messages: Messages) -> Result<Parser, Error> {
+        Ok(match_str! { (line)
+            // A message string makes this an invariant entry
+            ["msgstr \"", escaped_message, "\""] => Parser::InvariantEntry {
+                id,
+                message: MsgString::from_escaped(escaped_message),
+                messages,
+            },
+
+            // A plural ID makes this a plural entry, which starts without any variants
+            ["msgid_plural \"", escaped_plural_id, "\""] => Parser::NewPluralEntry {
+                id,
+                plural_id: MsgString::from_escaped(escaped_plural_id),
+                variants: BTreeMap::new(),
+                messages,
+            },
+
+            // Comment lines don't change the state
+            ["#", ..] => Parser::NewEntry { id, messages },
+
+            unexpected => return Err(Error::UnexpectedLine(unexpected.to_owned())),
+        })
+    }
+
+    /// Handle a line while in the [`Parser::InvariantEntry`] state.
+    ///
+    /// Any line that isn't handled by one of the patterns below results in an
+    /// [`Error::UnexpectedLine`].
+    fn parse_invariant_entry(
+        line: &str,
+        id: MsgString,
+        mut message: MsgString,
+        mut messages: Messages,
+    ) -> Result<Parser, Error> {
+        Ok(match_str! { (line)
+            // An empty line completes the entry, which can then be stored
+            [""] => {
+                messages.add(id, message);
+
+                Parser::Idle(messages)
+            },
+
+            // A quoted string is a continuation of the message string
+            ["\"", continuation, "\""] => {
+                message += MsgString::from_escaped(continuation);
+
+                Parser::InvariantEntry { id, message, messages }
+            },
+
+            // Comment lines don't change the state
+            ["#", ..] => Parser::InvariantEntry { id, message, messages },
+
+            unexpected => return Err(Error::UnexpectedLine(unexpected.to_owned())),
+        })
+    }
+
+    /// Handles one line of a plural entry before any of its variants has been started.
+    ///
+    /// A `msgstr[N] "..."` line starts reading the variant with index `N`. An empty line ends
+    /// the entry, which is added to `messages` with the variants collected so far. Comment lines
+    /// are skipped and any other line is rejected with [`Error::UnexpectedLine`].
+    fn parse_new_plural_entry(
+        line: &str,
+        id: MsgString,
+        plural_id: MsgString,
+        variants: BTreeMap<usize, MsgString>,
+        mut messages: Messages,
+    ) -> Result<Parser, Error> {
+        Ok(match_str! { (line)
+            // Comments don't change the state
+            ["#", ..] => Parser::NewPluralEntry { id, plural_id, variants, messages },
+
+            // The first line of a plural variant
+            ["msgstr[", index_and_string, "\""] => {
+                let (index, variant) = extract_plural_variant(index_and_string)?;
+
+                Parser::PluralEntry { id, plural_id, index, variant, variants, messages }
+            },
+
+            // A blank line completes the plural entry
+            [""] => {
+                let ordered_variants = collect_variants(&id, variants)?;
+
+                messages.add_plural(id, plural_id, ordered_variants);
+
+                Parser::Idle(messages)
+            },
+
+            unexpected => return Err(Error::UnexpectedLine(unexpected.to_owned())),
+        })
+    }
+
+    /// Handles one line while reading the message string of a plural variant.
+    ///
+    /// Quoted lines are appended to the current variant. A new `msgstr[N] "..."` line stores the
+    /// current variant under its index and starts reading the next one. An empty line stores
+    /// the current variant and ends the entry, which is then added to `messages`. Comment lines
+    /// are skipped and any other line is rejected with [`Error::UnexpectedLine`].
+    fn parse_plural_entry(
+        line: &str,
+        id: MsgString,
+        plural_id: MsgString,
+        index: usize,
+        mut variant: MsgString,
+        mut variants: BTreeMap<usize, MsgString>,
+        mut messages: Messages,
+    ) -> Result<Parser, Error> {
+        Ok(match_str! { (line)
+            // Comments don't change the state
+            ["#", ..] => Parser::PluralEntry { id, plural_id, index, variant, variants, messages },
+
+            // A quoted line is a continuation of the current variant's message string
+            ["\"", continuation, "\""] => {
+                variant += MsgString::from_escaped(continuation);
+
+                Parser::PluralEntry { id, plural_id, index, variant, variants, messages }
+            },
+
+            // A new variant line ends the current variant and starts the next one
+            ["msgstr[", index_and_string, "\""] => {
+                let (next_index, next_variant) = extract_plural_variant(index_and_string)?;
+
+                variants.insert(index, variant);
+
+                Parser::PluralEntry {
+                    id,
+                    plural_id,
+                    index: next_index,
+                    variant: next_variant,
+                    variants,
+                    messages,
+                }
+            },
+
+            // A blank line ends both the current variant and the whole plural entry
+            [""] => {
+                variants.insert(index, variant);
+
+                let ordered_variants = collect_variants(&id, variants)?;
+
+                messages.add_plural(id, plural_id, ordered_variants);
+
+                Parser::Idle(messages)
+            },
+
+            unexpected => return Err(Error::UnexpectedLine(unexpected.to_owned())),
+        })
+    }
+}
+
+/// Splits the remainder of a plural variant line into its index and message.
+///
+/// A plural variant line looks like `msgstr[1] "%d tradukitaj mesaĝoj"`. By the time this
+/// function is called, the parser has already removed the `msgstr[` prefix and the `"` suffix,
+/// so the input has the form `1] "%d tradukitaj mesaĝoj`. The text before the first `] "`
+/// separator is parsed as the index (1 in the example) and the text after it is used as the
+/// message string.
+///
+/// Returns [`Error::InvalidPluralVariant`] if the separator is missing, and
+/// [`Error::InvalidPluralIndex`] if the index can't be parsed as a `usize`.
+fn extract_plural_variant(index_and_string: &str) -> Result<(usize, MsgString), Error> {
+    // Rebuilds the full line so that it can be included in the returned errors
+    let recreate_line = || format!("msgstr[{}\"", index_and_string);
+
+    let mut parts = index_and_string.splitn(2, "] \"");
+
+    let (index_string, message_string) = match (parts.next(), parts.next()) {
+        (Some(index_string), Some(message_string)) => (index_string, message_string),
+        _ => return Err(Error::InvalidPluralVariant(recreate_line())),
+    };
+
+    let index: usize = index_string
+        .parse()
+        .map_err(|_| Error::InvalidPluralIndex(recreate_line()))?;
+
+    Ok((index, MsgString::from_escaped(message_string)))
+}
+
+/// Helper function to turn the map of parsed variants into an ordered list.
+///
+/// The variant messages are returned sorted by index. An [`Error::IncompletePluralEntry`] with
+/// a copy of `id` is returned if the indices do not cover every value from zero up to the number
+/// of variants.
+fn collect_variants(
+    id: &MsgString,
+    variant_map: BTreeMap<usize, MsgString>,
+) -> Result<Vec<MsgString>, Error> {
+    // With N variants, every index in `0..N` must be present
+    let has_gap = (0..variant_map.len()).any(|index| !variant_map.contains_key(&index));
+
+    if has_gap {
+        return Err(Error::IncompletePluralEntry(id.clone()));
+    }
+
+    let mut ordered_variants = Vec::with_capacity(variant_map.len());
+
+    // A `BTreeMap` is iterated in ascending key order, so the variants come out sorted by index
+    for (_, variant) in variant_map {
+        ordered_variants.push(variant);
+    }
+
+    Ok(ordered_variants)
+}
+
+/// Errors that can occur while parsing gettext messages.
+#[derive(Clone, Debug, Display, Error, Eq, PartialEq)]
+pub enum Error {
+ /// An unexpected line was read while parsing; the variant carries that line.
+ #[display(fmt = "Unexpected line parsing gettext messages: {}", _0)]
+ UnexpectedLine(#[error(not(source))] String),
+
+ /// Input uses an unrecognized plural formula.
+ #[display(fmt = "Input uses an unrecognized formula for the plural form: {}", _0)]
+ UnrecognizedPluralFormula(#[error(not(source))] String),
+
+ /// Input ended with an incomplete entry; the variant carries the ID of that entry.
+ #[display(fmt = "Input ended with an incomplete gettext entry with ID: {}", _0)]
+ IncompleteEntry(#[error(not(source))] MsgString),
+
+ /// Plural entry definition is missing a plural variant; the variant carries the entry ID.
+ #[display(fmt = "Plural entry is missing a plural variant: {}", _0)]
+ IncompletePluralEntry(#[error(not(source))] MsgString),
+
+ /// Plural variant line is invalid; the variant carries the recreated line.
+ #[display(fmt = "Plural variant line is invalid: {}", _0)]
+ InvalidPluralVariant(#[error(not(source))] String),
+
+ /// Plural variant index was not parsable; the variant carries the recreated line.
+ #[display(fmt = "Plural variant line contains an invalid index: {}", _0)]
+ InvalidPluralIndex(#[error(not(source))] String),
+}
diff --git a/android/translations-converter/src/gettext/plural_form.rs b/android/translations-converter/src/gettext/plural_form.rs
index c55066c7b7..e09e9cfd04 100644
--- a/android/translations-converter/src/gettext/plural_form.rs
+++ b/android/translations-converter/src/gettext/plural_form.rs
@@ -1,3 +1,6 @@
+use derive_more::{Display, Error};
+use std::str::FromStr;
+
/// Known plural forms.
#[derive(Clone, Copy, Debug)]
pub enum PluralForm {
@@ -29,3 +32,19 @@ impl PluralForm {
}
}
}
+
+impl FromStr for PluralForm {
+    type Err = UnsupportedPluralFormulaError;
+
+    /// Parses a plural formula string into a known [`PluralForm`].
+    ///
+    /// Fails with an [`UnsupportedPluralFormulaError`] carrying a copy of the formula if
+    /// [`PluralForm::from_formula`] does not recognize it.
+    fn from_str(string: &str) -> Result<Self, Self::Err> {
+        match PluralForm::from_formula(string) {
+            Some(plural_form) => Ok(plural_form),
+            None => Err(UnsupportedPluralFormulaError(string.to_owned())),
+        }
+    }
+}
+
+/// Error returned when a [`PluralForm`] can't be created from the specified plural formula.
+///
+/// The formula is either invalid, or support for it hasn't been added yet.
+#[derive(Clone, Debug, Display, Error)]
+#[display(fmt = "Unsupported plural formula: {}", _0)]
+pub struct UnsupportedPluralFormulaError(#[error(not(source))] String);
diff --git a/android/translations-converter/src/main.rs b/android/translations-converter/src/main.rs
index d11320b070..8444470cd3 100644
--- a/android/translations-converter/src/main.rs
+++ b/android/translations-converter/src/main.rs
@@ -98,19 +98,23 @@ fn main() {
fs::create_dir(&destination_dir).expect("Failed to create Android locale directory");
}
+ let translations = gettext::Messages::from_file(&locale_file)
+ .expect("Failed to load translations for a locale");
+
generate_translations(
locale,
known_urls.clone(),
known_strings.clone(),
known_plurals.clone(),
- gettext::Translation::from_file(&locale_file),
+ translations,
destination_dir.join("strings.xml"),
destination_dir.join("plurals.xml"),
);
}
let template_path = locale_dir.join("messages.pot");
- let template = gettext::Translation::from_file(&template_path);
+ let template = gettext::Messages::from_file(&template_path)
+ .expect("Failed to load messages template file");
let mut missing_translations = known_strings;
let mut missing_plurals: HashMap<_, _> = known_plurals;
@@ -228,7 +232,7 @@ fn generate_translations(
known_urls: HashMap<String, String>,
mut known_strings: HashMap<String, String>,
mut known_plurals: HashMap<String, String>,
- translations: gettext::Translation,
+ translations: gettext::Messages,
strings_output_path: impl AsRef<Path>,
plurals_output_path: impl AsRef<Path>,
) {