diff options
| author | Janito Vaqueiro Ferreira Filho <janito@mullvad.net> | 2021-05-19 11:34:28 -0300 |
|---|---|---|
| committer | Janito Vaqueiro Ferreira Filho <janito@mullvad.net> | 2021-05-19 11:34:28 -0300 |
| commit | 4cf35350a9d86c81d78acaed00e582ce045ccb3c (patch) | |
| tree | 976a7c2333ed65da160aeb4d3e4ff86b3abe7f1b /android | |
| parent | 6b2a852d25ba0cfe5bd06bd00f32f6ccdf6314b7 (diff) | |
| parent | 7de3c8757b191a403e90af439dcc9377911794d6 (diff) | |
| download | mullvadvpn-4cf35350a9d86c81d78acaed00e582ce045ccb3c.tar.xz mullvadvpn-4cf35350a9d86c81d78acaed00e582ce045ccb3c.zip | |
Merge branch 'refactor-translations-converter'
Diffstat (limited to 'android')
| -rw-r--r-- | android/translations-converter/src/android.rs | 347 | ||||
| -rw-r--r-- | android/translations-converter/src/android/mod.rs | 9 | ||||
| -rw-r--r-- | android/translations-converter/src/android/plurals.rs | 148 | ||||
| -rw-r--r-- | android/translations-converter/src/android/string_value.rs | 215 | ||||
| -rw-r--r-- | android/translations-converter/src/android/strings.rs | 189 | ||||
| -rw-r--r-- | android/translations-converter/src/gettext/mod.rs (renamed from android/translations-converter/src/gettext.rs) | 106 | ||||
| -rw-r--r-- | android/translations-converter/src/gettext/msg_string.rs | 74 | ||||
| -rw-r--r-- | android/translations-converter/src/gettext/plural_form.rs | 31 | ||||
| -rw-r--r-- | android/translations-converter/src/main.rs | 71 | ||||
| -rw-r--r-- | android/translations-converter/src/normalize.rs | 101 |
10 files changed, 805 insertions, 486 deletions
diff --git a/android/translations-converter/src/android.rs b/android/translations-converter/src/android.rs deleted file mode 100644 index f49f50025c..0000000000 --- a/android/translations-converter/src/android.rs +++ /dev/null @@ -1,347 +0,0 @@ -use lazy_static::lazy_static; -use regex::Regex; -use serde::{Deserialize, Serialize}; -use std::{ - fmt::{self, Display, Formatter}, - ops::{Deref, DerefMut}, -}; - -lazy_static! { - static ref LINE_BREAKS: Regex = Regex::new(r"\s*\n\s*").unwrap(); - static ref APOSTROPHES: Regex = Regex::new(r"\\'").unwrap(); - static ref DOUBLE_QUOTES: Regex = Regex::new(r#"\\""#).unwrap(); - static ref PARAMETERS: Regex = Regex::new(r"%[0-9]*\$").unwrap(); -} - -/// Contents of an Android string resources file. -/// -/// This type can be created directly deserializing the `strings.xml` file. -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct StringResources { - #[serde(rename = "string")] - entries: Vec<StringResource>, -} - -/// An entry in an Android string resources file. -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct StringResource { - /// The string resource ID. - pub name: String, - - /// If the string should be translated or not. - #[serde(default = "default_translatable")] - pub translatable: bool, - - /// The string value. - #[serde(rename = "$value")] - pub value: StringValue, -} - -impl StringResources { - /// Create an empty list of Android string resources. - pub fn new() -> Self { - StringResources { - entries: Vec::new(), - } - } - - /// Normalize the strings into a common format. - /// - /// Allows the string values to be compared to the gettext messages. - pub fn normalize(&mut self) { - for entry in &mut self.entries { - entry.normalize(); - } - } - - /// Sorts the entries alphabetically based on their IDs. 
- pub fn sort(&mut self) { - self.entries - .sort_by(|left, right| left.name.cmp(&right.name)); - } -} - -impl Deref for StringResources { - type Target = Vec<StringResource>; - - fn deref(&self) -> &Self::Target { - &self.entries - } -} - -impl DerefMut for StringResources { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.entries - } -} - -impl IntoIterator for StringResources { - type Item = StringResource; - type IntoIter = std::vec::IntoIter<Self::Item>; - - fn into_iter(self) -> Self::IntoIter { - self.entries.into_iter() - } -} - -impl StringResource { - /// Create a new Android string resource entry. - /// - /// The name is the resource ID, and the value will be properly escaped. - pub fn new(name: String, value: &str) -> Self { - StringResource { - name, - translatable: true, - value: StringValue::from(value), - } - } - - /// Normalize the string value into a common format. - /// - /// Makes it possible to compare the Android strings with the gettext messages. - pub fn normalize(&mut self) { - self.value.normalize(); - } -} - -fn default_translatable() -> bool { - true -} - -// Unfortunately, direct serialization to XML isn't working correctly. -impl Display for StringResources { - fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { - writeln!(formatter, r#"<?xml version="1.0" encoding="utf-8"?>"#)?; - writeln!(formatter, "<resources>")?; - - for string in &self.entries { - writeln!(formatter, " {}", string)?; - } - - writeln!(formatter, "</resources>") - } -} - -impl Display for StringResource { - fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { - if self.translatable { - write!( - formatter, - r#"<string name="{}">{}</string>"#, - self.name, self.value - ) - } else { - write!( - formatter, - r#"<string name="{}" translatable="false">{}</string>"#, - self.name, self.value - ) - } - } -} - -/// Contents of an Android plurals resources file. -/// -/// This type can be created directly deserializing the `plurals.xml` file. 
-#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct PluralResources { - #[serde(rename = "plurals")] - entries: Vec<PluralResource>, -} - -/// An entry in an Android plurals resources file. -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct PluralResource { - /// The plural resource ID. - pub name: String, - - /// The items of the plural resource, one for each quantity variant. - #[serde(rename = "item")] - pub items: Vec<PluralVariant>, -} - -/// A string resource for a specific quantity. -/// -/// This is part of a plural resource. -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct PluralVariant { - /// The quantity for this variant to be used. - pub quantity: PluralQuantity, - - /// The string value - #[serde(rename = "$value")] - pub string: StringValue, -} - -/// A valid quantity for a plural variant. -#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)] -#[serde(rename_all = "snake_case")] -pub enum PluralQuantity { - Zero, - One, - Few, - Many, - Other, -} - -impl Deref for PluralResources { - type Target = Vec<PluralResource>; - - fn deref(&self) -> &Self::Target { - &self.entries - } -} - -impl DerefMut for PluralResources { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.entries - } -} - -impl IntoIterator for PluralResources { - type Item = PluralResource; - type IntoIter = std::vec::IntoIter<Self::Item>; - - fn into_iter(self) -> Self::IntoIter { - self.entries.into_iter() - } -} - -impl PluralResources { - /// Create an empty list of plural resources. - pub fn new() -> Self { - PluralResources { - entries: Vec::new(), - } - } -} - -impl PluralResource { - /// Create a plural resource representation. - /// - /// The resource has a name, used as the identifier, and a list of items. Each item contains - /// the message and the quantity it should be used for. 
- pub fn new(name: String, values: impl Iterator<Item = (PluralQuantity, String)>) -> Self { - let items = values - .map(|(quantity, string)| PluralVariant { - quantity, - string: StringValue::from(&*string), - }) - .collect(); - - PluralResource { name, items } - } -} - -impl Display for PluralResources { - fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { - writeln!(formatter, r#"<?xml version="1.0" encoding="utf-8"?>"#)?; - writeln!(formatter, "<resources>")?; - - for entry in &self.entries { - write!(formatter, "{}", entry)?; - } - - writeln!(formatter, "</resources>") - } -} - -impl Display for PluralResource { - fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { - writeln!(formatter, r#" <plurals name="{}">"#, self.name)?; - - for item in &self.items { - writeln!(formatter, " {}", item)?; - } - - writeln!(formatter, " </plurals>") - } -} - -impl Display for PluralVariant { - fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { - write!( - formatter, - r#"<item quantity="{}">{}</item>"#, - self.quantity, self.string - ) - } -} - -impl Display for PluralQuantity { - fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { - let quantity = match self { - PluralQuantity::Zero => "zero", - PluralQuantity::One => "one", - PluralQuantity::Few => "few", - PluralQuantity::Many => "many", - PluralQuantity::Other => "other", - }; - - write!(formatter, "{}", quantity) - } -} - -/// An Android string value -/// -/// Handles escaping the string when it is created but also allows normalizing it for comparing it -/// with gettext messages through a `normalize` method. 
-#[derive(Clone, Debug, Eq, Deserialize, Hash, PartialEq, Serialize)] -pub struct StringValue(String); - -impl From<&str> for StringValue { - fn from(string: &str) -> Self { - let value_with_parameters = htmlize::escape_text(string) - .replace(r"\", r"\\") - .replace("\"", "\\\"") - .replace(r"'", r"\'"); - - let mut parts = value_with_parameters.split("%"); - let mut value = parts.next().unwrap().to_owned(); - - for (index, part) in parts.enumerate() { - value.push_str(&format!("%{}$", index + 1)); - value.push_str(part); - } - - StringValue(value) - } -} - -impl StringValue { - /// Normalize the string value into a common format. - /// - /// Makes it possible to compare the Android strings with the gettext messages. - pub fn normalize(&mut self) { - // Collapse line breaks present in the XML file - let value = LINE_BREAKS.replace_all(&self.0, " "); - // Unescape apostrophes - let value = APOSTROPHES.replace_all(&value, "'"); - // Unescape double quotes - let value = DOUBLE_QUOTES.replace_all(&value, r#"""#); - // Mark where parameters are positioned, removing the parameter index - let value = PARAMETERS.replace_all(&value, "%"); - - // Unescape XML characters - self.0 = htmlize::unescape(value.as_bytes()); - } - - /// Clones the internal string value. 
- pub fn to_string(&self) -> String { - self.0.clone() - } -} - -impl Deref for StringValue { - type Target = str; - - fn deref(&self) -> &Self::Target { - self.0.as_str() - } -} - -impl Display for StringValue { - fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { - write!(formatter, "{}", self.0) - } -} diff --git a/android/translations-converter/src/android/mod.rs b/android/translations-converter/src/android/mod.rs new file mode 100644 index 0000000000..8bc9f8e41e --- /dev/null +++ b/android/translations-converter/src/android/mod.rs @@ -0,0 +1,9 @@ +mod plurals; +mod string_value; +mod strings; + +pub use self::{ + plurals::{PluralQuantity, PluralResource, PluralResources}, + string_value::StringValue, + strings::{StringResource, StringResources}, +}; diff --git a/android/translations-converter/src/android/plurals.rs b/android/translations-converter/src/android/plurals.rs new file mode 100644 index 0000000000..6378eb502c --- /dev/null +++ b/android/translations-converter/src/android/plurals.rs @@ -0,0 +1,148 @@ +use super::string_value::StringValue; +use serde::{Deserialize, Serialize}; +use std::{ + fmt::{self, Display, Formatter}, + ops::{Deref, DerefMut}, +}; + +/// Contents of an Android plurals resources file. +/// +/// This type can be created directly deserializing the `plurals.xml` file. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct PluralResources { + #[serde(rename = "plurals")] + entries: Vec<PluralResource>, +} + +/// An entry in an Android plurals resources file. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct PluralResource { + /// The plural resource ID. + pub name: String, + + /// The items of the plural resource, one for each quantity variant. + #[serde(rename = "item")] + pub items: Vec<PluralVariant>, +} + +/// A string resource for a specific quantity. +/// +/// This is part of a plural resource. 
+#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct PluralVariant { + /// The quantity for this variant to be used. + pub quantity: PluralQuantity, + + /// The string value + #[serde(rename = "$value")] + pub string: StringValue, +} + +/// A valid quantity for a plural variant. +#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum PluralQuantity { + Zero, + One, + Few, + Many, + Other, +} + +impl Deref for PluralResources { + type Target = Vec<PluralResource>; + + fn deref(&self) -> &Self::Target { + &self.entries + } +} + +impl DerefMut for PluralResources { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.entries + } +} + +impl IntoIterator for PluralResources { + type Item = PluralResource; + type IntoIter = std::vec::IntoIter<Self::Item>; + + fn into_iter(self) -> Self::IntoIter { + self.entries.into_iter() + } +} + +impl PluralResources { + /// Create an empty list of plural resources. + pub fn new() -> Self { + PluralResources { + entries: Vec::new(), + } + } +} + +impl PluralResource { + /// Create a plural resource representation. + /// + /// The resource has a name, used as the identifier, and a list of items. Each item contains + /// the message and the quantity it should be used for. 
+ pub fn new(name: String, values: impl Iterator<Item = (PluralQuantity, String)>) -> Self { + let items = values + .map(|(quantity, string)| PluralVariant { + quantity, + string: StringValue::from_unescaped(&string), + }) + .collect(); + + PluralResource { name, items } + } +} + +impl Display for PluralResources { + fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { + writeln!(formatter, r#"<?xml version="1.0" encoding="utf-8"?>"#)?; + writeln!(formatter, "<resources>")?; + + for entry in &self.entries { + write!(formatter, "{}", entry)?; + } + + writeln!(formatter, "</resources>") + } +} + +impl Display for PluralResource { + fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { + writeln!(formatter, r#" <plurals name="{}">"#, self.name)?; + + for item in &self.items { + writeln!(formatter, " {}", item)?; + } + + writeln!(formatter, " </plurals>") + } +} + +impl Display for PluralVariant { + fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { + write!( + formatter, + r#"<item quantity="{}">{}</item>"#, + self.quantity, self.string + ) + } +} + +impl Display for PluralQuantity { + fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { + let quantity = match self { + PluralQuantity::Zero => "zero", + PluralQuantity::One => "one", + PluralQuantity::Few => "few", + PluralQuantity::Many => "many", + PluralQuantity::Other => "other", + }; + + write!(formatter, "{}", quantity) + } +} diff --git a/android/translations-converter/src/android/string_value.rs b/android/translations-converter/src/android/string_value.rs new file mode 100644 index 0000000000..bd202f16fb --- /dev/null +++ b/android/translations-converter/src/android/string_value.rs @@ -0,0 +1,215 @@ +use lazy_static::lazy_static; +use regex::Regex; +use serde::{Deserialize, Deserializer, Serialize}; +use std::{ + fmt::{self, Display, Formatter}, + ops::Deref, +}; + +/// An Android string value +#[derive(Clone, Debug, Eq, Hash, PartialEq, Serialize)] +pub struct StringValue(String); + +impl 
StringValue { + /// Create a `StringValue` from an unescaped string. + /// + /// The string will be properly escaped, and all parameters will have indices added to them if + /// they don't have any. Indices are assigned sequentially starting from the previously + /// specified index plus one, or starting from one if there aren't any previously specified + /// indices. + pub fn from_unescaped(string: &str) -> Self { + let value_with_parameters = htmlize::escape_text(string) + .replace(r"\", r"\\") + .replace("\"", "\\\"") + .replace(r"'", r"\'"); + + let value_without_line_breaks = Self::collapse_line_breaks(value_with_parameters); + let value = Self::ensure_parameters_are_indexed(value_without_line_breaks); + + StringValue(value) + } + + /// The input XML file might have line breaks inside the string, and they should be collapsed + /// into a single whitespace character. + fn collapse_line_breaks(original: String) -> String { + lazy_static! { + static ref LINE_BREAKS: Regex = Regex::new(r"\s*\n\s*").unwrap(); + } + + LINE_BREAKS.replace_all(&original, " ").into_owned() + } + + /// This helper method ensures parameters are in the form of `%4$d`, i.e., it will ensure that + /// there is the `<number>$` part. + /// + /// A typical input would be something like `Things are %d, %3$s and %s`, and this method + /// would update the string so that all parameters have indices: `Things are %1$d, %3$s and + /// %4$s`. + fn ensure_parameters_are_indexed(original: String) -> String { + lazy_static! 
{ + static ref PARAMETER_INDEX: Regex = Regex::new(r"^(\d+)\$").unwrap(); + } + + let mut parts = original.split("%"); + let mut output = parts.next().unwrap().to_owned(); + let mut offset = 1; + + for (index, part) in parts.enumerate() { + let index = index as isize; + + if let Some(captures) = PARAMETER_INDEX.captures(part) { + // String already has a parameter index + let specified_index: isize = captures + .get(1) + .expect("Regex has at least one capture group") + .as_str() + .parse() + .expect("First capture group should match an integer"); + + // Update offset so that next parameters without index receive sequential values + // starting after the specified index + offset = specified_index - index; + + // Restore '%' removed during the split + output.push('%'); + } else { + // String doesn't have a parameter index, so it is added + output.push_str(&format!("%{}$", index + offset)); + } + + output.push_str(part); + } + + output + } +} + +impl StringValue { + /// Clones the internal string value. 
+ pub fn to_string(&self) -> String { + self.0.clone() + } +} + +impl Deref for StringValue { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.0.as_str() + } +} + +impl Display for StringValue { + fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { + write!(formatter, "{}", self.0) + } +} + +impl<'de> Deserialize<'de> for StringValue { + fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> { + let raw_string = String::deserialize(deserializer)?; + let string_with_collapsed_newlines = Self::collapse_line_breaks(raw_string); + + Ok(StringValue(string_with_collapsed_newlines)) + } +} + +#[cfg(test)] +mod tests { + use super::StringValue; + + #[test] + fn android_escaping() { + let input = StringValue::from_unescaped(concat!( + r"A backslash \", + r#""Inside double quotes""#, + "'Inside single quotes'", + )); + + let expected = concat!( + r"A backslash \\", + r#"\"Inside double quotes\""#, + r"\'Inside single quotes\'", + ); + + assert_eq!(input.to_string(), expected); + } + + #[test] + fn newline_collapsing() { + let input = StringValue::from_unescaped( + "This is + a multi-line string + that should be + collapsed into a single line", + ); + + let expected = "This is a multi-line string that should be collapsed into a single line"; + + assert_eq!(input.to_string(), expected); + } + + #[test] + fn xml_escaping() { + let input = StringValue::from_unescaped(concat!( + "An ampersand: &", + "<tag>A dummy fake XML tag</tag>", + )); + + let expected = concat!( + "An ampersand: &amp;", + r"&lt;tag&gt;A dummy fake XML tag&lt;/tag&gt;", + ); + + assert_eq!(input.to_string(), expected); + } + + #[test] + fn doesnt_change_parameter_indices() { + let original = "%1$d %3$s %9$s %6$d %7$d"; + + let input = StringValue::from_unescaped(original); + + assert_eq!(input.to_string(), original); + } + + #[test] + fn adds_parameter_indices() { + let input = StringValue::from_unescaped("%d %s %s %d"); + + let expected = "%1$d %2$s %3$s %4$d"; + + 
assert_eq!(input.to_string(), expected); + } + + #[test] + fn correctly_updates_generated_index_offset_based_on_existing_indices() { + let input = StringValue::from_unescaped("%d %4$s %d %2$s %d"); + + let expected = "%1$d %4$s %5$d %2$s %3$d"; + + assert_eq!(input.to_string(), expected); + } + + #[test] + fn deserialization() { + #[derive(serde::Deserialize)] + pub struct Wrapper { + #[serde(rename = "$value")] + value: StringValue, + } + + let serialized_input = r#"<root>A multi-line string value + with \"quotes\" and + parameters %2$s %d %1$d</root>"#; + + let deserialized: Wrapper = + serde_xml_rs::from_str(serialized_input).expect("Mal-formed serialized input"); + + let expected = StringValue( + r#"A multi-line string value with \"quotes\" and parameters %2$s %d %1$d"#.to_owned(), + ); + + assert_eq!(deserialized.value, expected); + } +} diff --git a/android/translations-converter/src/android/strings.rs b/android/translations-converter/src/android/strings.rs new file mode 100644 index 0000000000..19fe03e18f --- /dev/null +++ b/android/translations-converter/src/android/strings.rs @@ -0,0 +1,189 @@ +use super::string_value::StringValue; +use serde::{Deserialize, Serialize}; +use std::{ + fmt::{self, Display, Formatter}, + ops::{Deref, DerefMut}, +}; + +/// Contents of an Android string resources file. +/// +/// This type can be created directly deserializing the `strings.xml` file. +#[derive(Clone, Debug, Eq, Deserialize, PartialEq, Serialize)] +pub struct StringResources { + #[serde(rename = "string")] + entries: Vec<StringResource>, +} + +/// An entry in an Android string resources file. +#[derive(Clone, Debug, Eq, Deserialize, PartialEq, Serialize)] +pub struct StringResource { + /// The string resource ID. + pub name: String, + + /// If the string should be translated or not. + #[serde(default = "default_translatable")] + pub translatable: bool, + + /// The string value. 
+ #[serde(rename = "$value")] + pub value: StringValue, +} + +impl StringResources { + /// Create an empty list of Android string resources. + pub fn new() -> Self { + StringResources { + entries: Vec::new(), + } + } + + /// Sorts the entries alphabetically based on their IDs. + pub fn sort(&mut self) { + self.entries + .sort_by(|left, right| left.name.cmp(&right.name)); + } +} + +impl Deref for StringResources { + type Target = Vec<StringResource>; + + fn deref(&self) -> &Self::Target { + &self.entries + } +} + +impl DerefMut for StringResources { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.entries + } +} + +impl IntoIterator for StringResources { + type Item = StringResource; + type IntoIter = std::vec::IntoIter<Self::Item>; + + fn into_iter(self) -> Self::IntoIter { + self.entries.into_iter() + } +} + +impl StringResource { + /// Create a new Android string resource entry. + /// + /// The name is the resource ID, and the value will be properly escaped. + pub fn new(name: String, value: &str) -> Self { + StringResource { + name, + translatable: true, + value: StringValue::from_unescaped(value), + } + } +} + +fn default_translatable() -> bool { + true +} + +// Unfortunately, direct serialization to XML isn't working correctly. 
+impl Display for StringResources { + fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { + writeln!(formatter, r#"<?xml version="1.0" encoding="utf-8"?>"#)?; + writeln!(formatter, "<resources>")?; + + for string in &self.entries { + writeln!(formatter, " {}", string)?; + } + + writeln!(formatter, "</resources>") + } +} + +impl Display for StringResource { + fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { + if self.translatable { + write!( + formatter, + r#"<string name="{}">{}</string>"#, + self.name, self.value + ) + } else { + write!( + formatter, + r#"<string name="{}" translatable="false">{}</string>"#, + self.name, self.value + ) + } + } +} + +#[cfg(test)] +mod tests { + use super::{StringResource, StringResources, StringValue}; + + #[test] + fn deserialization() { + let xml_input = r#"<resources> + <string name="first">First string</string> + <string name="second" translatable="false">Second string</string> + </resources>"#; + + let mut expected = StringResources::new(); + + expected.extend(vec![ + StringResource { + name: "first".to_owned(), + translatable: true, + value: StringValue::from_unescaped("First string"), + }, + StringResource { + name: "second".to_owned(), + translatable: false, + value: StringValue::from_unescaped("Second string"), + }, + ]); + + let deserialized: StringResources = + serde_xml_rs::from_str(xml_input).expect("malformed XML in test input"); + + assert_eq!(deserialized, expected); + } + + #[test] + fn deserialization_of_multi_line_strings() { + let xml_input = r#"<resources> + <string name="first">First string is + split in two lines</string> + <string + name="second" + translatable="false" + > + Second string is also split + but it also has some weird whitespace + inside the tags and some indentation + </string> + </resources>"#; + + let mut expected = StringResources::new(); + + expected.extend(vec![ + StringResource { + name: "first".to_owned(), + translatable: true, + value: StringValue::from_unescaped("First 
string is split in two lines"), + }, + StringResource { + name: "second".to_owned(), + translatable: false, + value: StringValue::from_unescaped(concat!( + "Second string is also split but it also has some weird whitespace inside the ", + "tags and some indentation", + )), + }, + ]); + + let deserialized: StringResources = + serde_xml_rs::from_str(xml_input).expect("malformed XML in test input"); + + assert_eq!(deserialized, expected); + } +} diff --git a/android/translations-converter/src/gettext.rs b/android/translations-converter/src/gettext/mod.rs index c496775ee2..1bed501b72 100644 --- a/android/translations-converter/src/gettext.rs +++ b/android/translations-converter/src/gettext/mod.rs @@ -1,20 +1,15 @@ -use lazy_static::lazy_static; -use regex::Regex; +mod msg_string; +mod plural_form; + use std::{ collections::BTreeMap, - fmt::{self, Display, Formatter}, fs::{File, OpenOptions}, io::{self, BufRead, BufReader, BufWriter, Write}, mem, - ops::Deref, path::Path, }; -lazy_static! { - static ref APOSTROPHE_VARIATION: Regex = Regex::new("’").unwrap(); - static ref ESCAPED_DOUBLE_QUOTES: Regex = Regex::new(r#"\\""#).unwrap(); - static ref PARAMETERS: Regex = Regex::new(r"%\([^)]*\)").unwrap(); -} +pub use self::{msg_string::MsgString, plural_form::PluralForm}; /// A parsed gettext translation file. #[derive(Clone, Debug)] @@ -23,16 +18,6 @@ pub struct Translation { entries: Vec<MsgEntry>, } -/// Known plural forms. -#[derive(Clone, Copy, Debug)] -pub enum PluralForm { - Single, - SingularForOne, - SingularForZeroAndOne, - Polish, - Russian, -} - /// A message entry in a gettext translation file. #[derive(Clone, Debug)] pub struct MsgEntry { @@ -50,10 +35,6 @@ pub enum MsgValue { }, } -/// A message string in a gettext translation file. -#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] -pub struct MsgString(String); - /// A helper macro to match a string to various prefix and suffix combinations. macro_rules! 
match_str { ( @@ -74,9 +55,6 @@ macro_rules! match_str { impl Translation { /// Load message entries from a gettext translation file. /// - /// The messages are normalized into a common format so that they can be compared to Android - /// string resource entries. - /// /// The only metadata that is parsed from the file is the "Plural-Form" header. It is assumed /// that the header value is one of some hard-coded values, so if new languages that have new /// plurals are added, the code will have to be updated. @@ -119,11 +97,11 @@ impl Translation { for line in lines { match_str! { (line.trim()) ["msgid \"", msg_id, "\""] => { - current_id = Some(normalize(msg_id)); + current_id = Some(MsgString::from_escaped(msg_id)); } ["msgstr \"", translation, "\""] => { if let Some(id) = current_id.take() { - let value = MsgValue::Invariant(normalize(translation)); + let value = MsgValue::Invariant(MsgString::from_escaped(translation)); parsing_header = id.is_empty() && translation.is_empty(); @@ -134,7 +112,7 @@ impl Translation { current_plural_id = None; } ["msgid_plural \"", plural_id, "\""] => { - current_plural_id = Some(normalize(plural_id)); + current_plural_id = Some(MsgString::from_escaped(plural_id)); parsing_header = false; } ["msgstr[", plural_translation, "\""] => { @@ -148,13 +126,13 @@ impl Translation { let variant_msg = parse_line(&plural_translation[variant_id_end..], "] \"", "") .expect("Invalid plural msgstr"); - variants.insert(variant_id, normalize(variant_msg)); + variants.insert(variant_id, MsgString::from_escaped(variant_msg)); parsing_header = false; } ["\"", header, "\\n\""] => { if parsing_header { if let Some(plural_formula) = parse_line(header, "Plural-Forms: ", ";") { - plural_form = Some(PluralForm::from_formula(plural_formula)); + plural_form = PluralForm::from_formula(plural_formula); } } } @@ -201,58 +179,9 @@ impl IntoIterator for Translation { } } -impl PluralForm { - /// Obtain an instance based on a known plural formula. 
- /// - /// Plural variants need to be obtained using a formula. However, some locales have known - /// formulas, so they can be represented as a known plural form. This constructor can return a - /// plural form based on the formulas that are known to be used in the project. - pub fn from_formula(formula: &str) -> Self { - match formula { - "nplurals=1; plural=0" => PluralForm::Single, - "nplurals=2; plural=(n != 1)" => PluralForm::SingularForOne, - "nplurals=2; plural=(n > 1)" => PluralForm::SingularForZeroAndOne, - "nplurals=4; plural=(n==1 ? 0 : (n%10>=2 && n%10<=4) && (n%100<12 || n%100>14) ? 1 : n!=1 && (n%10>=0 && n%10<=1) || (n%10>=5 && n%10<=9) || (n%100>=12 && n%100<=14) ? 2 : 3)" => { - PluralForm::Polish - } - "nplurals=4; plural=((n%10==1 && n%100!=11) ? 0 : ((n%10 >= 2 && n%10 <=4 && (n%100 < 12 || n%100 > 14)) ? 1 : ((n%10 == 0 || (n%10 >= 5 && n%10 <=9)) || (n%100 >= 11 && n%100 <= 14)) ? 2 : 3))" => { - PluralForm::Russian - } - other => panic!("Unknown plural formula: {}", other), - } - } -} - -impl From<String> for MsgString { - fn from(string: String) -> Self { - MsgString(string) - } -} - -impl From<&str> for MsgString { - fn from(string: &str) -> Self { - string.to_owned().into() - } -} - -impl Display for MsgString { - /// Write the ID message string with proper escaping. 
- fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { - self.0.replace(r#"""#, r#"\""#).fmt(formatter) - } -} - -impl Deref for MsgString { - type Target = str; - - fn deref(&self) -> &Self::Target { - self.0.as_str() - } -} - -impl From<String> for MsgValue { - fn from(string: String) -> Self { - MsgValue::Invariant(string.into()) +impl From<MsgString> for MsgValue { + fn from(string: MsgString) -> Self { + MsgValue::Invariant(string) } } @@ -301,14 +230,3 @@ fn parse_line<'l>(line: &'l str, prefix: &str, suffix: &str) -> Option<&'l str> None } } - -fn normalize(string: &str) -> MsgString { - // Use a single common apostrophe character - let string = APOSTROPHE_VARIATION.replace_all(&string, "'"); - // Mark where parameters are positioned, removing the parameter name - let string = PARAMETERS.replace_all(&string, "%"); - // Remove escaped double-quotes - let string = ESCAPED_DOUBLE_QUOTES.replace_all(&string, r#"""#); - - string.into_owned().into() -} diff --git a/android/translations-converter/src/gettext/msg_string.rs b/android/translations-converter/src/gettext/msg_string.rs new file mode 100644 index 0000000000..d693c4ee04 --- /dev/null +++ b/android/translations-converter/src/gettext/msg_string.rs @@ -0,0 +1,74 @@ +use std::{ + fmt::{self, Display, Formatter}, + ops::Deref, +}; + +/// A message string in a gettext translation file. +#[derive(Clone, Debug)] +pub struct MsgString(String); + +impl MsgString { + /// Create a new empty `MsgString`. + /// + /// Equivalent to `MsgString::from_escaped("")`. + pub fn empty() -> Self { + MsgString(String::new()) + } + + /// Create a new `MsgString` from string without any escaped characters. + /// + /// This will ensure that the string has the double quotes characters properly escaped. + pub fn from_unescaped(string: &str) -> Self { + MsgString(string.replace(r#"""#, r#"\""#)) + } + + /// Create a new `MsgString` from string that already has proper escaping. 
+ pub fn from_escaped(string: impl Into<String>) -> Self { + MsgString(string.into()) + } +} + +impl Display for MsgString { + /// Write the ID message string with proper escaping. + fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { + self.0.fmt(formatter) + } +} + +impl Deref for MsgString { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.0.as_str() + } +} + +#[cfg(test)] +mod tests { + use super::MsgString; + + #[test] + fn empty_constructor() { + let input = MsgString::empty(); + + assert_eq!(input.to_string(), ""); + } + + #[test] + fn escaping() { + let input = MsgString::from_unescaped(r#""Inside double quotes""#); + + let expected = r#"\"Inside double quotes\""#; + + assert_eq!(input.to_string(), expected); + } + + #[test] + fn not_escaping() { + let original = r#"\"Inside double quotes\""#; + + let input = MsgString::from_escaped(original); + + assert_eq!(input.to_string(), original); + } +} diff --git a/android/translations-converter/src/gettext/plural_form.rs b/android/translations-converter/src/gettext/plural_form.rs new file mode 100644 index 0000000000..c55066c7b7 --- /dev/null +++ b/android/translations-converter/src/gettext/plural_form.rs @@ -0,0 +1,31 @@ +/// Known plural forms. +#[derive(Clone, Copy, Debug)] +pub enum PluralForm { + Single, + SingularForOne, + SingularForZeroAndOne, + Polish, + Russian, +} + +impl PluralForm { + /// Obtain an instance based on a known plural formula. + /// + /// Plural variants need to be obtained using a formula. However, some locales have known + /// formulas, so they can be represented as a known plural form. This constructor can return a + /// plural form based on the formulas that are known to be used in the project. 
+ pub fn from_formula(formula: &str) -> Option<Self> { + match formula { + "nplurals=1; plural=0" => Some(PluralForm::Single), + "nplurals=2; plural=(n != 1)" => Some(PluralForm::SingularForOne), + "nplurals=2; plural=(n > 1)" => Some(PluralForm::SingularForZeroAndOne), + "nplurals=4; plural=(n==1 ? 0 : (n%10>=2 && n%10<=4) && (n%100<12 || n%100>14) ? 1 : n!=1 && (n%10>=0 && n%10<=1) || (n%10>=5 && n%10<=9) || (n%100>=12 && n%100<=14) ? 2 : 3)" => { + Some(PluralForm::Polish) + } + "nplurals=4; plural=((n%10==1 && n%100!=11) ? 0 : ((n%10 >= 2 && n%10 <=4 && (n%100 < 12 || n%100 > 14)) ? 1 : ((n%10 == 0 || (n%10 >= 5 && n%10 <=9)) || (n%100 >= 11 && n%100 <= 14)) ? 2 : 3))" => { + Some(PluralForm::Russian) + } + _ => None + } + } +} diff --git a/android/translations-converter/src/main.rs b/android/translations-converter/src/main.rs index d755913243..d11320b070 100644 --- a/android/translations-converter/src/main.rs +++ b/android/translations-converter/src/main.rs @@ -33,7 +33,9 @@ mod android; mod gettext; +mod normalize; +use crate::normalize::Normalize; use std::{ collections::HashMap, fs::{self, File}, @@ -45,32 +47,14 @@ fn main() { let strings_file = File::open(resources_dir.join("values/strings.xml")) .expect("Failed to open string resources file"); - let mut string_resources: android::StringResources = + let string_resources: android::StringResources = serde_xml_rs::from_reader(strings_file).expect("Failed to read string resources file"); - string_resources.normalize(); - string_resources.retain(|string| string.translatable); - - let mut known_urls = HashMap::with_capacity(string_resources.len()); - let mut known_strings = HashMap::with_capacity(string_resources.len()); - - for string in string_resources { - let destination = if string.value.starts_with("https://mullvad.net/en/") { - &mut known_urls - } else { - &mut known_strings - }; - - if destination - .insert(string.value.to_string(), string.name) - .is_some() - { - panic!( - "String {:?} has more than 
one Android resource ID", - string.value - ); - } - } + let (known_urls, known_strings): (HashMap<_, _>, HashMap<_, _>) = string_resources + .into_iter() + .filter(|resource| resource.translatable) + .map(|resource| (resource.value.normalize(), resource.name)) + .partition(|(string, _id)| string.starts_with("https://mullvad.net/en/")); let plurals_file = File::open(resources_dir.join("values/plurals.xml")) .expect("Failed to open plurals resources file"); @@ -133,8 +117,8 @@ fn main() { for message in template { match message.value { - gettext::MsgValue::Invariant(_) => missing_translations.remove(&*message.id), - gettext::MsgValue::Plural { .. } => missing_plurals.remove(&*message.id), + gettext::MsgValue::Invariant(_) => missing_translations.remove(&message.id.normalize()), + gettext::MsgValue::Plural { .. } => missing_plurals.remove(&message.id.normalize()), }; } @@ -147,8 +131,8 @@ fn main() { .into_iter() .inspect(|(missing_translation, id)| println!(" {}: {}", id, missing_translation)) .map(|(id, _)| gettext::MsgEntry { - id: id.into(), - value: String::new().into(), + id: gettext::MsgString::from_unescaped(&id), + value: gettext::MsgString::empty().into(), }), ) .expect("Failed to append missing translations to message template file"); @@ -179,30 +163,27 @@ fn main() { .iter() .position(|plural| plural.quantity == android::PluralQuantity::One) .expect("Missing singular variant to use as msgid"); - let id = plural - .items - .remove(singular_position) - .string - .to_string() - .into(); + let id = gettext::MsgString::from_escaped( + plural.items.remove(singular_position).string.to_string(), + ); let other_position = plural .items .iter() .position(|plural| plural.quantity == android::PluralQuantity::Other) .expect("Missing other variant to use as msgid_plural"); - let plural_id = plural - .items - .remove(other_position) - .string - .to_string() - .into(); + let plural_id = gettext::MsgString::from_escaped( + 
plural.items.remove(other_position).string.to_string(), + ); gettext::MsgEntry { id, value: gettext::MsgValue::Plural { plural_id, - values: vec!["".into(), "".into()], + values: vec![ + gettext::MsgString::empty().into(), + gettext::MsgString::empty().into(), + ], }, } }), @@ -263,16 +244,16 @@ fn generate_translations( for translation in translations { match translation.value { gettext::MsgValue::Invariant(translation_value) => { - if let Some(android_key) = known_strings.remove(&*translation.id) { + if let Some(android_key) = known_strings.remove(&translation.id.normalize()) { localized_strings.push(android::StringResource::new( android_key, - &translation_value, + &translation_value.normalize(), )); } } gettext::MsgValue::Plural { values, .. } => { - if let Some(android_key) = known_plurals.remove(&*translation.id) { - let values = values.into_iter().map(|message| message.to_string()); + if let Some(android_key) = known_plurals.remove(&translation.id.normalize()) { + let values = values.into_iter().map(|message| message.normalize()); localized_plurals.push(android::PluralResource::new( android_key, diff --git a/android/translations-converter/src/normalize.rs b/android/translations-converter/src/normalize.rs new file mode 100644 index 0000000000..9e8af385f4 --- /dev/null +++ b/android/translations-converter/src/normalize.rs @@ -0,0 +1,101 @@ +use lazy_static::lazy_static; +use regex::Regex; + +pub trait Normalize { + /// Normalize the string value into a common format. + /// + /// Makes it possible to compare different representations of translation messages. + fn normalize(&self) -> String; +} + +mod android { + use super::*; + use crate::android::StringValue; + + lazy_static! 
{ + static ref APOSTROPHES: Regex = Regex::new(r"\\'").unwrap(); + static ref DOUBLE_QUOTES: Regex = Regex::new(r#"\\""#).unwrap(); + static ref PARAMETERS: Regex = Regex::new(r"%[0-9]*\$").unwrap(); + } + + impl Normalize for StringValue { + fn normalize(&self) -> String { + // Unescape apostrophes + let value = APOSTROPHES.replace_all(&*self, "'"); + // Unescape double quotes + let value = DOUBLE_QUOTES.replace_all(&value, r#"""#); + // Mark where parameters are positioned, removing the parameter index + let value = PARAMETERS.replace_all(&value, "%"); + + // Unescape XML characters + htmlize::unescape(value.as_bytes()) + } + } +} + +mod gettext { + use super::*; + use crate::gettext::MsgString; + + lazy_static! { + static ref APOSTROPHE_VARIATION: Regex = Regex::new("’").unwrap(); + static ref ESCAPED_DOUBLE_QUOTES: Regex = Regex::new(r#"\\""#).unwrap(); + static ref PARAMETERS: Regex = Regex::new(r"%\([^)]*\)").unwrap(); + } + + impl Normalize for MsgString { + fn normalize(&self) -> String { + // Use a single common apostrophe character + let string = APOSTROPHE_VARIATION.replace_all(&*self, "'"); + // Mark where parameters are positioned, removing the parameter name + let string = PARAMETERS.replace_all(&string, "%"); + // Remove escaped double-quotes + let string = ESCAPED_DOUBLE_QUOTES.replace_all(&string, r#"""#); + + string.into_owned() + } + } +} + +#[cfg(test)] +mod tests { + use super::Normalize; + + #[test] + fn normalize_android_string_value() { + use crate::android::StringValue; + + let input = StringValue::from_unescaped(concat!( + "'Inside single quotes'", + r#""Inside double quotes""#, + "With parameters: %1$d, %2$s", + )); + + let expected = concat!( + "\'Inside single quotes\'", + r#""Inside double quotes""#, + "With parameters: %d, %s", + ); + + assert_eq!(input.normalize(), expected); + } + + #[test] + fn normalize_gettext_msg_string() { + use crate::gettext::MsgString; + + let input = MsgString::from_unescaped(concat!( + "'Inside single 
quotes'", + r#""Inside double quotes""#, + "With parameters: %(number)d, %(string)s", + )); + + let expected = concat!( + "\'Inside single quotes\'", + r#""Inside double quotes""#, + "With parameters: %d, %s", + ); + + assert_eq!(input.normalize(), expected); + } +} |
