summary | refs | log | tree | commit | diff | homepage
path: root/android
diff options
context:
space:
mode:
author Janito Vaqueiro Ferreira Filho <janito@mullvad.net> 2021-05-19 11:34:28 -0300
committer Janito Vaqueiro Ferreira Filho <janito@mullvad.net> 2021-05-19 11:34:28 -0300
commit 4cf35350a9d86c81d78acaed00e582ce045ccb3c (patch)
tree 976a7c2333ed65da160aeb4d3e4ff86b3abe7f1b /android
parent 6b2a852d25ba0cfe5bd06bd00f32f6ccdf6314b7 (diff)
parent 7de3c8757b191a403e90af439dcc9377911794d6 (diff)
download mullvadvpn-4cf35350a9d86c81d78acaed00e582ce045ccb3c.tar.xz
mullvadvpn-4cf35350a9d86c81d78acaed00e582ce045ccb3c.zip
Merge branch 'refactor-translations-converter'
Diffstat (limited to 'android')
-rw-r--r-- android/translations-converter/src/android.rs 347
-rw-r--r-- android/translations-converter/src/android/mod.rs 9
-rw-r--r-- android/translations-converter/src/android/plurals.rs 148
-rw-r--r-- android/translations-converter/src/android/string_value.rs 215
-rw-r--r-- android/translations-converter/src/android/strings.rs 189
-rw-r--r-- android/translations-converter/src/gettext/mod.rs (renamed from android/translations-converter/src/gettext.rs) 106
-rw-r--r-- android/translations-converter/src/gettext/msg_string.rs 74
-rw-r--r-- android/translations-converter/src/gettext/plural_form.rs 31
-rw-r--r-- android/translations-converter/src/main.rs 71
-rw-r--r-- android/translations-converter/src/normalize.rs 101
10 files changed, 805 insertions, 486 deletions
diff --git a/android/translations-converter/src/android.rs b/android/translations-converter/src/android.rs
deleted file mode 100644
index f49f50025c..0000000000
--- a/android/translations-converter/src/android.rs
+++ /dev/null
@@ -1,347 +0,0 @@
-use lazy_static::lazy_static;
-use regex::Regex;
-use serde::{Deserialize, Serialize};
-use std::{
- fmt::{self, Display, Formatter},
- ops::{Deref, DerefMut},
-};
-
-lazy_static! {
- static ref LINE_BREAKS: Regex = Regex::new(r"\s*\n\s*").unwrap();
- static ref APOSTROPHES: Regex = Regex::new(r"\\'").unwrap();
- static ref DOUBLE_QUOTES: Regex = Regex::new(r#"\\""#).unwrap();
- static ref PARAMETERS: Regex = Regex::new(r"%[0-9]*\$").unwrap();
-}
-
-/// Contents of an Android string resources file.
-///
-/// This type can be created directly deserializing the `strings.xml` file.
-#[derive(Clone, Debug, Deserialize, Serialize)]
-pub struct StringResources {
- #[serde(rename = "string")]
- entries: Vec<StringResource>,
-}
-
-/// An entry in an Android string resources file.
-#[derive(Clone, Debug, Deserialize, Serialize)]
-pub struct StringResource {
- /// The string resource ID.
- pub name: String,
-
- /// If the string should be translated or not.
- #[serde(default = "default_translatable")]
- pub translatable: bool,
-
- /// The string value.
- #[serde(rename = "$value")]
- pub value: StringValue,
-}
-
-impl StringResources {
- /// Create an empty list of Android string resources.
- pub fn new() -> Self {
- StringResources {
- entries: Vec::new(),
- }
- }
-
- /// Normalize the strings into a common format.
- ///
- /// Allows the string values to be compared to the gettext messages.
- pub fn normalize(&mut self) {
- for entry in &mut self.entries {
- entry.normalize();
- }
- }
-
- /// Sorts the entries alphabetically based on their IDs.
- pub fn sort(&mut self) {
- self.entries
- .sort_by(|left, right| left.name.cmp(&right.name));
- }
-}
-
-impl Deref for StringResources {
- type Target = Vec<StringResource>;
-
- fn deref(&self) -> &Self::Target {
- &self.entries
- }
-}
-
-impl DerefMut for StringResources {
- fn deref_mut(&mut self) -> &mut Self::Target {
- &mut self.entries
- }
-}
-
-impl IntoIterator for StringResources {
- type Item = StringResource;
- type IntoIter = std::vec::IntoIter<Self::Item>;
-
- fn into_iter(self) -> Self::IntoIter {
- self.entries.into_iter()
- }
-}
-
-impl StringResource {
- /// Create a new Android string resource entry.
- ///
- /// The name is the resource ID, and the value will be properly escaped.
- pub fn new(name: String, value: &str) -> Self {
- StringResource {
- name,
- translatable: true,
- value: StringValue::from(value),
- }
- }
-
- /// Normalize the string value into a common format.
- ///
- /// Makes it possible to compare the Android strings with the gettext messages.
- pub fn normalize(&mut self) {
- self.value.normalize();
- }
-}
-
-fn default_translatable() -> bool {
- true
-}
-
-// Unfortunately, direct serialization to XML isn't working correctly.
-impl Display for StringResources {
- fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
- writeln!(formatter, r#"<?xml version="1.0" encoding="utf-8"?>"#)?;
- writeln!(formatter, "<resources>")?;
-
- for string in &self.entries {
- writeln!(formatter, " {}", string)?;
- }
-
- writeln!(formatter, "</resources>")
- }
-}
-
-impl Display for StringResource {
- fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
- if self.translatable {
- write!(
- formatter,
- r#"<string name="{}">{}</string>"#,
- self.name, self.value
- )
- } else {
- write!(
- formatter,
- r#"<string name="{}" translatable="false">{}</string>"#,
- self.name, self.value
- )
- }
- }
-}
-
-/// Contents of an Android plurals resources file.
-///
-/// This type can be created directly deserializing the `plurals.xml` file.
-#[derive(Clone, Debug, Deserialize, Serialize)]
-pub struct PluralResources {
- #[serde(rename = "plurals")]
- entries: Vec<PluralResource>,
-}
-
-/// An entry in an Android plurals resources file.
-#[derive(Clone, Debug, Deserialize, Serialize)]
-pub struct PluralResource {
- /// The plural resource ID.
- pub name: String,
-
- /// The items of the plural resource, one for each quantity variant.
- #[serde(rename = "item")]
- pub items: Vec<PluralVariant>,
-}
-
-/// A string resource for a specific quantity.
-///
-/// This is part of a plural resource.
-#[derive(Clone, Debug, Deserialize, Serialize)]
-pub struct PluralVariant {
- /// The quantity for this variant to be used.
- pub quantity: PluralQuantity,
-
- /// The string value
- #[serde(rename = "$value")]
- pub string: StringValue,
-}
-
-/// A valid quantity for a plural variant.
-#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
-#[serde(rename_all = "snake_case")]
-pub enum PluralQuantity {
- Zero,
- One,
- Few,
- Many,
- Other,
-}
-
-impl Deref for PluralResources {
- type Target = Vec<PluralResource>;
-
- fn deref(&self) -> &Self::Target {
- &self.entries
- }
-}
-
-impl DerefMut for PluralResources {
- fn deref_mut(&mut self) -> &mut Self::Target {
- &mut self.entries
- }
-}
-
-impl IntoIterator for PluralResources {
- type Item = PluralResource;
- type IntoIter = std::vec::IntoIter<Self::Item>;
-
- fn into_iter(self) -> Self::IntoIter {
- self.entries.into_iter()
- }
-}
-
-impl PluralResources {
- /// Create an empty list of plural resources.
- pub fn new() -> Self {
- PluralResources {
- entries: Vec::new(),
- }
- }
-}
-
-impl PluralResource {
- /// Create a plural resource representation.
- ///
- /// The resource has a name, used as the identifier, and a list of items. Each item contains
- /// the message and the quantity it should be used for.
- pub fn new(name: String, values: impl Iterator<Item = (PluralQuantity, String)>) -> Self {
- let items = values
- .map(|(quantity, string)| PluralVariant {
- quantity,
- string: StringValue::from(&*string),
- })
- .collect();
-
- PluralResource { name, items }
- }
-}
-
-impl Display for PluralResources {
- fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
- writeln!(formatter, r#"<?xml version="1.0" encoding="utf-8"?>"#)?;
- writeln!(formatter, "<resources>")?;
-
- for entry in &self.entries {
- write!(formatter, "{}", entry)?;
- }
-
- writeln!(formatter, "</resources>")
- }
-}
-
-impl Display for PluralResource {
- fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
- writeln!(formatter, r#" <plurals name="{}">"#, self.name)?;
-
- for item in &self.items {
- writeln!(formatter, " {}", item)?;
- }
-
- writeln!(formatter, " </plurals>")
- }
-}
-
-impl Display for PluralVariant {
- fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
- write!(
- formatter,
- r#"<item quantity="{}">{}</item>"#,
- self.quantity, self.string
- )
- }
-}
-
-impl Display for PluralQuantity {
- fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
- let quantity = match self {
- PluralQuantity::Zero => "zero",
- PluralQuantity::One => "one",
- PluralQuantity::Few => "few",
- PluralQuantity::Many => "many",
- PluralQuantity::Other => "other",
- };
-
- write!(formatter, "{}", quantity)
- }
-}
-
-/// An Android string value
-///
-/// Handles escaping the string when it is created but also allows normalizing it for comparing it
-/// with gettext messages through a `normalize` method.
-#[derive(Clone, Debug, Eq, Deserialize, Hash, PartialEq, Serialize)]
-pub struct StringValue(String);
-
-impl From<&str> for StringValue {
- fn from(string: &str) -> Self {
- let value_with_parameters = htmlize::escape_text(string)
- .replace(r"\", r"\\")
- .replace("\"", "\\\"")
- .replace(r"'", r"\'");
-
- let mut parts = value_with_parameters.split("%");
- let mut value = parts.next().unwrap().to_owned();
-
- for (index, part) in parts.enumerate() {
- value.push_str(&format!("%{}$", index + 1));
- value.push_str(part);
- }
-
- StringValue(value)
- }
-}
-
-impl StringValue {
- /// Normalize the string value into a common format.
- ///
- /// Makes it possible to compare the Android strings with the gettext messages.
- pub fn normalize(&mut self) {
- // Collapse line breaks present in the XML file
- let value = LINE_BREAKS.replace_all(&self.0, " ");
- // Unescape apostrophes
- let value = APOSTROPHES.replace_all(&value, "'");
- // Unescape double quotes
- let value = DOUBLE_QUOTES.replace_all(&value, r#"""#);
- // Mark where parameters are positioned, removing the parameter index
- let value = PARAMETERS.replace_all(&value, "%");
-
- // Unescape XML characters
- self.0 = htmlize::unescape(value.as_bytes());
- }
-
- /// Clones the internal string value.
- pub fn to_string(&self) -> String {
- self.0.clone()
- }
-}
-
-impl Deref for StringValue {
- type Target = str;
-
- fn deref(&self) -> &Self::Target {
- self.0.as_str()
- }
-}
-
-impl Display for StringValue {
- fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
- write!(formatter, "{}", self.0)
- }
-}
diff --git a/android/translations-converter/src/android/mod.rs b/android/translations-converter/src/android/mod.rs
new file mode 100644
index 0000000000..8bc9f8e41e
--- /dev/null
+++ b/android/translations-converter/src/android/mod.rs
@@ -0,0 +1,9 @@
+mod plurals;
+mod string_value;
+mod strings;
+
+pub use self::{
+ plurals::{PluralQuantity, PluralResource, PluralResources},
+ string_value::StringValue,
+ strings::{StringResource, StringResources},
+};
diff --git a/android/translations-converter/src/android/plurals.rs b/android/translations-converter/src/android/plurals.rs
new file mode 100644
index 0000000000..6378eb502c
--- /dev/null
+++ b/android/translations-converter/src/android/plurals.rs
@@ -0,0 +1,148 @@
+use super::string_value::StringValue;
+use serde::{Deserialize, Serialize};
+use std::{
+ fmt::{self, Display, Formatter},
+ ops::{Deref, DerefMut},
+};
+
+/// Contents of an Android plurals resources file.
+///
+/// This type can be created directly deserializing the `plurals.xml` file.
+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub struct PluralResources {
+ #[serde(rename = "plurals")]
+ entries: Vec<PluralResource>,
+}
+
+/// An entry in an Android plurals resources file.
+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub struct PluralResource {
+ /// The plural resource ID.
+ pub name: String,
+
+ /// The items of the plural resource, one for each quantity variant.
+ #[serde(rename = "item")]
+ pub items: Vec<PluralVariant>,
+}
+
+/// A string resource for a specific quantity.
+///
+/// This is part of a plural resource.
+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub struct PluralVariant {
+ /// The quantity for this variant to be used.
+ pub quantity: PluralQuantity,
+
+ /// The string value
+ #[serde(rename = "$value")]
+ pub string: StringValue,
+}
+
+/// A valid quantity for a plural variant.
+#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
+#[serde(rename_all = "snake_case")]
+pub enum PluralQuantity {
+ Zero,
+ One,
+ Few,
+ Many,
+ Other,
+}
+
+impl Deref for PluralResources {
+ type Target = Vec<PluralResource>;
+
+ fn deref(&self) -> &Self::Target {
+ &self.entries
+ }
+}
+
+impl DerefMut for PluralResources {
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ &mut self.entries
+ }
+}
+
+impl IntoIterator for PluralResources {
+ type Item = PluralResource;
+ type IntoIter = std::vec::IntoIter<Self::Item>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ self.entries.into_iter()
+ }
+}
+
+impl PluralResources {
+ /// Create an empty list of plural resources.
+ pub fn new() -> Self {
+ PluralResources {
+ entries: Vec::new(),
+ }
+ }
+}
+
+impl PluralResource {
+ /// Create a plural resource representation.
+ ///
+ /// The resource has a name, used as the identifier, and a list of items. Each item contains
+ /// the message and the quantity it should be used for.
+ pub fn new(name: String, values: impl Iterator<Item = (PluralQuantity, String)>) -> Self {
+ let items = values
+ .map(|(quantity, string)| PluralVariant {
+ quantity,
+ string: StringValue::from_unescaped(&string),
+ })
+ .collect();
+
+ PluralResource { name, items }
+ }
+}
+
+impl Display for PluralResources {
+ fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
+ writeln!(formatter, r#"<?xml version="1.0" encoding="utf-8"?>"#)?;
+ writeln!(formatter, "<resources>")?;
+
+ for entry in &self.entries {
+ write!(formatter, "{}", entry)?;
+ }
+
+ writeln!(formatter, "</resources>")
+ }
+}
+
+impl Display for PluralResource {
+ fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
+ writeln!(formatter, r#" <plurals name="{}">"#, self.name)?;
+
+ for item in &self.items {
+ writeln!(formatter, " {}", item)?;
+ }
+
+ writeln!(formatter, " </plurals>")
+ }
+}
+
+impl Display for PluralVariant {
+ fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
+ write!(
+ formatter,
+ r#"<item quantity="{}">{}</item>"#,
+ self.quantity, self.string
+ )
+ }
+}
+
+impl Display for PluralQuantity {
+ fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
+ let quantity = match self {
+ PluralQuantity::Zero => "zero",
+ PluralQuantity::One => "one",
+ PluralQuantity::Few => "few",
+ PluralQuantity::Many => "many",
+ PluralQuantity::Other => "other",
+ };
+
+ write!(formatter, "{}", quantity)
+ }
+}
diff --git a/android/translations-converter/src/android/string_value.rs b/android/translations-converter/src/android/string_value.rs
new file mode 100644
index 0000000000..bd202f16fb
--- /dev/null
+++ b/android/translations-converter/src/android/string_value.rs
@@ -0,0 +1,215 @@
+use lazy_static::lazy_static;
+use regex::Regex;
+use serde::{Deserialize, Deserializer, Serialize};
+use std::{
+ fmt::{self, Display, Formatter},
+ ops::Deref,
+};
+
+/// An Android string value
+#[derive(Clone, Debug, Eq, Hash, PartialEq, Serialize)]
+pub struct StringValue(String);
+
+impl StringValue {
+ /// Create a `StringValue` from an unescaped string.
+ ///
+ /// The string will be properly escaped, and all parameters will have indices added to them if
+ /// they don't have any. Indices are assigned sequentially starting from the previously
+ /// specified index plus one, or starting from one if there aren't any previously specified
+ /// indices.
+ pub fn from_unescaped(string: &str) -> Self {
+ let value_with_parameters = htmlize::escape_text(string)
+ .replace(r"\", r"\\")
+ .replace("\"", "\\\"")
+ .replace(r"'", r"\'");
+
+ let value_without_line_breaks = Self::collapse_line_breaks(value_with_parameters);
+ let value = Self::ensure_parameters_are_indexed(value_without_line_breaks);
+
+ StringValue(value)
+ }
+
+ /// The input XML file might have line breaks inside the string, and they should be collapsed
+ /// into a single whitespace character.
+ fn collapse_line_breaks(original: String) -> String {
+ lazy_static! {
+ static ref LINE_BREAKS: Regex = Regex::new(r"\s*\n\s*").unwrap();
+ }
+
+ LINE_BREAKS.replace_all(&original, " ").into_owned()
+ }
+
+ /// This helper method ensures parameters are in the form of `%4$d`, i.e., it will ensure that
+ /// there is the `<number>$` part.
+ ///
+ /// A typical input would be something like `Things are %d, %3$s and %s`, and this method
+ /// would update the string so that all parameters have indices: `Things are %1$d, %3$s and
+ /// %4$s`.
+ fn ensure_parameters_are_indexed(original: String) -> String {
+ lazy_static! {
+ static ref PARAMETER_INDEX: Regex = Regex::new(r"^(\d+)\$").unwrap();
+ }
+
+ let mut parts = original.split("%");
+ let mut output = parts.next().unwrap().to_owned();
+ let mut offset = 1;
+
+ for (index, part) in parts.enumerate() {
+ let index = index as isize;
+
+ if let Some(captures) = PARAMETER_INDEX.captures(part) {
+ // String already has a parameter index
+ let specified_index: isize = captures
+ .get(1)
+ .expect("Regex has at least one capture group")
+ .as_str()
+ .parse()
+ .expect("First capture group should match an integer");
+
+ // Update offset so that next parameters without index receive sequential values
+ // starting after the specified index
+ offset = specified_index - index;
+
+ // Restore '%' removed during the split
+ output.push('%');
+ } else {
+ // String doesn't have a parameter index, so it is added
+ output.push_str(&format!("%{}$", index + offset));
+ }
+
+ output.push_str(part);
+ }
+
+ output
+ }
+}
+
+impl StringValue {
+ /// Clones the internal string value.
+ pub fn to_string(&self) -> String {
+ self.0.clone()
+ }
+}
+
+impl Deref for StringValue {
+ type Target = str;
+
+ fn deref(&self) -> &Self::Target {
+ self.0.as_str()
+ }
+}
+
+impl Display for StringValue {
+ fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
+ write!(formatter, "{}", self.0)
+ }
+}
+
+impl<'de> Deserialize<'de> for StringValue {
+ fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
+ let raw_string = String::deserialize(deserializer)?;
+ let string_with_collapsed_newlines = Self::collapse_line_breaks(raw_string);
+
+ Ok(StringValue(string_with_collapsed_newlines))
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::StringValue;
+
+ #[test]
+ fn android_escaping() {
+ let input = StringValue::from_unescaped(concat!(
+ r"A backslash \",
+ r#""Inside double quotes""#,
+ "'Inside single quotes'",
+ ));
+
+ let expected = concat!(
+ r"A backslash \\",
+ r#"\"Inside double quotes\""#,
+ r"\'Inside single quotes\'",
+ );
+
+ assert_eq!(input.to_string(), expected);
+ }
+
+ #[test]
+ fn newline_collapsing() {
+ let input = StringValue::from_unescaped(
+ "This is
+ a multi-line string
+ that should be
+ collapsed into a single line",
+ );
+
+ let expected = "This is a multi-line string that should be collapsed into a single line";
+
+ assert_eq!(input.to_string(), expected);
+ }
+
+ #[test]
+ fn xml_escaping() {
+ let input = StringValue::from_unescaped(concat!(
+ "An ampersand: &",
+ "<tag>A dummy fake XML tag</tag>",
+ ));
+
+ let expected = concat!(
+ "An ampersand: &amp;",
+ r"&lt;tag&gt;A dummy fake XML tag&lt;/tag&gt;",
+ );
+
+ assert_eq!(input.to_string(), expected);
+ }
+
+ #[test]
+ fn doesnt_change_parameter_indices() {
+ let original = "%1$d %3$s %9$s %6$d %7$d";
+
+ let input = StringValue::from_unescaped(original);
+
+ assert_eq!(input.to_string(), original);
+ }
+
+ #[test]
+ fn adds_parameter_indices() {
+ let input = StringValue::from_unescaped("%d %s %s %d");
+
+ let expected = "%1$d %2$s %3$s %4$d";
+
+ assert_eq!(input.to_string(), expected);
+ }
+
+ #[test]
+ fn correctly_updates_generated_index_offset_based_on_existing_indices() {
+ let input = StringValue::from_unescaped("%d %4$s %d %2$s %d");
+
+ let expected = "%1$d %4$s %5$d %2$s %3$d";
+
+ assert_eq!(input.to_string(), expected);
+ }
+
+ #[test]
+ fn deserialization() {
+ #[derive(serde::Deserialize)]
+ pub struct Wrapper {
+ #[serde(rename = "$value")]
+ value: StringValue,
+ }
+
+ let serialized_input = r#"<root>A multi-line string value
+ with \"quotes\" and
+ parameters %2$s %d %1$d</root>"#;
+
+ let deserialized: Wrapper =
+ serde_xml_rs::from_str(serialized_input).expect("Mal-formed serialized input");
+
+ let expected = StringValue(
+ r#"A multi-line string value with \"quotes\" and parameters %2$s %d %1$d"#.to_owned(),
+ );
+
+ assert_eq!(deserialized.value, expected);
+ }
+}
diff --git a/android/translations-converter/src/android/strings.rs b/android/translations-converter/src/android/strings.rs
new file mode 100644
index 0000000000..19fe03e18f
--- /dev/null
+++ b/android/translations-converter/src/android/strings.rs
@@ -0,0 +1,189 @@
+use super::string_value::StringValue;
+use serde::{Deserialize, Serialize};
+use std::{
+ fmt::{self, Display, Formatter},
+ ops::{Deref, DerefMut},
+};
+
+/// Contents of an Android string resources file.
+///
+/// This type can be created directly deserializing the `strings.xml` file.
+#[derive(Clone, Debug, Eq, Deserialize, PartialEq, Serialize)]
+pub struct StringResources {
+ #[serde(rename = "string")]
+ entries: Vec<StringResource>,
+}
+
+/// An entry in an Android string resources file.
+#[derive(Clone, Debug, Eq, Deserialize, PartialEq, Serialize)]
+pub struct StringResource {
+ /// The string resource ID.
+ pub name: String,
+
+ /// If the string should be translated or not.
+ #[serde(default = "default_translatable")]
+ pub translatable: bool,
+
+ /// The string value.
+ #[serde(rename = "$value")]
+ pub value: StringValue,
+}
+
+impl StringResources {
+ /// Create an empty list of Android string resources.
+ pub fn new() -> Self {
+ StringResources {
+ entries: Vec::new(),
+ }
+ }
+
+ /// Sorts the entries alphabetically based on their IDs.
+ pub fn sort(&mut self) {
+ self.entries
+ .sort_by(|left, right| left.name.cmp(&right.name));
+ }
+}
+
+impl Deref for StringResources {
+ type Target = Vec<StringResource>;
+
+ fn deref(&self) -> &Self::Target {
+ &self.entries
+ }
+}
+
+impl DerefMut for StringResources {
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ &mut self.entries
+ }
+}
+
+impl IntoIterator for StringResources {
+ type Item = StringResource;
+ type IntoIter = std::vec::IntoIter<Self::Item>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ self.entries.into_iter()
+ }
+}
+
+impl StringResource {
+ /// Create a new Android string resource entry.
+ ///
+ /// The name is the resource ID, and the value will be properly escaped.
+ pub fn new(name: String, value: &str) -> Self {
+ StringResource {
+ name,
+ translatable: true,
+ value: StringValue::from_unescaped(value),
+ }
+ }
+}
+
+fn default_translatable() -> bool {
+ true
+}
+
+// Unfortunately, direct serialization to XML isn't working correctly.
+impl Display for StringResources {
+ fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
+ writeln!(formatter, r#"<?xml version="1.0" encoding="utf-8"?>"#)?;
+ writeln!(formatter, "<resources>")?;
+
+ for string in &self.entries {
+ writeln!(formatter, " {}", string)?;
+ }
+
+ writeln!(formatter, "</resources>")
+ }
+}
+
+impl Display for StringResource {
+ fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
+ if self.translatable {
+ write!(
+ formatter,
+ r#"<string name="{}">{}</string>"#,
+ self.name, self.value
+ )
+ } else {
+ write!(
+ formatter,
+ r#"<string name="{}" translatable="false">{}</string>"#,
+ self.name, self.value
+ )
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::{StringResource, StringResources, StringValue};
+
+ #[test]
+ fn deserialization() {
+ let xml_input = r#"<resources>
+ <string name="first">First string</string>
+ <string name="second" translatable="false">Second string</string>
+ </resources>"#;
+
+ let mut expected = StringResources::new();
+
+ expected.extend(vec![
+ StringResource {
+ name: "first".to_owned(),
+ translatable: true,
+ value: StringValue::from_unescaped("First string"),
+ },
+ StringResource {
+ name: "second".to_owned(),
+ translatable: false,
+ value: StringValue::from_unescaped("Second string"),
+ },
+ ]);
+
+ let deserialized: StringResources =
+ serde_xml_rs::from_str(xml_input).expect("malformed XML in test input");
+
+ assert_eq!(deserialized, expected);
+ }
+
+ #[test]
+ fn deserialization_of_multi_line_strings() {
+ let xml_input = r#"<resources>
+ <string name="first">First string is
+ split in two lines</string>
+ <string
+ name="second"
+ translatable="false"
+ >
+ Second string is also split
+ but it also has some weird whitespace
+ inside the tags and some indentation
+ </string>
+ </resources>"#;
+
+ let mut expected = StringResources::new();
+
+ expected.extend(vec![
+ StringResource {
+ name: "first".to_owned(),
+ translatable: true,
+ value: StringValue::from_unescaped("First string is split in two lines"),
+ },
+ StringResource {
+ name: "second".to_owned(),
+ translatable: false,
+ value: StringValue::from_unescaped(concat!(
+ "Second string is also split but it also has some weird whitespace inside the ",
+ "tags and some indentation",
+ )),
+ },
+ ]);
+
+ let deserialized: StringResources =
+ serde_xml_rs::from_str(xml_input).expect("malformed XML in test input");
+
+ assert_eq!(deserialized, expected);
+ }
+}
diff --git a/android/translations-converter/src/gettext.rs b/android/translations-converter/src/gettext/mod.rs
index c496775ee2..1bed501b72 100644
--- a/android/translations-converter/src/gettext.rs
+++ b/android/translations-converter/src/gettext/mod.rs
@@ -1,20 +1,15 @@
-use lazy_static::lazy_static;
-use regex::Regex;
+mod msg_string;
+mod plural_form;
+
use std::{
collections::BTreeMap,
- fmt::{self, Display, Formatter},
fs::{File, OpenOptions},
io::{self, BufRead, BufReader, BufWriter, Write},
mem,
- ops::Deref,
path::Path,
};
-lazy_static! {
- static ref APOSTROPHE_VARIATION: Regex = Regex::new("’").unwrap();
- static ref ESCAPED_DOUBLE_QUOTES: Regex = Regex::new(r#"\\""#).unwrap();
- static ref PARAMETERS: Regex = Regex::new(r"%\([^)]*\)").unwrap();
-}
+pub use self::{msg_string::MsgString, plural_form::PluralForm};
/// A parsed gettext translation file.
#[derive(Clone, Debug)]
@@ -23,16 +18,6 @@ pub struct Translation {
entries: Vec<MsgEntry>,
}
-/// Known plural forms.
-#[derive(Clone, Copy, Debug)]
-pub enum PluralForm {
- Single,
- SingularForOne,
- SingularForZeroAndOne,
- Polish,
- Russian,
-}
-
/// A message entry in a gettext translation file.
#[derive(Clone, Debug)]
pub struct MsgEntry {
@@ -50,10 +35,6 @@ pub enum MsgValue {
},
}
-/// A message string in a gettext translation file.
-#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
-pub struct MsgString(String);
-
/// A helper macro to match a string to various prefix and suffix combinations.
macro_rules! match_str {
(
@@ -74,9 +55,6 @@ macro_rules! match_str {
impl Translation {
/// Load message entries from a gettext translation file.
///
- /// The messages are normalized into a common format so that they can be compared to Android
- /// string resource entries.
- ///
/// The only metadata that is parsed from the file is the "Plural-Form" header. It is assumed
/// that the header value is one of some hard-coded values, so if new languages that have new
/// plurals are added, the code will have to be updated.
@@ -119,11 +97,11 @@ impl Translation {
for line in lines {
match_str! { (line.trim())
["msgid \"", msg_id, "\""] => {
- current_id = Some(normalize(msg_id));
+ current_id = Some(MsgString::from_escaped(msg_id));
}
["msgstr \"", translation, "\""] => {
if let Some(id) = current_id.take() {
- let value = MsgValue::Invariant(normalize(translation));
+ let value = MsgValue::Invariant(MsgString::from_escaped(translation));
parsing_header = id.is_empty() && translation.is_empty();
@@ -134,7 +112,7 @@ impl Translation {
current_plural_id = None;
}
["msgid_plural \"", plural_id, "\""] => {
- current_plural_id = Some(normalize(plural_id));
+ current_plural_id = Some(MsgString::from_escaped(plural_id));
parsing_header = false;
}
["msgstr[", plural_translation, "\""] => {
@@ -148,13 +126,13 @@ impl Translation {
let variant_msg = parse_line(&plural_translation[variant_id_end..], "] \"", "")
.expect("Invalid plural msgstr");
- variants.insert(variant_id, normalize(variant_msg));
+ variants.insert(variant_id, MsgString::from_escaped(variant_msg));
parsing_header = false;
}
["\"", header, "\\n\""] => {
if parsing_header {
if let Some(plural_formula) = parse_line(header, "Plural-Forms: ", ";") {
- plural_form = Some(PluralForm::from_formula(plural_formula));
+ plural_form = PluralForm::from_formula(plural_formula);
}
}
}
@@ -201,58 +179,9 @@ impl IntoIterator for Translation {
}
}
-impl PluralForm {
- /// Obtain an instance based on a known plural formula.
- ///
- /// Plural variants need to be obtained using a formula. However, some locales have known
- /// formulas, so they can be represented as a known plural form. This constructor can return a
- /// plural form based on the formulas that are known to be used in the project.
- pub fn from_formula(formula: &str) -> Self {
- match formula {
- "nplurals=1; plural=0" => PluralForm::Single,
- "nplurals=2; plural=(n != 1)" => PluralForm::SingularForOne,
- "nplurals=2; plural=(n > 1)" => PluralForm::SingularForZeroAndOne,
- "nplurals=4; plural=(n==1 ? 0 : (n%10>=2 && n%10<=4) && (n%100<12 || n%100>14) ? 1 : n!=1 && (n%10>=0 && n%10<=1) || (n%10>=5 && n%10<=9) || (n%100>=12 && n%100<=14) ? 2 : 3)" => {
- PluralForm::Polish
- }
- "nplurals=4; plural=((n%10==1 && n%100!=11) ? 0 : ((n%10 >= 2 && n%10 <=4 && (n%100 < 12 || n%100 > 14)) ? 1 : ((n%10 == 0 || (n%10 >= 5 && n%10 <=9)) || (n%100 >= 11 && n%100 <= 14)) ? 2 : 3))" => {
- PluralForm::Russian
- }
- other => panic!("Unknown plural formula: {}", other),
- }
- }
-}
-
-impl From<String> for MsgString {
- fn from(string: String) -> Self {
- MsgString(string)
- }
-}
-
-impl From<&str> for MsgString {
- fn from(string: &str) -> Self {
- string.to_owned().into()
- }
-}
-
-impl Display for MsgString {
- /// Write the ID message string with proper escaping.
- fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
- self.0.replace(r#"""#, r#"\""#).fmt(formatter)
- }
-}
-
-impl Deref for MsgString {
- type Target = str;
-
- fn deref(&self) -> &Self::Target {
- self.0.as_str()
- }
-}
-
-impl From<String> for MsgValue {
- fn from(string: String) -> Self {
- MsgValue::Invariant(string.into())
+impl From<MsgString> for MsgValue {
+ fn from(string: MsgString) -> Self {
+ MsgValue::Invariant(string)
}
}
@@ -301,14 +230,3 @@ fn parse_line<'l>(line: &'l str, prefix: &str, suffix: &str) -> Option<&'l str>
None
}
}
-
-fn normalize(string: &str) -> MsgString {
- // Use a single common apostrophe character
- let string = APOSTROPHE_VARIATION.replace_all(&string, "'");
- // Mark where parameters are positioned, removing the parameter name
- let string = PARAMETERS.replace_all(&string, "%");
- // Remove escaped double-quotes
- let string = ESCAPED_DOUBLE_QUOTES.replace_all(&string, r#"""#);
-
- string.into_owned().into()
-}
diff --git a/android/translations-converter/src/gettext/msg_string.rs b/android/translations-converter/src/gettext/msg_string.rs
new file mode 100644
index 0000000000..d693c4ee04
--- /dev/null
+++ b/android/translations-converter/src/gettext/msg_string.rs
@@ -0,0 +1,74 @@
+use std::{
+ fmt::{self, Display, Formatter},
+ ops::Deref,
+};
+
+/// A message string in a gettext translation file.
+#[derive(Clone, Debug)]
+pub struct MsgString(String);
+
+impl MsgString {
+ /// Create a new empty `MsgString`.
+ ///
+ /// Equivalent to `MsgString::from_escaped("")`.
+ pub fn empty() -> Self {
+ MsgString(String::new())
+ }
+
+ /// Create a new `MsgString` from a string without any escaped characters.
+ ///
+ /// This ensures that any double quote characters in the string are properly escaped.
+ pub fn from_unescaped(string: &str) -> Self {
+ MsgString(string.replace(r#"""#, r#"\""#))
+ }
+
+ /// Create a new `MsgString` from a string that already has proper escaping.
+ pub fn from_escaped(string: impl Into<String>) -> Self {
+ MsgString(string.into())
+ }
+}
+
+impl Display for MsgString {
+ /// Write the message string as stored; escaping is handled by the constructors.
+ fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
+ self.0.fmt(formatter)
+ }
+}
+
+impl Deref for MsgString {
+ type Target = str;
+
+ fn deref(&self) -> &Self::Target {
+ self.0.as_str()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::MsgString;
+
+ #[test]
+ fn empty_constructor() {
+ let input = MsgString::empty();
+
+ assert_eq!(input.to_string(), "");
+ }
+
+ #[test]
+ fn escaping() {
+ let input = MsgString::from_unescaped(r#""Inside double quotes""#);
+
+ let expected = r#"\"Inside double quotes\""#;
+
+ assert_eq!(input.to_string(), expected);
+ }
+
+ #[test]
+ fn not_escaping() {
+ let original = r#"\"Inside double quotes\""#;
+
+ let input = MsgString::from_escaped(original);
+
+ assert_eq!(input.to_string(), original);
+ }
+}
diff --git a/android/translations-converter/src/gettext/plural_form.rs b/android/translations-converter/src/gettext/plural_form.rs
new file mode 100644
index 0000000000..c55066c7b7
--- /dev/null
+++ b/android/translations-converter/src/gettext/plural_form.rs
@@ -0,0 +1,31 @@
+/// Known plural forms.
+#[derive(Clone, Copy, Debug)]
+pub enum PluralForm {
+ Single,
+ SingularForOne,
+ SingularForZeroAndOne,
+ Polish,
+ Russian,
+}
+
+impl PluralForm {
+ /// Obtain an instance based on a known plural formula.
+ ///
+ /// Plural variants need to be obtained using a formula. However, some locales have known
+ /// formulas, so they can be represented as a known plural form. This constructor can return a
+ /// plural form based on the formulas that are known to be used in the project.
+ pub fn from_formula(formula: &str) -> Option<Self> {
+ match formula {
+ "nplurals=1; plural=0" => Some(PluralForm::Single),
+ "nplurals=2; plural=(n != 1)" => Some(PluralForm::SingularForOne),
+ "nplurals=2; plural=(n > 1)" => Some(PluralForm::SingularForZeroAndOne),
+ "nplurals=4; plural=(n==1 ? 0 : (n%10>=2 && n%10<=4) && (n%100<12 || n%100>14) ? 1 : n!=1 && (n%10>=0 && n%10<=1) || (n%10>=5 && n%10<=9) || (n%100>=12 && n%100<=14) ? 2 : 3)" => {
+ Some(PluralForm::Polish)
+ }
+ "nplurals=4; plural=((n%10==1 && n%100!=11) ? 0 : ((n%10 >= 2 && n%10 <=4 && (n%100 < 12 || n%100 > 14)) ? 1 : ((n%10 == 0 || (n%10 >= 5 && n%10 <=9)) || (n%100 >= 11 && n%100 <= 14)) ? 2 : 3))" => {
+ Some(PluralForm::Russian)
+ }
+ _ => None
+ }
+ }
+}
diff --git a/android/translations-converter/src/main.rs b/android/translations-converter/src/main.rs
index d755913243..d11320b070 100644
--- a/android/translations-converter/src/main.rs
+++ b/android/translations-converter/src/main.rs
@@ -33,7 +33,9 @@
mod android;
mod gettext;
+mod normalize;
+use crate::normalize::Normalize;
use std::{
collections::HashMap,
fs::{self, File},
@@ -45,32 +47,14 @@ fn main() {
let strings_file = File::open(resources_dir.join("values/strings.xml"))
.expect("Failed to open string resources file");
- let mut string_resources: android::StringResources =
+ let string_resources: android::StringResources =
serde_xml_rs::from_reader(strings_file).expect("Failed to read string resources file");
- string_resources.normalize();
- string_resources.retain(|string| string.translatable);
-
- let mut known_urls = HashMap::with_capacity(string_resources.len());
- let mut known_strings = HashMap::with_capacity(string_resources.len());
-
- for string in string_resources {
- let destination = if string.value.starts_with("https://mullvad.net/en/") {
- &mut known_urls
- } else {
- &mut known_strings
- };
-
- if destination
- .insert(string.value.to_string(), string.name)
- .is_some()
- {
- panic!(
- "String {:?} has more than one Android resource ID",
- string.value
- );
- }
- }
+ let (known_urls, known_strings): (HashMap<_, _>, HashMap<_, _>) = string_resources
+ .into_iter()
+ .filter(|resource| resource.translatable)
+ .map(|resource| (resource.value.normalize(), resource.name))
+ .partition(|(string, _id)| string.starts_with("https://mullvad.net/en/"));
let plurals_file = File::open(resources_dir.join("values/plurals.xml"))
.expect("Failed to open plurals resources file");
@@ -133,8 +117,8 @@ fn main() {
for message in template {
match message.value {
- gettext::MsgValue::Invariant(_) => missing_translations.remove(&*message.id),
- gettext::MsgValue::Plural { .. } => missing_plurals.remove(&*message.id),
+ gettext::MsgValue::Invariant(_) => missing_translations.remove(&message.id.normalize()),
+ gettext::MsgValue::Plural { .. } => missing_plurals.remove(&message.id.normalize()),
};
}
@@ -147,8 +131,8 @@ fn main() {
.into_iter()
.inspect(|(missing_translation, id)| println!(" {}: {}", id, missing_translation))
.map(|(id, _)| gettext::MsgEntry {
- id: id.into(),
- value: String::new().into(),
+ id: gettext::MsgString::from_unescaped(&id),
+ value: gettext::MsgString::empty().into(),
}),
)
.expect("Failed to append missing translations to message template file");
@@ -179,30 +163,27 @@ fn main() {
.iter()
.position(|plural| plural.quantity == android::PluralQuantity::One)
.expect("Missing singular variant to use as msgid");
- let id = plural
- .items
- .remove(singular_position)
- .string
- .to_string()
- .into();
+ let id = gettext::MsgString::from_escaped(
+ plural.items.remove(singular_position).string.to_string(),
+ );
let other_position = plural
.items
.iter()
.position(|plural| plural.quantity == android::PluralQuantity::Other)
.expect("Missing other variant to use as msgid_plural");
- let plural_id = plural
- .items
- .remove(other_position)
- .string
- .to_string()
- .into();
+ let plural_id = gettext::MsgString::from_escaped(
+ plural.items.remove(other_position).string.to_string(),
+ );
gettext::MsgEntry {
id,
value: gettext::MsgValue::Plural {
plural_id,
- values: vec!["".into(), "".into()],
+ values: vec![
+ gettext::MsgString::empty().into(),
+ gettext::MsgString::empty().into(),
+ ],
},
}
}),
@@ -263,16 +244,16 @@ fn generate_translations(
for translation in translations {
match translation.value {
gettext::MsgValue::Invariant(translation_value) => {
- if let Some(android_key) = known_strings.remove(&*translation.id) {
+ if let Some(android_key) = known_strings.remove(&translation.id.normalize()) {
localized_strings.push(android::StringResource::new(
android_key,
- &translation_value,
+ &translation_value.normalize(),
));
}
}
gettext::MsgValue::Plural { values, .. } => {
- if let Some(android_key) = known_plurals.remove(&*translation.id) {
- let values = values.into_iter().map(|message| message.to_string());
+ if let Some(android_key) = known_plurals.remove(&translation.id.normalize()) {
+ let values = values.into_iter().map(|message| message.normalize());
localized_plurals.push(android::PluralResource::new(
android_key,
diff --git a/android/translations-converter/src/normalize.rs b/android/translations-converter/src/normalize.rs
new file mode 100644
index 0000000000..9e8af385f4
--- /dev/null
+++ b/android/translations-converter/src/normalize.rs
@@ -0,0 +1,101 @@
+use lazy_static::lazy_static;
+use regex::Regex;
+
+pub trait Normalize {
+ /// Normalize the string value into a common format.
+ ///
+ /// Makes it possible to compare different representations of translation messages.
+ fn normalize(&self) -> String;
+}
+
+mod android {
+ use super::*;
+ use crate::android::StringValue;
+
+ lazy_static! {
+ static ref APOSTROPHES: Regex = Regex::new(r"\\'").unwrap();
+ static ref DOUBLE_QUOTES: Regex = Regex::new(r#"\\""#).unwrap();
+ static ref PARAMETERS: Regex = Regex::new(r"%[0-9]*\$").unwrap();
+ }
+
+ impl Normalize for StringValue {
+ fn normalize(&self) -> String {
+ // Unescape apostrophes
+ let value = APOSTROPHES.replace_all(&*self, "'");
+ // Unescape double quotes
+ let value = DOUBLE_QUOTES.replace_all(&value, r#"""#);
+ // Mark where parameters are positioned, removing the parameter index
+ let value = PARAMETERS.replace_all(&value, "%");
+
+ // Unescape XML characters
+ htmlize::unescape(value.as_bytes())
+ }
+ }
+}
+
+mod gettext {
+ use super::*;
+ use crate::gettext::MsgString;
+
+ lazy_static! {
+ static ref APOSTROPHE_VARIATION: Regex = Regex::new("’").unwrap();
+ static ref ESCAPED_DOUBLE_QUOTES: Regex = Regex::new(r#"\\""#).unwrap();
+ static ref PARAMETERS: Regex = Regex::new(r"%\([^)]*\)").unwrap();
+ }
+
+ impl Normalize for MsgString {
+ fn normalize(&self) -> String {
+ // Use a single common apostrophe character
+ let string = APOSTROPHE_VARIATION.replace_all(&*self, "'");
+ // Mark where parameters are positioned, removing the parameter name
+ let string = PARAMETERS.replace_all(&string, "%");
+ // Unescape double-quotes
+ let string = ESCAPED_DOUBLE_QUOTES.replace_all(&string, r#"""#);
+
+ string.into_owned()
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::Normalize;
+
+ #[test]
+ fn normalize_android_string_value() {
+ use crate::android::StringValue;
+
+ let input = StringValue::from_unescaped(concat!(
+ "'Inside single quotes'",
+ r#""Inside double quotes""#,
+ "With parameters: %1$d, %2$s",
+ ));
+
+ let expected = concat!(
+ "\'Inside single quotes\'",
+ r#""Inside double quotes""#,
+ "With parameters: %d, %s",
+ );
+
+ assert_eq!(input.normalize(), expected);
+ }
+
+ #[test]
+ fn normalize_gettext_msg_string() {
+ use crate::gettext::MsgString;
+
+ let input = MsgString::from_unescaped(concat!(
+ "'Inside single quotes'",
+ r#""Inside double quotes""#,
+ "With parameters: %(number)d, %(string)s",
+ ));
+
+ let expected = concat!(
+ "\'Inside single quotes\'",
+ r#""Inside double quotes""#,
+ "With parameters: %d, %s",
+ );
+
+ assert_eq!(input.normalize(), expected);
+ }
+}