diff options
| author | Jonatan Rhodin <jonatan.rhodin@mullvad.net> | 2025-04-04 13:36:32 +0200 |
|---|---|---|
| committer | Jonatan Rhodin <jonatan.rhodin@mullvad.net> | 2025-04-04 13:36:32 +0200 |
| commit | 353f3d507f39489dfaa8d0c410e0285078c7903f (patch) | |
| tree | b3a21569c4e01080ac5ee68e8caffe4919ddd865 /android | |
| parent | 8feb87cfa9a41fe4cd8f70c2e167dc04edda9bfa (diff) | |
| download | mullvadvpn-353f3d507f39489dfaa8d0c410e0285078c7903f.tar.xz mullvadvpn-353f3d507f39489dfaa8d0c410e0285078c7903f.zip | |
Add support for different orderings of arguments for translations
Diffstat (limited to 'android')
| -rw-r--r-- | android/translations-converter/src/android/plurals.rs | 2 | ||||
| -rw-r--r-- | android/translations-converter/src/android/string_value.rs | 132 | ||||
| -rw-r--r-- | android/translations-converter/src/android/strings.rs | 17 | ||||
| -rw-r--r-- | android/translations-converter/src/gettext/messages.rs | 103 | ||||
| -rw-r--r-- | android/translations-converter/src/gettext/mod.rs | 2 | ||||
| -rw-r--r-- | android/translations-converter/src/main.rs | 16 | ||||
| -rw-r--r-- | android/translations-converter/src/normalize.rs | 13 |
7 files changed, 240 insertions, 45 deletions
diff --git a/android/translations-converter/src/android/plurals.rs b/android/translations-converter/src/android/plurals.rs index 81d8177449..288fe9c566 100644 --- a/android/translations-converter/src/android/plurals.rs +++ b/android/translations-converter/src/android/plurals.rs @@ -92,7 +92,7 @@ impl PluralResource { let items = values .map(|(quantity, string)| PluralVariant { quantity, - string: StringValue::from_unescaped(&string), + string: StringValue::from_unescaped(&string, None), }) .collect(); diff --git a/android/translations-converter/src/android/string_value.rs b/android/translations-converter/src/android/string_value.rs index 9a01e9df86..d7f56de35c 100644 --- a/android/translations-converter/src/android/string_value.rs +++ b/android/translations-converter/src/android/string_value.rs @@ -17,14 +17,14 @@ impl StringValue { /// they don't have any. Indices are assigned sequentially starting from the previously /// specified index plus one, or starting from one if there aren't any previously specified /// indices. - pub fn from_unescaped(string: &str) -> Self { + pub fn from_unescaped(string: &str, arg_ordering: Option<&Vec<u8>>) -> Self { let value_with_parameters = htmlize::escape_text(string) .replace('\\', r"\\") .replace('\"', "\\\"") .replace('\'', r"\'"); let value_without_line_breaks = Self::collapse_line_breaks(value_with_parameters); - let value = Self::ensure_parameters_are_indexed(value_without_line_breaks); + let value = Self::ensure_parameters_are_indexed(value_without_line_breaks, arg_ordering); StringValue(value) } @@ -42,7 +42,7 @@ impl StringValue { /// A typical input would be something like `Things are %d, %3$s and %s`, and this method /// would update the string so that all parameters have indices: `Things are %1$d, %3$s and /// %4$s`. - fn ensure_parameters_are_indexed(original: String) -> String { + fn ensure_parameters_are_indexed(original: String, arg_ordering: Option<&Vec<u8>>) -> String { static PARAMETER_INDEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\d+)\$").unwrap()); @@ -50,6 +50,18 @@ impl StringValue { let mut output = parts.next().unwrap().to_owned(); let mut offset = 1; + if let Some(ordering) = arg_ordering { + // Check for mismatch between number of args in original and arg_ordering + if ordering.len() != parts.clone().enumerate().count() { + panic!("Input string has different number of parameters to arg_ordering") + } + // Check if any parameter index in arg_ordering is higher than + // the total number of args + if ordering.iter().any(|i| *i > ordering.len() as u8) { + panic!("arg_ordering contains parameter that is too large") + } + } + for (index, part) in parts.enumerate() { let index = index as isize; @@ -70,7 +82,19 @@ impl StringValue { output.push('%'); } else { // String doesn't have a parameter index, so it is added - write!(&mut output, "%{}$", index + offset).expect("formatting failed"); + // If we have a specific arg_ordering, we will use this, + // if not we will fall back on index + offset + let parameter_index = match arg_ordering { + Some(ordering) => ordering[index as usize] as isize, + None => index + offset, + }; + + // Check for illegal parameter index + if parameter_index == 0 { + panic!("Parameter index is less than 1") + } + + write!(&mut output, "%{}$", parameter_index).expect("formatting failed"); } output.push_str(part); @@ -109,11 +133,14 @@ mod tests { #[test] fn android_escaping() { - let input = StringValue::from_unescaped(concat!( - r"A backslash \", - r#""Inside double quotes""#, - "'Inside single quotes'", - )); + let input = StringValue::from_unescaped( + concat!( + r"A backslash \", + r#""Inside double quotes""#, + "'Inside single quotes'", + ), + None, + ); let expected = concat!( r"A backslash \\", @@ -131,6 +158,7 @@ mod tests { a multi-line string that should be collapsed into a single line", + None, ); let expected = "This is a multi-line string that should be collapsed into a single line"; @@ -140,10 +168,10 @@ mod tests { #[test] fn xml_escaping() { - let input = StringValue::from_unescaped(concat!( - "An ampersand: &", - "<tag>A dummy fake XML tag</tag>", - )); + let input = StringValue::from_unescaped( + concat!("An ampersand: &", "<tag>A dummy fake XML tag</tag>",), + None, + ); let expected = concat!( "An ampersand: &", @@ -157,14 +185,14 @@ mod tests { fn doesnt_change_parameter_indices() { let original = "%1$d %3$s %9$s %6$d %7$d"; - let input = StringValue::from_unescaped(original); + let input = StringValue::from_unescaped(original, None); assert_eq!(input.to_string(), original); } #[test] fn adds_parameter_indices() { - let input = StringValue::from_unescaped("%d %s %s %d"); + let input = StringValue::from_unescaped("%d %s %s %d", None); let expected = "%1$d %2$s %3$s %4$d"; @@ -173,7 +201,7 @@ mod tests { #[test] fn correctly_updates_generated_index_offset_based_on_existing_indices() { - let input = StringValue::from_unescaped("%d %4$s %d %2$s %d"); + let input = StringValue::from_unescaped("%d %4$s %d %2$s %d", None); let expected = "%1$d %4$s %5$d %2$s %3$d"; @@ -201,4 +229,76 @@ mod tests { assert_eq!(deserialized.value, expected); } + + #[test] + fn if_argument_ordering_is_none_should_use_sequential_order_of_arguments() { + let input = StringValue::from_unescaped("%s again %s", None); + + let expected = "%1$s again %2$s"; + + assert_eq!(input.to_string(), expected); + } + + #[test] + fn if_argument_ordering_is_reversed_should_use_argument_ordering() { + let input = StringValue::from_unescaped("%s almost %s", Some([2, 1].to_vec().as_ref())); + + let expected = "%2$s almost %1$s"; + + assert_eq!(input.to_string(), expected); + } + + #[test] + fn if_argument_is_repeated_should_use_argument_ordering() { + let input = StringValue::from_unescaped( + "%s was a %s and a %s and almost a %s", + Some([1, 2, 3, 1].to_vec().as_ref()), + ); + + let expected = "%1$s was a %2$s and a %3$s and almost a %1$s"; + + assert_eq!(input.to_string(), expected); + } + + #[test] + #[should_panic] + fn if_arg_ordering_has_one_args_and_string_has_zero_arg_should_panic() { + StringValue::from_unescaped("", Some([1].to_vec().as_ref())); + } + + #[test] + #[should_panic] + fn if_arg_ordering_has_zero_args_and_string_has_one_argument_should_panic() { + StringValue::from_unescaped("%s", Some([].to_vec().as_ref())); + } + + #[test] + #[should_panic] + fn if_arg_ordering_has_zero_args_and_text_has_args_should_panic() { + StringValue::from_unescaped("%s", Some([].to_vec().as_ref())); + } + + #[test] + #[should_panic] + fn if_arg_ordering_has_more_args_should_throw_should_panic() { + StringValue::from_unescaped("%s", Some([1, 2].to_vec().as_ref())); + } + + #[test] + #[should_panic] + fn if_arg_ordering_has_less_args_should_panic() { + StringValue::from_unescaped("%s %s", Some([1].to_vec().as_ref())); + } + + #[test] + #[should_panic] + fn if_arg_ordering_has_zero_as_arg_should_panic() { + StringValue::from_unescaped("%s", Some([0].to_vec().as_ref())); + } + + #[test] + #[should_panic] + fn if_arg_ordering_has_no_1_as_arg_should_should_panic() { + StringValue::from_unescaped("%s", Some([2].to_vec().as_ref())); + } } diff --git a/android/translations-converter/src/android/strings.rs b/android/translations-converter/src/android/strings.rs index 6a9156f341..a3bb25708a 100644 --- a/android/translations-converter/src/android/strings.rs +++ b/android/translations-converter/src/android/strings.rs @@ -73,11 +73,11 @@ impl StringResource { /// Create a new Android string resource entry. /// /// The name is the resource ID, and the value will be properly escaped. - pub fn new(name: String, value: &str) -> Self { + pub fn new(name: String, value: &str, arg_ordering: Option<&Vec<u8>>) -> Self { StringResource { name, translatable: true, - value: StringValue::from_unescaped(value), + value: StringValue::from_unescaped(value, arg_ordering), } } } @@ -135,12 +135,12 @@ mod tests { StringResource { name: "first".to_owned(), translatable: true, - value: StringValue::from_unescaped("First string"), + value: StringValue::from_unescaped("First string", None), }, StringResource { name: "second".to_owned(), translatable: false, - value: StringValue::from_unescaped("Second string"), + value: StringValue::from_unescaped("Second string", None), }, ]); @@ -171,15 +171,18 @@ mod tests { StringResource { name: "first".to_owned(), translatable: true, - value: StringValue::from_unescaped("First string is split in two lines"), + value: StringValue::from_unescaped("First string is split in two lines", None), }, StringResource { name: "second".to_owned(), translatable: false, - value: StringValue::from_unescaped(concat!( + value: StringValue::from_unescaped( + concat!( "Second string is also split but it also has some weird whitespace inside the ", "tags and some indentation", - )), + ), + None, + ), }, ]); diff --git a/android/translations-converter/src/gettext/messages.rs b/android/translations-converter/src/gettext/messages.rs index 25316afae4..2f5d8c73c9 100644 --- a/android/translations-converter/src/gettext/messages.rs +++ b/android/translations-converter/src/gettext/messages.rs @@ -1,8 +1,10 @@ use super::{msg_string::MsgString, parser::Parser, plural_form::PluralForm}; +use regex::Regex; use std::{ fs::File, io::{BufRead, BufReader}, path::Path, + sync::LazyLock, }; /// A parsed gettext messages file. @@ -22,7 +24,7 @@ pub struct MsgEntry { /// A message string or plural set in a gettext translation file. #[derive(Clone, Debug)] pub enum MsgValue { - Invariant(MsgString), + Invariant(MsgString, Option<Vec<u8>>), Plural { plural_id: MsgString, values: Vec<MsgString>, @@ -57,8 +59,8 @@ impl Messages { /// The plural form for the messages is left unconfigured. pub fn starting_with(id: MsgString, msg_str: MsgString) -> Self { let first_entry = MsgEntry { - id, - value: MsgValue::Invariant(msg_str), + id: id.clone(), + value: MsgValue::Invariant(msg_str.clone(), argument_ordering(id, msg_str)), }; Messages { @@ -70,8 +72,8 @@ impl Messages { /// Add a non-plural entry. pub fn add(&mut self, id: MsgString, msg_str: MsgString) { let entry = MsgEntry { - id, - value: MsgValue::Invariant(msg_str), + id: id.clone(), + value: MsgValue::Invariant(msg_str.clone(), argument_ordering(id, msg_str)), }; self.entries.push(entry); @@ -99,10 +101,38 @@ impl IntoIterator for Messages { impl From<MsgString> for MsgValue { fn from(string: MsgString) -> Self { - MsgValue::Invariant(string) + MsgValue::Invariant(string, None) + } +} + +static NAMED_ARGUMENT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"%\([a-zA-Z]+\)").unwrap()); + +fn argument_ordering(id: MsgString, msg_str: MsgString) -> Option<Vec<u8>> { + if NAMED_ARGUMENT.is_match(&id) && NAMED_ARGUMENT.is_match(&msg_str) { + // Extract arguments in id + let id_args = extract_arguments(id); + // Extract arguments in translation + let value_args = extract_arguments(msg_str); + // Set index as id order and value as translation order + Some( + id_args + .iter() + .map(|id_arg| value_args.iter().position(|value_arg| value_arg == id_arg)) + .map(|f| f.unwrap() as u8 + 1) + .collect(), + ) + } else { + None } } +fn extract_arguments(msg: MsgString) -> Vec<String> { + NAMED_ARGUMENT + .find_iter(&msg) + .map(|s| String::from(s.as_str())) + .collect() +} + #[derive(Debug, thiserror::Error)] pub enum Error { /// Parser error while parsing file @@ -113,3 +143,64 @@ pub enum Error { #[error("Failed to read from the input file")] Io(#[from] std::io::Error), } + +#[cfg(test)] +mod tests { + use crate::gettext::messages::argument_ordering; + use crate::gettext::MsgString; + + #[test] + fn if_message_has_no_argument_should_have_no_argument_ordering() { + let msg_id = MsgString::from_escaped("This is a text"); + let msg_str = MsgString::from_escaped("Det här är en text"); + let argument_ordering = argument_ordering(msg_id, msg_str); + + let expected = None; + + assert_eq!(argument_ordering, expected); + } + + #[test] + fn if_message_has_no_translation_should_have_no_argument_ordering() { + let msg_id = MsgString::from_escaped("This is a %(text)"); + let msg_str = MsgString::from_escaped(""); + let argument_ordering = argument_ordering(msg_id, msg_str); + + let expected = None; + + assert_eq!(argument_ordering, expected); + } + + #[test] + fn if_argument_ordering_is_same_should_have_sequential_ordering() { + let msg_id = MsgString::from_escaped("This is a %(text) and %(star)"); + let msg_str = MsgString::from_escaped("Det här är en %(text) och %(star)"); + let argument_ordering = argument_ordering(msg_id, msg_str); + + let expected = Some([1, 2].to_vec()); + + assert_eq!(argument_ordering, expected); + } + + #[test] + fn if_argument_ordering_is_reversed_should_have_reversed_ordering() { + let msg_id = MsgString::from_escaped("This is a %(text) and %(star)"); + let msg_str = MsgString::from_escaped("Det här är en %(star) och %(text)"); + let argument_ordering = argument_ordering(msg_id, msg_str); + + let expected = Some([2, 1].to_vec()); + + assert_eq!(argument_ordering, expected); + } + + #[test] + fn if_argument_is_repeated_should_have_repeated_ordering() { + let msg_id = MsgString::from_escaped("This is a %(text) and %(text)"); + let msg_str = MsgString::from_escaped("Det här är en %(text) och %(text)"); + let argument_ordering = argument_ordering(msg_id, msg_str); + + let expected = Some([1, 1].to_vec()); + + assert_eq!(argument_ordering, expected); + } +} diff --git a/android/translations-converter/src/gettext/mod.rs b/android/translations-converter/src/gettext/mod.rs index ff92f9e6d4..5d4bad92a3 100644 --- a/android/translations-converter/src/gettext/mod.rs +++ b/android/translations-converter/src/gettext/mod.rs @@ -35,7 +35,7 @@ pub fn append_to_template( writeln!(writer, r#"msgid "{}""#, entry.id)?; match entry.value { - MsgValue::Invariant(value) => writeln!(writer, r#"msgstr "{value}""#)?, + MsgValue::Invariant(value, _) => writeln!(writer, r#"msgstr "{value}""#)?, MsgValue::Plural { plural_id, values } => { writeln!(writer, r#"msgid_plural "{plural_id}""#)?; diff --git a/android/translations-converter/src/main.rs b/android/translations-converter/src/main.rs index 314bbf3e44..1d78fedf81 100644 --- a/android/translations-converter/src/main.rs +++ b/android/translations-converter/src/main.rs @@ -11,11 +11,6 @@ //! the message parameters are changed so that they are in a common format, and there is also a //! small workaround for having different apostrophe characters in the GUI in some messages. //! -//! One dangerous assumption for the normalization is that the named parameters for the GUI are -//! supplied in the declared order on Android. This is because it's not possible to figure out the -//! order when only named parameters are used, and Android strings only supported numbered -//! parameters. -//! //! Android's plural resources are also translated using the same principle. It's important to note //! that the singular quantity item (i.e., the item where `quantity="one"`) for each Android plural //! resource will be used as the `msgid` to be search for in the gettext translations file. @@ -150,7 +145,7 @@ fn main() { for message in template { match message.value { - MsgValue::Invariant(_) => missing_translations.remove(&message.id.normalize()), + MsgValue::Invariant(_, _) => missing_translations.remove(&message.id.normalize()), MsgValue::Plural { .. } => missing_plurals.remove(&message.id.normalize()), }; } @@ -235,11 +230,12 @@ fn main() { let mut localized_strings = android::StringResources::new(); for translation in default_translations { match translation.value { - MsgValue::Invariant(_) => { + MsgValue::Invariant(_, arg_ordering) => { if !translation.id.is_empty() { localized_strings.push(android::StringResource::new( translation.id.normalize(), &translation.id.normalize(), + arg_ordering.as_ref(), )); } } @@ -326,10 +322,11 @@ fn generate_relay_translations( for translation in translations { match translation.value { - MsgValue::Invariant(translation_value) => { + MsgValue::Invariant(translation_value, arg_ordering) => { localized_strings.push(android::StringResource::new( translation.id.normalize(), &translation_value.normalize(), + arg_ordering.as_ref(), )); } MsgValue::Plural { .. } => {} @@ -369,11 +366,12 @@ fn generate_translations( for translation in translations { match translation.value { - MsgValue::Invariant(translation_value) => { + MsgValue::Invariant(translation_value, arg_ordering) => { if let Some(android_key) = known_strings.remove(&translation.id.normalize()) { localized_strings.push(android::StringResource::new( android_key, &translation_value.normalize(), + arg_ordering.as_ref(), )); } } diff --git a/android/translations-converter/src/normalize.rs b/android/translations-converter/src/normalize.rs index ace645570e..5428a15481 100644 --- a/android/translations-converter/src/normalize.rs +++ b/android/translations-converter/src/normalize.rs @@ -61,11 +61,14 @@ mod tests { fn normalize_android_string_value() { use crate::android::StringValue; - let input = StringValue::from_unescaped(concat!( - "'Inside single quotes'", - r#""Inside double quotes""#, - "With parameters: %1$d, %2$s", - )); + let input = StringValue::from_unescaped( + concat!( + "'Inside single quotes'", + r#""Inside double quotes""#, + "With parameters: %1$d, %2$s", + ), + None, + ); let expected = concat!( "'Inside single quotes'", |
