From daf361ea763de47d61860df75a1b8c9a88aa17eb Mon Sep 17 00:00:00 2001 From: Max Audron Date: Tue, 6 May 2025 16:33:31 +0200 Subject: fix truncate on unicode fuck unicode all my homies stan ascii --- Cargo.lock | 7 +++++++ Cargo.toml | 2 ++ src/util/formatting/truncate.rs | 21 +++++++++++---------- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ef303f4..f892f6e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -228,6 +228,7 @@ dependencies = [ "tracing", "tracing-futures", "tracing-subscriber", + "unicode-segmentation", "urlparse", ] @@ -2131,6 +2132,12 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + [[package]] name = "untrusted" version = "0.9.0" diff --git a/Cargo.toml b/Cargo.toml index 8bbfb15..4886700 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,6 +54,8 @@ async-trait = "0.1" bitflags = "2" +unicode-segmentation = "1" + [dev-dependencies] mockito = "1" diff --git a/src/util/formatting/truncate.rs b/src/util/formatting/truncate.rs index 80caa23..9528492 100644 --- a/src/util/formatting/truncate.rs +++ b/src/util/formatting/truncate.rs @@ -1,3 +1,5 @@ +use unicode_segmentation::UnicodeSegmentation; + /// Truncates a string after a certain number of characters. /// /// Function always tries to truncate on a word boundary. 
@@ -6,15 +8,7 @@ pub fn truncate(text: &str, len: usize) -> String { if text.len() <= len { return text.to_string(); } - format!( - "{}…", - text[..len] - .rsplitn(2, " ") - .collect::<Vec<&str>>() - .last() - .copied() - .expect("This can never happen >inb4 it happens") - ) + format!("{}…", text.graphemes(true).take(len).collect::<String>()) } #[cfg(test)] @@ -39,7 +33,7 @@ mod tests { fn test_truncate_with_input_of_greater_length_than_limit() { let input = "some longer text"; let result = truncate(input, input.len() - 1); - assert_eq!("some longer…", result) + assert_eq!("some longer tex…", result) } #[test] @@ -48,4 +42,11 @@ mod tests { let result = truncate(input, input.len() - 1); assert_eq!("somelongertex…", result) } + + #[test] + fn test_truncate_with_unicode() { + let input = "short ° text"; + let result = truncate(input, 7); + assert_eq!(result, "short °…") + } } -- cgit v1.2.3