RustPython · youknowone · Apr 17, 2026 · Apr 14, 2026 · Apr 15, 2026 · coderabbitai
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/crates/sre_engine/Cargo.toml b/crates/sre_engine/Cargo.toml
@@ -19,6 +19,7 @@ rustpython-wtf8 = { workspace = true }
 num_enum = { workspace = true }
 bitflags = { workspace = true }
 optional = { workspace = true }
+icu_properties = { workspace = true }
 
 [dev-dependencies]
 criterion = { workspace = true }

diff --git a/crates/sre_engine/src/string.rs b/crates/sre_engine/src/string.rs
@@ -1,3 +1,4 @@
+use icu_properties::props::{CanonicalCombiningClass, EnumeratedProperty};
 use rustpython_wtf8::Wtf8;
 
 #[derive(Debug, Clone, Copy)]
@@ -443,7 +444,10 @@ pub(crate) const fn is_uni_linebreak(ch: u32) -> bool {
 pub(crate) fn is_uni_alnum(ch: u32) -> bool {
     // TODO: check with cpython
     char::try_from(ch)
-        .map(|x| x.is_alphanumeric())
+        .map(|x| { // NotReordered check keeps combining marks like U+0345 out of \w
+            x.is_alphanumeric()
+                && CanonicalCombiningClass::for_char(x) == CanonicalCombiningClass::NotReordered
+        })
         .unwrap_or(false)
 }
 #[inline]

diff --git a/crates/vm/src/builtins/str.rs b/crates/vm/src/builtins/str.rs
@@ -45,7 +45,8 @@ use rustpython_common::{
 };
 
 use icu_properties::props::{
-    BidiClass, BinaryProperty, EnumeratedProperty, GeneralCategory, XidContinue, XidStart,
+    BidiClass, BinaryProperty, CanonicalCombiningClass, EnumeratedProperty, GeneralCategory,
+    XidContinue, XidStart,
 };
 use unicode_casing::CharExt;
 
@@ -946,7 +947,11 @@ impl PyStr {
 
     #[pymethod]
     fn isalnum(&self) -> bool {
-        !self.data.is_empty() && self.char_all(char::is_alphanumeric)
+        !self.data.is_empty()
+            && self.char_all(|c| { // NotReordered check rejects combining marks like U+0345
+                c.is_alphanumeric()
+                    && CanonicalCombiningClass::for_char(c) == CanonicalCombiningClass::NotReordered
+            })
     }
 
     #[pymethod]

diff --git a/extra_tests/snippets/builtin_str.py b/extra_tests/snippets/builtin_str.py
@@ -73,6 +73,15 @@
 # assert "Ǳ".title() == "ǲ"
 assert a.isalpha()
 
+# Combining characters differ slightly between Rust and Python
+assert "\u006e".isalnum()
+assert not "\u0303".isalnum()
+assert not "\u006e\u0303".isalnum()
+assert "\u00f1".isalnum()
+assert not "\u0345".isalnum()
+for raw in range(0x0363, 0x036F):
+    assert not chr(raw).isalnum()
+
 s = "1 2 3"
 assert s.split(" ", 1) == ["1", "2 3"]
 assert s.rsplit(" ", 1) == ["1 2", "3"]

diff --git a/extra_tests/snippets/stdlib_re.py b/extra_tests/snippets/stdlib_re.py
@@ -79,3 +79,6 @@
 # Test of fix re.fullmatch POSSESSIVE_REPEAT, issue #7183
 assert re.fullmatch(r"([0-9]++(?:\.[0-9]+)*+)", "1.25.38")
 assert re.fullmatch(r"([0-9]++(?:\.[0-9]+)*+)", "1.25.38").group(0) == "1.25.38"
+
+# \w must not match a combining mark such as U+0345; issue #7518
+assert not re.match(r"\w", "\u0345"), r"\w should not match U+0345 (category Mn)"