mirror of
https://mau.dev/maunium/synapse.git
synced 2024-09-27 20:19:03 +02:00
216 lines
7.4 KiB
Rust
216 lines
7.4 KiB
Rust
|
// Copyright 2022 The Matrix.org Foundation C.I.C.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
||
|
|
||
|
use anyhow::bail;
|
||
|
use anyhow::Context;
|
||
|
use anyhow::Error;
|
||
|
use lazy_static::lazy_static;
|
||
|
use regex;
|
||
|
use regex::Regex;
|
||
|
use regex::RegexBuilder;
|
||
|
|
||
|
lazy_static! {
    /// Splits a glob into consecutive pairs of literal text and wildcards.
    ///
    /// Capture group 1 is a (possibly empty) run of characters other than `?`
    /// and `*`; capture group 2 is the (possibly empty) run of `?` / `*`
    /// characters that immediately follows it.
    static ref WILDCARD_RUN: Regex =
        Regex::new(r"([^\?\*]*)([\?\*]*)").expect("valid regex");
}
|
||
|
|
||
|
/// Extract the localpart from a Matrix style ID
|
||
|
pub(crate) fn get_localpart_from_id(id: &str) -> Result<&str, Error> {
|
||
|
let (localpart, _) = id
|
||
|
.split_once(':')
|
||
|
.with_context(|| format!("ID does not contain colon: {id}"))?;
|
||
|
|
||
|
// We need to strip off the first character, which is the ID type.
|
||
|
if localpart.is_empty() {
|
||
|
bail!("Invalid ID {id}");
|
||
|
}
|
||
|
|
||
|
Ok(&localpart[1..])
|
||
|
}
|
||
|
|
||
|
/// Selects how the regex produced by `glob_to_regex` is anchored.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GlobMatchType {
    /// The glob has to match the entire input.
    Whole,

    /// The glob has to match one or more words within the input.
    Word,
}
|
||
|
|
||
|
/// Convert a "glob" style expression to a regex, anchoring either to the entire
|
||
|
/// input or to individual words.
|
||
|
pub fn glob_to_regex(glob: &str, match_type: GlobMatchType) -> Result<Regex, Error> {
|
||
|
let mut chunks = Vec::new();
|
||
|
|
||
|
// Patterns with wildcards must be simplified to avoid performance cliffs
|
||
|
// - The glob `?**?**?` is equivalent to the glob `???*`
|
||
|
// - The glob `???*` is equivalent to the regex `.{3,}`
|
||
|
for captures in WILDCARD_RUN.captures_iter(glob) {
|
||
|
if let Some(chunk) = captures.get(1) {
|
||
|
chunks.push(regex::escape(chunk.as_str()));
|
||
|
}
|
||
|
|
||
|
if let Some(wildcards) = captures.get(2) {
|
||
|
if wildcards.as_str() == "" {
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
let question_marks = wildcards.as_str().chars().filter(|c| *c == '?').count();
|
||
|
|
||
|
if wildcards.as_str().contains('*') {
|
||
|
chunks.push(format!(".{{{question_marks},}}"));
|
||
|
} else {
|
||
|
chunks.push(format!(".{{{question_marks}}}"));
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
let joined = chunks.join("");
|
||
|
|
||
|
let regex_str = match match_type {
|
||
|
GlobMatchType::Whole => format!(r"\A{joined}\z"),
|
||
|
|
||
|
// `^|\W` and `\W|$` handle the case where `pattern` starts or ends with a non-word
|
||
|
// character.
|
||
|
GlobMatchType::Word => format!(r"(?:^|\b|\W){joined}(?:\b|\W|$)"),
|
||
|
};
|
||
|
|
||
|
Ok(RegexBuilder::new(®ex_str)
|
||
|
.case_insensitive(true)
|
||
|
.build()?)
|
||
|
}
|
||
|
|
||
|
/// Compiles the glob into a `Matcher`.
|
||
|
pub fn get_glob_matcher(glob: &str, match_type: GlobMatchType) -> Result<Matcher, Error> {
|
||
|
// There are a number of shortcuts we can make if the glob doesn't contain a
|
||
|
// wild card.
|
||
|
let matcher = if glob.contains(['*', '?']) {
|
||
|
let regex = glob_to_regex(glob, match_type)?;
|
||
|
Matcher::Regex(regex)
|
||
|
} else if match_type == GlobMatchType::Whole {
|
||
|
// If there aren't any wildcards and we're matching the whole thing,
|
||
|
// then we simply can do a case-insensitive string match.
|
||
|
Matcher::Whole(glob.to_lowercase())
|
||
|
} else {
|
||
|
// Otherwise, if we're matching against words then can first check
|
||
|
// if the haystack contains the glob at all.
|
||
|
Matcher::Word {
|
||
|
word: glob.to_lowercase(),
|
||
|
regex: None,
|
||
|
}
|
||
|
};
|
||
|
|
||
|
Ok(matcher)
|
||
|
}
|
||
|
|
||
|
/// Matches against a glob
|
||
|
pub enum Matcher {
|
||
|
/// Plain regex matching.
|
||
|
Regex(Regex),
|
||
|
|
||
|
/// Case-insensitive equality.
|
||
|
Whole(String),
|
||
|
|
||
|
/// Word matching. `regex` is a cache of calling [`glob_to_regex`] on word.
|
||
|
Word { word: String, regex: Option<Regex> },
|
||
|
}
|
||
|
|
||
|
impl Matcher {
|
||
|
/// Checks if the glob matches the given haystack.
|
||
|
pub fn is_match(&mut self, haystack: &str) -> Result<bool, Error> {
|
||
|
// We want to to do case-insensitive matching, so we convert to
|
||
|
// lowercase first.
|
||
|
let haystack = haystack.to_lowercase();
|
||
|
|
||
|
match self {
|
||
|
Matcher::Regex(regex) => Ok(regex.is_match(&haystack)),
|
||
|
Matcher::Whole(whole) => Ok(whole == &haystack),
|
||
|
Matcher::Word { word, regex } => {
|
||
|
// If we're looking for a literal word, then we first check if
|
||
|
// the haystack contains the word as a substring.
|
||
|
if !haystack.contains(&*word) {
|
||
|
return Ok(false);
|
||
|
}
|
||
|
|
||
|
// If it does contain the word as a substring, then we need to
|
||
|
// check if it is an actual word by testing it against the regex.
|
||
|
let regex = if let Some(regex) = regex {
|
||
|
regex
|
||
|
} else {
|
||
|
let compiled_regex = glob_to_regex(word, GlobMatchType::Word)?;
|
||
|
regex.insert(compiled_regex)
|
||
|
};
|
||
|
|
||
|
Ok(regex.is_match(&haystack))
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/// Exercises `get_localpart_from_id` with malformed and well-formed IDs.
#[test]
fn test_get_domain_from_id() {
    // IDs without a colon, or with nothing before the first colon, are rejected.
    for invalid in ["", ":", ":asd", "::as::asad"] {
        get_localpart_from_id(invalid).unwrap_err();
    }

    // The sigil is stripped and everything after the first colon is ignored.
    let valid = [("@test:foo", "test"), ("@:", ""), ("@test:foo:907", "test")];
    for (id, localpart) in valid {
        assert_eq!(get_localpart_from_id(id).unwrap(), localpart);
    }
}
|
||
|
|
||
|
/// Checks both the patterns generated by `glob_to_regex` and what they match.
#[test]
fn tset_glob() -> Result<(), Error> {
    // Exact regex source generated for whole-input matching.
    let generated_patterns = [
        ("simple", r"\Asimple\z"),
        ("simple*", r"\Asimple.{0,}\z"),
        ("simple?", r"\Asimple.{1}\z"),
        ("simple?*?*", r"\Asimple.{2,}\z"),
        ("simple???", r"\Asimple.{3}\z"),
        ("escape.", r"\Aescape\.\z"),
    ];
    for (glob, pattern) in generated_patterns {
        assert_eq!(
            glob_to_regex(glob, GlobMatchType::Whole)?.as_str(),
            pattern
        );
    }

    // (glob, match type, haystack, whether the haystack should match)
    let match_cases = [
        ("simple", GlobMatchType::Whole, "simple", true),
        ("simple", GlobMatchType::Whole, "simples", false),
        ("simple*", GlobMatchType::Whole, "simples", true),
        ("simple?", GlobMatchType::Whole, "simples", true),
        ("simple*", GlobMatchType::Whole, "simple", true),
        ("simple", GlobMatchType::Word, "some simple.", true),
        ("simple", GlobMatchType::Word, "simple", true),
        ("simple", GlobMatchType::Word, "simples", false),
        ("@user:foo", GlobMatchType::Word, "Some @user:foo test", true),
        ("@user:foo", GlobMatchType::Word, "@user:foo", true),
    ];
    for (glob, match_type, haystack, should_match) in match_cases {
        assert_eq!(
            glob_to_regex(glob, match_type)?.is_match(haystack),
            should_match,
            "glob {glob:?} ({match_type:?}) against {haystack:?}"
        );
    }

    Ok(())
}
|