links_normalized/
lib.rs

//! This crate contains two normalized string data structures:
//! - [`Normalized`], which represents Unicode-normalized strings
//! - [`Link`], which represents valid, normalized redirection target URLs
//!
//! [`Normalized`] strings are case-insensitive and ignore whitespace and
//! control characters. They also perform [NFKC] normalization on the input.
//! They are used as vanity paths by links.
//!
//! [`Link`]s are normalized (in the URI sense) `http`/`https` URLs used as
//! redirect destinations by links.
//!
//! [NFKC]: https://www.unicode.org/reports/tr15/#Norm_Forms
13
14use std::{
15	convert::Infallible,
16	fmt::{Display, Error as FmtError, Formatter},
17	str::FromStr,
18};
19
20#[cfg(feature = "fred")]
21use fred::{
22	error::{RedisError, RedisErrorKind},
23	types::{FromRedis, RedisValue},
24};
25use serde::{Deserialize, Serialize};
26use unicode_normalization::UnicodeNormalization;
27use uriparse::{Scheme, URIReference};
28
29/// A normalized string used for vanity paths.
30///
31/// Allows for storing and comparing vanity paths in a normalized,
32/// case-insensitive way. Also filters out whitespace and control characters.
33#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
34#[serde(try_from = "&str", into = "String")]
35pub struct Normalized(String);
36
37impl Normalized {
38	/// Create a new `Normalized` string, normalizing, filtering, and
39	/// lowercasing the provided string.
40	#[must_use]
41	pub fn new(string: &str) -> Self {
42		Self(
43			string
44				.nfkc()
45				.filter(|c| !c.is_control())
46				.filter(|c| !c.is_whitespace())
47				.collect::<String>()
48				.to_lowercase(),
49		)
50	}
51
52	/// Returns the string this `Normalized` wraps, consuming `self`.
53	#[must_use]
54	pub fn into_string(self) -> String {
55		self.0
56	}
57}
58
59impl Display for Normalized {
60	fn fmt(&self, formatter: &mut Formatter<'_>) -> Result<(), FmtError> {
61		formatter.write_str(&self.0)
62	}
63}
64
65impl FromStr for Normalized {
66	type Err = Infallible;
67
68	fn from_str(s: &str) -> Result<Self, Self::Err> {
69		Ok(Self::from(s))
70	}
71}
72
#[cfg(feature = "fred")]
impl FromRedis for Normalized {
	/// Converts a redis value into a `Normalized`.
	///
	/// The value is first turned into a string with `RedisValue::into_string`
	/// and that string is then normalized.
	///
	/// # Errors
	/// Returns a `RedisErrorKind::Parse` error when the value has no string
	/// representation.
	fn from_value(value: RedisValue) -> Result<Self, RedisError> {
		match value.into_string() {
			Some(text) => Ok(Self::new(&text)),
			None => Err(RedisError::new(
				RedisErrorKind::Parse,
				"can't convert this type into a Normalized",
			)),
		}
	}
}
87
88impl From<String> for Normalized {
89	fn from(string: String) -> Self {
90		Self::new(string.as_str())
91	}
92}
93
94impl From<&String> for Normalized {
95	fn from(string: &String) -> Self {
96		Self::new(string.as_str())
97	}
98}
99
100impl From<&str> for Normalized {
101	fn from(string: &str) -> Self {
102		Self::new(string)
103	}
104}
105
106impl From<Normalized> for String {
107	fn from(normalized: Normalized) -> Self {
108		normalized.into_string()
109	}
110}
111
112/// The error returned by fallible conversions into `Link`s.
113#[derive(Debug, thiserror::Error)]
114pub enum LinkError {
115	/// The provided value is not a valid URL.
116	#[error("url is invalid")]
117	Invalid,
118	/// The URL is relative (i.e. does not have a scheme and/or host).
119	#[error("url is not absolute")]
120	Relative,
121	/// The URL has a scheme that is not `http` or `https`.
122	#[error("url has a non-http/https scheme")]
123	Scheme,
124	/// The URL contains a password, which is considered potentially unsafe.
125	#[error("url has credentials")]
126	Unsafe,
127}
128
129/// A normalized URL used as the redirect destination. This ensures that the
130/// link is a valid absolute HTTP(S) URL.
131///
132/// The resulting `Link` is guaranteed to have an `http` or `https` scheme, be
133/// an absolute URL, not have a password, and be properly percent encoded. Note
134/// that this doesn't aim to make invalid URLs valid (e.g. by percent encoding
135/// non-ascii characters), but may normalize the provided URL (e.g. by decoding
136/// percent-encoded non-reserved characters or by lowercasing the host). `Link`
137/// should not be used to create a new, valid, properly encoded URL from user
138/// input, only to verify one, as it doesn't provide much useful feedback or
139/// help with encoding an almost valid URL, nor does it do much useful
140/// guesswork.
141#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
142#[serde(try_from = "&str", into = "String")]
143pub struct Link(String);
144
145impl Link {
146	/// Valid Link URL schemes
147	const VALID_SCHEMES: &'static [&'static str] = &["https", "http"];
148
149	/// Create a new Link, checking the provided string.
150	///
151	/// # Errors
152	/// This returns an error if the passed `url` is invalid
153	/// (`LinkError::Invalid`), has a password (`LinkError::Unsafe`), has an
154	/// invalid scheme (`LinkError::Scheme`, valid schemes are `http` and
155	/// `https`), or is not absolute (`LinkError::Relative`).
156	pub fn new(url: &str) -> Result<Self, LinkError> {
157		let mut url = match URIReference::try_from(url) {
158			Ok(url) => url,
159			Err(_) => return Err(LinkError::Invalid),
160		};
161
162		if url.has_password() {
163			return Err(LinkError::Unsafe);
164		}
165
166		url.normalize();
167
168		if !Self::VALID_SCHEMES.contains(&url.scheme().map_or("", Scheme::as_str)) {
169			return Err(LinkError::Scheme);
170		}
171
172		if url.is_uri() && url.has_authority() {
173			Ok(Self(url.to_string()))
174		} else {
175			Err(LinkError::Relative)
176		}
177	}
178
179	/// Create a new Link without performing any checks.
180	///
181	/// # Safety
182	/// This makes no guarantees about the contents of the Link, the validity
183	/// of the link must be ensured some other way before calling this.
184	#[must_use]
185	pub const fn new_unchecked(url: String) -> Self {
186		Self(url)
187	}
188
189	/// Returns the string this `Link` wraps, consuming `self`.
190	#[must_use]
191	pub fn into_string(self) -> String {
192		self.0
193	}
194}
195
196impl Display for Link {
197	fn fmt(&self, formatter: &mut Formatter<'_>) -> Result<(), FmtError> {
198		formatter.write_str(&self.0)
199	}
200}
201
202impl FromStr for Link {
203	type Err = LinkError;
204
205	fn from_str(s: &str) -> Result<Self, Self::Err> {
206		Self::try_from(s)
207	}
208}
209
#[cfg(feature = "fred")]
impl FromRedis for Link {
	/// Converts a redis string value into a `Link`, validating it on the way.
	///
	/// # Errors
	/// Returns a `RedisErrorKind::Parse` error if the value is not a
	/// `RedisValue::String`, or if the string is rejected by the `Link`
	/// validation (the `LinkError` message is used as the error details).
	fn from_value(value: RedisValue) -> Result<Self, RedisError> {
		if let RedisValue::String(raw) = value {
			Self::new(&*raw).map_err(|err| RedisError::new(RedisErrorKind::Parse, err.to_string()))
		} else {
			Err(RedisError::new(
				RedisErrorKind::Parse,
				"can't convert this type into a Link",
			))
		}
	}
}
223
224impl TryFrom<String> for Link {
225	type Error = LinkError;
226
227	fn try_from(string: String) -> Result<Self, Self::Error> {
228		Self::new(string.as_str())
229	}
230}
231
232impl TryFrom<&String> for Link {
233	type Error = LinkError;
234
235	fn try_from(string: &String) -> Result<Self, Self::Error> {
236		Self::new(string.as_str())
237	}
238}
239
240impl TryFrom<&str> for Link {
241	type Error = LinkError;
242
243	fn try_from(string: &str) -> Result<Self, Self::Error> {
244		Self::new(string)
245	}
246}
247
248impl From<Link> for String {
249	fn from(link: Link) -> Self {
250		link.into_string()
251	}
252}
253
#[cfg(test)]
mod tests {
	use std::cmp::Ordering;

	use super::*;

	// The characters below are written as Unicode escapes on purpose: each one
	// is visually identical to (and easily normalized into) its counterpart by
	// editors and text tooling, which would silently break the `assert_ne!`
	// sanity checks in the tests.

	/// U+2126 OHM SIGN, normalizes to U+03A9.
	const OHM: &str = "\u{2126}";
	/// U+03A9 GREEK CAPITAL LETTER OMEGA.
	const OMEGA: &str = "\u{3a9}";
	/// Three separate ASCII letters.
	const LETTERS: &str = "ffi";
	/// U+FB03 LATIN SMALL LIGATURE FFI, normalizes to the letters `ffi`.
	const LIGATURE: &str = "\u{fb03}";

	/// `Normalized::new` ignores case, whitespace and compatibility variants.
	#[test]
	fn normalized_new() {
		assert_eq!(Normalized::new("BiGbIrD"), Normalized::new("bigbird"));
		assert_eq!(Normalized::new("Big Bird\t"), Normalized::new(" ᴮᴵᴳᴮᴵᴿᴰ"));

		assert_ne!(OHM, OMEGA);
		assert_eq!(Normalized::new(OHM), Normalized::new(OMEGA));

		assert_ne!(LETTERS, LIGATURE);
		assert_eq!(Normalized::new(LETTERS), Normalized::new(LIGATURE));
	}

	/// All string conversions into `Normalized` agree with `Normalized::new`.
	#[test]
	fn normalized_from_string() {
		assert_ne!(OHM, OMEGA);
		assert_ne!(LETTERS, LIGATURE);

		assert_eq!(
			Normalized::from("BiGbIrD".to_string()),
			Normalized::from("bigbird".to_string())
		);
		assert_eq!(
			Normalized::from("Big Bird\t".to_string()),
			Normalized::from(" ᴮᴵᴳᴮᴵᴿᴰ".to_string())
		);
		assert_eq!(
			Normalized::from(OHM.to_string()),
			Normalized::from(OMEGA.to_string())
		);
		assert_eq!(
			Normalized::from(LETTERS.to_string()),
			Normalized::from(LIGATURE.to_string())
		);

		assert_eq!(
			Normalized::from(&"BiGbIrD".to_string()),
			Normalized::from(&"bigbird".to_string())
		);
		assert_eq!(
			Normalized::from(&"Big Bird\t".to_string()),
			Normalized::from(&" ᴮᴵᴳᴮᴵᴿᴰ".to_string())
		);
		assert_eq!(
			Normalized::from(&OHM.to_string()),
			Normalized::from(&OMEGA.to_string())
		);
		assert_eq!(
			Normalized::from(&LETTERS.to_string()),
			Normalized::from(&LIGATURE.to_string())
		);

		assert_eq!(Normalized::new("BiGbIrD"), Normalized::new("bigbird"));
		assert_eq!(Normalized::new("Big Bird\t"), Normalized::new(" ᴮᴵᴳᴮᴵᴿᴰ"));
		assert_eq!(Normalized::new(OHM), Normalized::new(OMEGA));
		assert_eq!(Normalized::new(LETTERS), Normalized::new(LIGATURE));

		assert_eq!(
			"BiGbIrD".parse::<Normalized>().unwrap(),
			Normalized::new("bigbird")
		);
		assert_eq!(
			"Big Bird\t".parse::<Normalized>().unwrap(),
			Normalized::new(" ᴮᴵᴳᴮᴵᴿᴰ")
		);
		assert_eq!(OHM.parse::<Normalized>().unwrap(), Normalized::new(OMEGA));
		assert_eq!(
			LETTERS.parse::<Normalized>().unwrap(),
			Normalized::new(LIGATURE)
		);
	}

	/// `into_string` and `to_string` both expose the normalized form.
	#[test]
	fn normalized_into_string() {
		assert_eq!(
			Normalized::new("BiGbIrD").into_string(),
			Normalized::new("bigbird").into_string()
		);

		assert_eq!(
			Normalized::new("BiGbIrD").to_string(),
			Normalized::new("bigbird").to_string()
		);
	}

	/// Redis values convert via their string form; null is rejected.
	#[test]
	#[cfg(feature = "fred")]
	fn normalized_from_redis() {
		assert_eq!(
			Normalized::from_value(RedisValue::from_static_str("BiG bIrD"))
				.unwrap()
				.into_string(),
			"bigbird".to_string()
		);

		assert_eq!(
			Normalized::from_value(RedisValue::Integer(42))
				.unwrap()
				.into_string(),
			"42".to_string()
		);

		assert_eq!(
			Normalized::from_value(RedisValue::Null).unwrap_err().kind(),
			&RedisErrorKind::Parse
		);
	}

	/// Deserialization normalizes, and a serialize/deserialize round trip is
	/// lossless.
	#[test]
	fn normalized_serde() {
		assert_eq!(
			Normalized::new("BiGbIrD"),
			serde_json::from_str::<Normalized>(r#"" ᴮᴵᴳᴮᴵᴿᴰ""#).unwrap()
		);

		assert_eq!(
			Normalized::new("BiGbIrD"),
			serde_json::from_str::<Normalized>(
				&serde_json::to_string(&Normalized::new(" ᴮᴵᴳᴮᴵᴿᴰ")).unwrap()
			)
			.unwrap()
		);
	}

	/// Ordering and equality operate on the normalized form, not on the input.
	#[test]
	fn normalized_cmp() {
		assert_eq!(
			Normalized::new("Big Bird\t").cmp(&Normalized::new(" ᴮᴵᴳᴮᴵᴿᴰ")),
			Ordering::Equal
		);
		assert_eq!(
			Normalized::new("SmaLlbIrD").cmp(&Normalized::new("smolbird")),
			Ordering::Less
		);
		assert_eq!(
			Normalized::new(" ˢᴹᵒᶫᴮᴵᴿᴰ").cmp(&Normalized::new("Small Bird\t")),
			Ordering::Greater
		);

		assert_eq!(
			Normalized::new("Big Bird\t").partial_cmp(&Normalized::new(" ᴮᴵᴳᴮᴵᴿᴰ")),
			Some(Ordering::Equal)
		);
		assert_eq!(
			Normalized::new("SmaLlbIrD").partial_cmp(&Normalized::new("smolbird")),
			Some(Ordering::Less)
		);
		assert_eq!(
			Normalized::new(" ˢᴹᵒᶫᴮᴵᴿᴰ").partial_cmp(&Normalized::new("Small Bird\t")),
			Some(Ordering::Greater)
		);

		assert_ne!(
			OHM.cmp(OMEGA),
			Normalized::new(OHM).cmp(&Normalized::new(OMEGA))
		);
		assert_ne!(
			OHM.partial_cmp(OMEGA),
			Normalized::new(OHM).partial_cmp(&Normalized::new(OMEGA))
		);
		assert!(Normalized::new(OHM) == Normalized::new(OMEGA).clone());
		assert!(Normalized::new(OHM).clone() == Normalized::new(OMEGA));

		assert_ne!(
			LETTERS.cmp(LIGATURE),
			Normalized::new(LETTERS).cmp(&Normalized::new(LIGATURE))
		);
		assert_ne!(
			LETTERS.partial_cmp(LIGATURE),
			Normalized::new(LETTERS).partial_cmp(&Normalized::new(LIGATURE))
		);
		assert!(Normalized::new(LETTERS) == Normalized::new(LIGATURE).clone());
		assert!(Normalized::new(LETTERS).clone() == Normalized::new(LIGATURE));
	}

	/// `Link::new` normalizes valid links and rejects everything else.
	#[test]
	fn link_new() {
		assert_eq!(
			Link::new("http://example.com").unwrap().into_string(),
			"http://example.com/".to_string()
		);

		assert_eq!(
			Link::new("http://example.com").unwrap(),
			Link::new_unchecked("http://example.com/".to_string())
		);

		assert_eq!(
			Link::new("https://example.com/test?test=test#test")
				.unwrap()
				.into_string(),
			"https://example.com/test?test=test#test".to_string()
		);

		assert_eq!(
			Link::new("https://example.com/test?test=test#test").unwrap(),
			Link::new_unchecked("https://example.com/test?test=test#test".to_string())
		);

		assert_eq!(
			Link::new("HTtPS://eXaMpLe.com?").unwrap().into_string(),
			"https://example.com/?".to_string()
		);

		assert_eq!(
			Link::new("https://username@example.com/")
				.unwrap()
				.into_string(),
			"https://username@example.com/".to_string()
		);

		assert_eq!(
			Link::new("https://example.com/th%69%73/%69%73?a=test")
				.unwrap()
				.into_string(),
			"https://example.com/this/is?a=test".to_string()
		);

		assert_eq!(
			Link::new(
				"https://%65%78%61%6d%70%6c%65.%63%6f%6d/%74%68%69%73/%69%73?%61=%74%65%73%74"
			)
			.unwrap()
			.into_string(),
			"https://example.com/this/is?a=test".to_string()
		);

		assert_eq!(
			Link::new("https://example.com/%E1%B4%AE%E1%B4%B5%E1%B4%B3%E1%B4%AE%E1%B4%B5%E1%B4%BF%E1%B4%B0").unwrap().into_string(),
			"https://example.com/%E1%B4%AE%E1%B4%B5%E1%B4%B3%E1%B4%AE%E1%B4%B5%E1%B4%BF%E1%B4%B0".to_string()
		);

		assert_eq!(
			Link::new("https://xn--xmp-qla7xe00a.xn--m-uga3d/")
				.unwrap()
				.into_string(),
			"https://xn--xmp-qla7xe00a.xn--m-uga3d/".to_string()
		);

		assert!(Link::new("").is_err());

		assert!(Link::new("/test").is_err());

		assert!(Link::new("http:/test").is_err());

		assert!(Link::new("example.com/test").is_err());

		assert!(Link::new("//example.com/test").is_err());

		assert!(Link::new("ftp://example.com").is_err());

		assert!(Link::new("https_colon_slash_slash_example_dot_com_slash_test").is_err());

		assert!(Link::new("https://username:password@example.com").is_err());

		assert!(Link::new("https://êxämpłé.ćóm/ᴮᴵᴳ ᴮᴵᴿᴰ").is_err());
	}

	/// Redis string values are validated; other value types are rejected.
	#[test]
	#[cfg(feature = "fred")]
	fn link_from_redis() {
		assert_eq!(
			Link::from_value(RedisValue::from_static_str(
				"https://EXAMPLE.com/test?test=test#test"
			))
			.unwrap(),
			Link::new("https://example.COM/test?test=test#test").unwrap()
		);

		assert!(Link::from_value(RedisValue::from_static_str(
			"https_colon_slash_slash_example_dot_com_slash_test"
		))
		.is_err());

		assert_eq!(
			Link::from_value(RedisValue::Null).unwrap_err().kind(),
			&RedisErrorKind::Parse
		);
	}

	/// Deserialization validates and normalizes; round trips are lossless.
	#[test]
	fn link_serde() {
		assert_eq!(
			serde_json::from_str::<Link>(r#""https://EXAMPLE.com/test?test=test#test""#).unwrap(),
			Link::new("https://example.COM/test?test=test#test").unwrap()
		);

		assert_eq!(
			serde_json::from_str::<Link>(
				&serde_json::to_string(
					&Link::new("https://EXAMPLE.com/test?test=test#test").unwrap()
				)
				.unwrap()
			)
			.unwrap(),
			Link::new("https://example.COM/test?test=test#test").unwrap()
		);

		assert!(serde_json::from_str::<Link>(
			r#""https_colon_slash_slash_example_dot_com_slash_test""#
		)
		.is_err());
	}

	/// Links order the same way as their string representations.
	#[test]
	fn link_cmp() {
		assert_eq!(
			Link::new("https://example.com/test?test=test#test")
				.unwrap()
				.cmp(&Link::new("https://example.com/test?test=test#test").unwrap()),
			Ordering::Equal
		);

		assert_eq!(
			Link::new("https://example.com/test?test=test#test")
				.unwrap()
				.partial_cmp(&Link::new("https://example.com/test?test=test#test").unwrap()),
			Some(Ordering::Equal)
		);

		assert_eq!(
			Link::new("https://example.com/test?test=test#test")
				.unwrap()
				.into_string()
				.cmp(
					&Link::new("https://xn--xmp-qla7xe00a.xn--m-uga3d/")
						.unwrap()
						.into_string()
				),
			Ordering::Less
		);

		assert_eq!(
			Link::new("https://xn--xmp-qla7xe00a.xn--m-uga3d/")
				.unwrap()
				.into_string()
				.partial_cmp(
					&Link::new("https://example.com/test?test=test#test")
						.unwrap()
						.into_string()
				),
			Some(Ordering::Greater)
		);
	}

	/// Every string conversion to and from `Link` yields the normalized URL.
	#[test]
	fn link_to_from_string() {
		assert_eq!(
			Link::new("http://example.com").unwrap().to_string(),
			"http://example.com/".to_string()
		);

		assert_eq!(
			Link::try_from("http://example.com").unwrap().to_string(),
			"http://example.com/".to_string()
		);

		assert_eq!(
			Link::try_from("http://example.com".to_string())
				.unwrap()
				.to_string(),
			"http://example.com/".to_string()
		);

		assert_eq!(
			Link::try_from(&"http://example.com".to_string())
				.unwrap()
				.to_string(),
			"http://example.com/".to_string()
		);

		assert_eq!(
			"http://example.com".parse::<Link>().unwrap().into_string(),
			"http://example.com/".to_string()
		);

		assert_eq!(
			Link::new("https://example.com/test?test=test#test")
				.unwrap()
				.to_string(),
			"https://example.com/test?test=test#test".to_string()
		);

		assert_eq!(
			"https://example.com/test?test=test#test"
				.parse::<Link>()
				.unwrap()
				.into_string(),
			"https://example.com/test?test=test#test".to_string()
		);
	}
}