links_id/
lib.rs

1//! Links IDs are unique identifiers used in the links project to identify
2//! individual links.
3//!
4//! A links ID is a randomly generated 40 bit / 5 byte integer. It is usually
5//! encoded as an 8 character string, starting with a digit (`0-9`) followed by
6//! 7 base 38 characters. The character set used for base 38 is a modified
7//! [`nanoid-dictionary nolookalikesSafe`](https://github.com/CyberAP/nanoid-dictionary#nolookalikessafe),
8//! with 2 additional characters - `X` and `x`. The full charset is (in order):
9//! `6789BCDFGHJKLMNPQRTWXbcdfghjkmnpqrtwxz`.
10
11use std::{
12	fmt::{Debug, Display, Error as FmtError, Formatter},
13	str::FromStr,
14};
15
16#[cfg(feature = "fred")]
17use fred::{
18	error::{RedisError, RedisErrorKind},
19	types::{FromRedis, RedisValue},
20};
21use lazy_static::lazy_static;
22use regex::{Regex, RegexBuilder};
23use serde::{Deserialize, Serialize};
24
25/// The error returned by fallible conversions into IDs.
26#[derive(Debug, thiserror::Error, PartialEq, Eq)]
27pub enum ConversionError {
28	/// The provided value is too large to be represented in an `Id`.
29	#[error("value is too large")]
30	TooLarge,
31	/// The provided value is too small to be represented in an `Id`.
32	#[error("value is too small")]
33	TooSmall,
34	/// The provided value is improperly formatted.
35	#[error("value is in an invalid format")]
36	InvalidFormat,
37}
38
/// The decimal digits `0` through `9`, in ascending order.
///
/// The first character of the string representation of an Id is taken from
/// this set; the index of a digit in the array is its numeric value.
pub const BASE_10_CHARSET: [char; 10] = {
	let ascii = *b"0123456789";
	let mut charset = ['\0'; 10];
	let mut position = 0;

	while position < ascii.len() {
		charset[position] = ascii[position] as char;
		position += 1;
	}

	charset
};
42
/// The 38 characters used to encode the last 7 characters of the string
/// representation of an Id.
///
/// The index of a character in the array is its base 38 value. The
/// characters are listed in ascending ASCII order.
pub const BASE_38_CHARSET: [char; 38] = {
	let ascii = *b"6789BCDFGHJKLMNPQRTWXbcdfghjkmnpqrtwxz";
	let mut charset = ['\0'; 38];
	let mut position = 0;

	while position < ascii.len() {
		charset[position] = ascii[position] as char;
		position += 1;
	}

	charset
};
49
/// Lookup table mapping an ASCII code back to its base 38 value, used when
/// decoding encoded IDs.
///
/// The table starts at `'6'` (ASCII 54) and ends at `'z'` (ASCII 122), so the
/// value of a base 38 character is found at index `ascii - 54`. Slots that
/// belong to characters outside the charset hold `0`, which is also the value
/// of `'6'`; the table therefore only gives meaningful answers for characters
/// that are already known to be valid.
pub const REVERSE_CHARSET: [u8; 69] = {
	let charset = *b"6789BCDFGHJKLMNPQRTWXbcdfghjkmnpqrtwxz";
	let mut table = [0u8; 69];
	let mut value = 0;

	// Each charset character writes its own position into the slot that
	// corresponds to its ASCII code.
	while value < charset.len() {
		table[(charset[value] - b'6') as usize] = value as u8;
		value += 1;
	}

	table
};
58
/// Distance between the ASCII code of a base 38 character and its position
/// in `REVERSE_CHARSET`.
///
/// This is the ASCII code of `'6'`, the first character covered by the
/// table: `REVERSE_CHARSET[ascii - REVERSE_CHARSET_BASE_38_OFFSET]` is the
/// entry for the character with code `ascii`.
pub const REVERSE_CHARSET_BASE_38_OFFSET: usize = b'6' as usize;
65
66/// The 40 bit ID used to identify links in links.
67#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
68#[serde(try_from = "&str", into = "String")]
69pub struct Id([u8; 5]);
70
71impl Id {
72	/// The number of bits in an Id.
73	pub const BITS: usize = 40;
74	/// The number of bytes in an Id.
75	pub const BYTES: usize = 5;
76	/// The number of characters in the usual Id representation.
77	pub const CHARS: usize = 8;
78	/// The maximum value of an Id when represented as a number.
79	pub const MAX: u64 = 2u64.pow(Self::BITS as u32) - 1;
80	/// The minimum value of an Id when represented as a number
81	pub const MIN: u64 = 0;
82
83	/// Check if a string representation of an Id is valid.
84	#[must_use]
85	pub fn is_valid(id: &str) -> bool {
86		lazy_static! {
87			static ref RE: Regex =
88				RegexBuilder::new("^[0123456789][6789BCDFGHJKLMNPQRTWXbcdfghjkmnpqrtwxz]{7}$")
89					.case_insensitive(false)
90					.dot_matches_new_line(false)
91					.ignore_whitespace(false)
92					.multi_line(false)
93					.octal(false)
94					.unicode(true)
95					.build()
96					.unwrap();
97			static ref MAX: String = Id::try_from(Id::MAX).unwrap().to_string();
98		}
99
100		RE.is_match(id) && id <= MAX.as_str()
101	}
102
103	/// Create a new random Id.
104	#[must_use]
105	pub fn new() -> Self {
106		Self(rand::random())
107	}
108
109	/// Convert this `Id` into a `u64`.
110	#[must_use]
111	pub fn to_u64(self) -> u64 {
112		let mut buf = [0u8; 8];
113		buf[3..].copy_from_slice(&self.0);
114
115		u64::from_be_bytes(buf)
116	}
117}
118
119impl Debug for Id {
120	fn fmt(&self, formatter: &mut Formatter<'_>) -> Result<(), FmtError> {
121		if formatter.alternate() {
122			formatter.debug_tuple("Id").field(&self.0).finish()
123		} else {
124			formatter
125				.debug_tuple("Id")
126				.field(&self.to_string())
127				.finish()
128		}
129	}
130}
131
132impl Display for Id {
133	fn fmt(&self, formatter: &mut Formatter<'_>) -> Result<(), FmtError> {
134		let mut buf = ['\0'; Self::CHARS];
135		let num = self.to_u64();
136
137		buf[0] = BASE_10_CHARSET[((num / 38u64.pow(Self::CHARS as u32 - 1)) % 10) as usize];
138
139		for (i, char) in buf.iter_mut().enumerate().skip(1) {
140			let index = (num / 38u64.pow((Self::CHARS - 1 - i) as u32)) % 38;
141
142			*char = BASE_38_CHARSET[index as usize];
143		}
144
145		let buf: String = buf.iter().collect();
146		formatter.write_str(&buf)
147	}
148}
149
150impl Default for Id {
151	fn default() -> Self {
152		Self::new()
153	}
154}
155
156impl FromStr for Id {
157	type Err = ConversionError;
158
159	fn from_str(s: &str) -> Result<Self, Self::Err> {
160		Self::try_from(s)
161	}
162}
163
#[cfg(feature = "fred")]
impl FromRedis for Id {
	/// Convert a Redis value into an Id.
	///
	/// String values go through the `&str` conversion and byte values through
	/// the `&[u8]` conversion. Any other kind of value, as well as a failed
	/// conversion, results in a `Parse` error.
	fn from_value(value: RedisValue) -> Result<Self, RedisError> {
		let converted = match value {
			RedisValue::String(text) => Self::try_from(&*text),
			RedisValue::Bytes(raw) => Self::try_from(&*raw),
			_ => {
				return Err(RedisError::new(
					RedisErrorKind::Parse,
					"can't convert this type into an ID",
				));
			}
		};

		converted.map_err(|error| RedisError::new(RedisErrorKind::Parse, error.to_string()))
	}
}
179
180impl From<[u8; 5]> for Id {
181	fn from(bytes: [u8; 5]) -> Self {
182		Self(bytes)
183	}
184}
185
186impl TryFrom<&[u8]> for Id {
187	type Error = ConversionError;
188
189	fn try_from(bytes: &[u8]) -> Result<Self, Self::Error> {
190		match bytes.len() {
191			5 => Ok(Self(
192				bytes.try_into().map_err(|_| Self::Error::InvalidFormat)?,
193			)),
194			0..=4 => Err(Self::Error::TooSmall),
195			_ => Err(Self::Error::TooLarge),
196		}
197	}
198}
199
200impl TryFrom<&str> for Id {
201	type Error = ConversionError;
202
203	fn try_from(string: &str) -> Result<Self, Self::Error> {
204		if string.len() < Self::CHARS {
205			Err(ConversionError::TooSmall)
206		} else if string.len() > Self::CHARS {
207			Err(ConversionError::TooLarge)
208		} else if !Self::is_valid(string) {
209			Err(ConversionError::InvalidFormat)
210		} else {
211			// Panic: `string` has already been checked to have only valid characters, so
212			// this always succeeds.
213			let mut num = string.chars().next().unwrap().to_digit(10).unwrap() as u64
214				* 38u64.pow((Self::CHARS - 1) as u32);
215
216			for (i, char) in string.chars().rev().enumerate().take(Self::CHARS - 1) {
217				// Panic (indexing): `string` has already been checked to have only valid
218				// characters, so this always succeeds.
219				num += REVERSE_CHARSET[char.encode_utf8(&mut [0u8]).as_bytes()[0] as usize
220					- REVERSE_CHARSET_BASE_38_OFFSET] as u64
221					* 38u64.pow(i as u32);
222			}
223
224			Self::try_from(num)
225		}
226	}
227}
228
229impl TryFrom<String> for Id {
230	type Error = ConversionError;
231
232	fn try_from(string: String) -> Result<Self, Self::Error> {
233		Self::try_from(string.as_str())
234	}
235}
236
237impl TryFrom<&String> for Id {
238	type Error = ConversionError;
239
240	fn try_from(string: &String) -> Result<Self, Self::Error> {
241		Self::try_from(string.as_str())
242	}
243}
244
245impl TryFrom<u64> for Id {
246	type Error = ConversionError;
247
248	fn try_from(num: u64) -> Result<Self, Self::Error> {
249		if num > Self::MAX {
250			Err(ConversionError::TooLarge)
251		} else {
252			let mut buf = [0u8; Self::BYTES];
253			buf.copy_from_slice(&num.to_be_bytes()[3..]);
254
255			Ok(Self(buf))
256		}
257	}
258}
259
260impl From<Id> for u64 {
261	fn from(id: Id) -> Self {
262		id.to_u64()
263	}
264}
265
266impl From<Id> for String {
267	fn from(id: Id) -> Self {
268		id.to_string()
269	}
270}
271
272impl From<Id> for [u8; 5] {
273	fn from(id: Id) -> [u8; 5] {
274		id.0
275	}
276}
277
#[cfg(test)]
mod tests {
	use std::collections::HashMap;

	use super::*;

	// `Id::is_valid` accepts well-formed representations (including the
	// smallest and largest Id) and rejects wrong lengths, characters outside
	// the charsets and values above `Id::MAX`.
	#[test]
	fn is_valid() {
		assert!(Id::is_valid("1wqLjdjd")); // one valid Id
		assert!(Id::is_valid("06789BCD")); // another Id
		assert!(Id::is_valid(&Id::new().to_string())); // any generated Id is valid
		assert!(Id::is_valid("06666666")); // Id::MIN as an Id
		assert!(Id::is_valid("9dDbKpJP")); // Id::MAX as an Id
		assert!(!Id::is_valid("𝟵𝚍𝐃𝐛𝓚𝐩𝐉𝑷")); // characters that may look good, but aren't
		assert!(!Id::is_valid("9dDbKpJQ")); // Id::MAX + 1 as an Id (too large)
		assert!(!Id::is_valid("00000000")); // Invalid base 38 characters
		assert!(!Id::is_valid("xHJ6CH79")); // invalid base 10 character
		assert!(!Id::is_valid("06789BC")); // too short
		assert!(!Id::is_valid("06789BCDD")); // too long
		assert!(!Id::is_valid("9pqrtwxz")); // too large for 40 bits
		assert!(!Id::is_valid("")); // empty string
		assert!(!Id::is_valid("an invalid id")); // not even close
	}

	// Two freshly generated Ids are expected to differ.
	#[test]
	fn new() {
		assert_ne!(Id::new(), Id::new());
	}

	// `to_u64` reads the bytes as a big-endian number and round-trips through
	// `TryFrom<u64>`.
	#[test]
	fn to_u64() {
		assert_eq!(
			Id([0x01, 0x02, 0x03, 0x04, 0x05]).to_u64(),
			0x01_02_03_04_05_u64
		);

		let id = Id::new();
		assert_eq!(Id::try_from(id.to_u64()).unwrap(), id);
	}

	// Checks the output of `to_string`, `{}`, `{:?}` and `{:#?}` for two
	// fixed Ids. Whitespace is removed from the pretty-printed form before it
	// is compared.
	#[test]
	fn to_string() {
		assert_eq!(Id([0x21, 0x22, 0x23, 0x24, 0x25]).to_string(), "1HJ6CH79");
		assert_eq!(Id([0x00, 0x22, 0x44, 0x66, 0x88]).to_string(), "06FHjHkx");

		assert_eq!(
			format!("{}", Id([0x21, 0x22, 0x23, 0x24, 0x25])),
			"1HJ6CH79"
		);
		assert_eq!(
			format!("{}", Id([0x00, 0x22, 0x44, 0x66, 0x88])),
			"06FHjHkx"
		);

		assert_eq!(
			format!("{:?}", Id([0x21, 0x22, 0x23, 0x24, 0x25])),
			r#"Id("1HJ6CH79")"#
		);
		assert_eq!(
			format!("{:?}", Id([0x00, 0x22, 0x44, 0x66, 0x88])),
			r#"Id("06FHjHkx")"#
		);

		assert_eq!(
			format!("{:#?}", Id([0x21, 0x22, 0x23, 0x24, 0x25]))
				.split_ascii_whitespace()
				.collect::<String>(),
			"Id([33,34,35,36,37,],)"
		);
		assert_eq!(
			format!("{:#?}", Id([0x00, 0x22, 0x44, 0x66, 0x88]))
				.split_ascii_whitespace()
				.collect::<String>(),
			"Id([0,34,68,102,136,],)"
		);
	}

	// `Default` generates a new Id each time, so two defaults are expected
	// to differ.
	#[test]
	fn default() {
		assert_ne!(Id::default(), Id::default());
	}

	// `FromRedis` accepts a string value holding the string representation
	// and a bytes value holding the 5 raw bytes, and rejects `Null`.
	#[test]
	#[cfg(feature = "fred")]
	fn from_redis() {
		assert_eq!(
			Id([0x11, 0x33, 0x55, 0x77, 0x99]),
			Id::from_value(RedisValue::from_static_str("0fXMgWQz")).unwrap()
		);

		assert_eq!(
			Id([0x00, 0x22, 0x44, 0x66, 0x88]),
			Id::from_value(RedisValue::from_static(&[0x00, 0x22, 0x44, 0x66, 0x88])).unwrap()
		);

		assert!(Id::from_value(RedisValue::Null).is_err());
	}

	// An Id serializes to, and deserializes from, a JSON string holding its
	// string representation.
	#[test]
	fn serde() {
		assert_eq!(
			serde_json::to_string(&Id([0x73, 0x65, 0x72, 0x64, 0x65])).unwrap(),
			r#""4Ld9TJrd""#
		);

		assert_eq!(
			serde_json::from_str::<Id>(r#""4Ld9TJrd""#).unwrap(),
			Id([0x73, 0x65, 0x72, 0x64, 0x65])
		);
	}

	// `From<[u8; 5]>` wraps the given bytes unchanged.
	#[test]
	fn from_bytes() {
		assert_eq!(
			Id([0x01, 0x02, 0x03, 0x04, 0x05]),
			Id::from([0x01, 0x02, 0x03, 0x04, 0x05])
		);

		let id = Id::new();
		assert_eq!(Id::from(id.0), id);
	}

	// `TryFrom<&[u8]>` accepts exactly 5 bytes and rejects every other
	// length.
	#[test]
	#[allow(clippy::unnecessary_fallible_conversions)] // that's what this test is for
	fn try_from_bytes() {
		assert_eq!(
			Id([0x01, 0x02, 0x03, 0x04, 0x05]),
			Id::try_from(&[0x01_u8, 0x02_u8, 0x03_u8, 0x04_u8, 0x05_u8][..]).unwrap()
		);

		assert!(Id::try_from(&[0_u8, 1_u8, 2_u8, 3_u8, 4_u8, 5_u8][..]).is_err());
		assert!(Id::try_from(&[0_u8, 1_u8, 2_u8, 3_u8][..]).is_err());
		assert!(Id::try_from(&b""[..]).is_err());
		assert!(Id::try_from(&b"This is not a valid binary representation of an ID"[..]).is_err());

		let id = Id::new();
		assert_eq!(Id::try_from(id.0).unwrap(), id);
	}

	// Known string representations decode to the expected bytes, both via
	// `TryFrom<&str>` and via `str::parse`.
	#[test]
	fn try_from_string() {
		assert_eq!(
			Id([0x31, 0x32, 0x33, 0x34, 0x35]),
			Id::try_from("1qDhG8Tr").unwrap()
		);
		assert_eq!(
			Id([0x11, 0x33, 0x55, 0x77, 0x99]),
			Id::try_from("0fXMgWQz").unwrap()
		);

		assert_eq!(
			Id([0x31, 0x32, 0x33, 0x34, 0x35]),
			"1qDhG8Tr".parse().unwrap()
		);
		assert_eq!(
			Id([0x11, 0x33, 0x55, 0x77, 0x99]),
			"0fXMgWQz".parse().unwrap()
		);
	}

	// Malformed strings are rejected with the matching error variant, both
	// via `TryFrom<&str>` and via `str::parse`.
	#[test]
	fn invalid_try_from_string() {
		assert_eq!(
			ConversionError::TooSmall,
			dbg!(Id::try_from("1qDhG8T")).unwrap_err()
		);
		assert_eq!(
			ConversionError::TooLarge,
			dbg!(Id::try_from("0fXMgWQzz")).unwrap_err()
		);
		assert_eq!(
			ConversionError::InvalidFormat,
			dbg!(Id::try_from("abcdefgh")).unwrap_err()
		);
		assert!(dbg!(Id::try_from("I'm an ID!")).is_err());
		assert!(dbg!(Id::try_from("1496758598688559105")).is_err());
		assert!(dbg!(Id::try_from("5a9b1460-53be-418a-9115-996c354c46df")).is_err());

		assert_eq!(
			ConversionError::TooSmall,
			dbg!("1qDhG8T".parse::<Id>()).unwrap_err()
		);
		assert_eq!(
			ConversionError::TooLarge,
			dbg!("0fXMgWQzz".parse::<Id>()).unwrap_err()
		);
		assert_eq!(
			ConversionError::InvalidFormat,
			dbg!("abcdefgh".parse::<Id>()).unwrap_err()
		);
		assert!(dbg!("I'm an ID!".parse::<Id>()).is_err());
		assert!(dbg!("1496758598688559105".parse::<Id>()).is_err());
		assert!(dbg!("5a9b1460-53be-418a-9115-996c354c46df".parse::<Id>()).is_err());
	}

	// Random Ids survive a round trip through their string representation
	// with every string conversion (`String`, `&String`, `str::parse`).
	#[test]
	fn to_from_string() {
		for _ in 0..10000 {
			let id = Id::new();
			assert_eq!(id, Id::try_from(id.to_string()).unwrap());
			assert_eq!(id, Id::try_from(&id.to_string()).unwrap());
			assert_eq!(id, id.to_string().parse().unwrap());
		}
	}

	// `TryFrom<u64>` accepts values that fit into 40 bits and rejects larger
	// ones.
	#[test]
	fn try_from_u64() {
		assert_eq!(
			Id([0x41, 0x42, 0x43, 0x44, 0x45]),
			Id::try_from(0x41_42_43_44_45_u64).unwrap()
		);

		let id = Id::new();
		assert_eq!(Id::try_from(id.to_u64()).unwrap(), id);

		assert!(Id::try_from(u64::MAX).is_err());
		assert!(Id::try_from(2u64.pow(41)).is_err());
	}

	// Converting an Id into `u64` and `[u8; 5]` agrees with a big-endian
	// interpretation of the random bytes it was built from.
	#[test]
	fn id_into() {
		for _ in 0..10000 {
			let arr = rand::random();
			let id = Id(arr);

			let mut n_arr = [0u8; 8];
			n_arr[3..].copy_from_slice(&arr[..]);
			let n = u64::from_be_bytes(n_arr);

			assert_eq!(u64::from(id), n);
			assert_eq!(<[u8; 5]>::from(id), arr);
		}
	}

	// Ids order the same way as the byte arrays they wrap.
	#[test]
	fn id_ord() {
		for _ in 0..10000 {
			let a: [u8; 5] = rand::random();
			let b: [u8; 5] = rand::random();

			assert_eq!(a.cmp(&b), Id(a).cmp(&Id(b)));
			assert_eq!(a.partial_cmp(&b), Id(a).partial_cmp(&Id(b)));
		}
	}

	// Ids work as `HashMap` keys: every inserted key can be looked up again.
	#[test]
	fn id_hash() {
		let mut map = HashMap::new();

		for _ in 0..10000 {
			let id = Id(rand::random());
			map.insert(id, id);
		}

		for k in map.keys() {
			assert_eq!(map.get(k), Some(k));
		}
	}
}