Maraiah/maraiah/text.rs

219 lines
5.6 KiB
Rust
Raw Permalink Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

//! Text conversion utilities.
/// Renders a byte count as a short human-readable size string.
///
/// Sizes use decimal (power-of-1000) units from `kB` up to `TB`, and the
/// value is truncated to a whole number of the largest unit that fits.
/// Counts below 1000 are spelled out in bytes, and zero is rendered as
/// `"empty"`.
///
/// # Examples
///
/// ```
/// use maraiah::text::to_binsize;
///
/// assert_eq!(to_binsize(5000), "5kB".to_string());
/// assert_eq!(to_binsize(1), "1 byte".to_string());
/// assert_eq!(to_binsize(0), "empty".to_string());
/// ```
pub fn to_binsize(n: u64) -> String
{
    // Divisor and suffix for every supported unit, largest first, so the
    // first entry that fits is the one to print.
    const UNITS: [(u64, &str); 4] = [
        (1_000_000_000_000, "TB"),
        (1_000_000_000, "GB"),
        (1_000_000, "MB"),
        (1_000, "kB"),
    ];

    if n == 0 {
        return String::from("empty");
    }

    match UNITS.iter().find(|&&(scale, _)| n >= scale) {
        // integer division: the fractional part is dropped on purpose
        Some(&(scale, suffix)) => format!("{:1}{}", n / scale, suffix),
        // smaller than a kilobyte, so count plain bytes
        None => {
            let noun = if n == 1 { "byte" } else { "bytes" };
            format!("{} {}", n, noun)
        }
    }
}
/// Encodes or decodes a string in the terminal encryption format.
///
/// The input is processed in groups of four bytes: the first two bytes of
/// each group are left alone, the third is XORed with `0xfe` and the fourth
/// with `0xed`. Any trailing bytes that do not fill a whole group are each
/// XORed with `0xfe`. Since every step is an XOR with a fixed value, running
/// the function on its own output gives back the original bytes.
pub fn fuck_string(s: &[u8]) -> Vec<u8>
{
    let mut out = s.to_vec();

    let mut groups = out.chunks_exact_mut(4);
    for group in groups.by_ref() {
        group[2] ^= 0xfe;
        group[3] ^= 0xed;
    }

    // whatever is left over (0 to 3 bytes) gets the single-byte key
    for byte in groups.into_remainder() {
        *byte ^= 0xfe;
    }

    out
}
/// Reads a Pascal-style (length-prefixed) byte string with bounds checking.
///
/// The first byte of `b` is the length of the string, and the string itself
/// follows immediately after. Returns `None` if `b` is empty or holds fewer
/// bytes than the length prefix promises.
///
/// # Examples
///
/// ```
/// use maraiah::text::pascal_str;
///
/// assert_eq!(pascal_str(b"\x0bhello world"), b"hello world"[..].into());
/// assert_eq!(pascal_str(b"\x0chello world"), None);
/// assert_eq!(pascal_str(&[]), None);
/// ```
pub fn pascal_str(b: &[u8]) -> Option<&[u8]>
{
    // peel off the length byte; an empty slice has none
    let (&len, body) = b.split_first()?;
    body.get(..usize::from(len))
}
/// Converts input from Mac Roman to a Unicode string.
///
/// Bytes below `0x80` are taken as-is, except that a carriage return
/// (`\r`) is turned into a line feed (`\n`). Bytes from `0x80` upward are
/// looked up in the `TR` table.
///
/// # Examples
///
/// ```
/// use maraiah::text::mac_roman_conv;
///
/// assert_eq!(mac_roman_conv(b"p\x8cth"), "p\u{e5}th");
/// assert_eq!(mac_roman_conv(b"I\xd5ve"), "I\u{2019}ve");
/// assert_eq!(mac_roman_conv(b"a\rb"), "a\nb");
/// ```
pub fn mac_roman_conv(s: &[u8]) -> String
{
    s.iter()
        .map(|&byte| match byte {
            // Mac line endings become Unix ones
            b'\r' => '\n',
            // high half: drop the top bit to get the table index
            0x80..=0xFF => TR[usize::from(byte & 0x7F)],
            // plain ASCII maps straight across
            _ => char::from(byte),
        })
        .collect()
}
/// Converts a C-style string from Mac Roman to Unicode.
///
/// Only the bytes before the first NUL are converted; if `s` contains no
/// NUL byte the whole slice is converted.
///
/// # Examples
///
/// ```
/// use maraiah::text::mac_roman_cstr;
///
/// assert_eq!(mac_roman_cstr(b"I\xd5ve awaken\0ed"), "I\u{2019}ve awaken");
/// assert_eq!(mac_roman_cstr(b"I\xd5ve awaken\0"), "I\u{2019}ve awaken");
/// assert_eq!(mac_roman_cstr(b"I\xd5ve awaken"), "I\u{2019}ve awaken");
/// ```
#[inline]
pub fn mac_roman_cstr(s: &[u8]) -> String
{
    // end of the string proper: the terminator, or the end of the buffer
    let end = memchr::memchr(0, s).unwrap_or(s.len());
    mac_roman_conv(&s[..end])
}
/// Converts input from a Unicode string to Mac Roman.
///
/// Line feeds (`\n`) are written as carriage returns (`\r`), and all other
/// ASCII characters are written unchanged. Any other character is looked up
/// in the `TR` table; characters that have no entry there are replaced with
/// the byte `0x7f`.
///
/// # Examples
///
/// ```
/// use maraiah::text::to_mac_roman;
///
/// assert_eq!(to_mac_roman("p\u{e5}th"), b"p\x8cth".to_vec());
/// assert_eq!(to_mac_roman("I\u{2019}ve\n"), b"I\xd5ve\r".to_vec());
/// assert_eq!(to_mac_roman("\u{4e2d}"), b"\x7f".to_vec());
/// ```
pub fn to_mac_roman(s: &str) -> Vec<u8>
{
    // Every character yields exactly one byte, so the UTF-8 length is an
    // upper bound on the output size.
    let mut out = Vec::with_capacity(s.len());

    out.extend(s.chars().map(|ch| match ch {
        // Unix line endings become Mac ones
        '\n' => b'\r',
        // ASCII range: the cast cannot truncate
        '\0'..='\x7f' => ch as u8,
        // High half: the table position is the byte minus 0x80. The table
        // has 128 entries, so `idx as u8 + 0x80` always fits in a byte.
        _ => TR
            .iter()
            .position(|&mapped| mapped == ch)
            .map_or(0x7f, |idx| idx as u8 + 0x80),
    }));

    out
}
/// Pads a byte vector with trailing zeroes up to a minimum length.
///
/// If `v` already holds `n` bytes or more it is returned unchanged; the
/// vector is never shortened.
///
/// # Examples
///
/// ```
/// use maraiah::text::pad_zero;
///
/// let s = b"p\x8cth".to_vec();
///
/// assert_eq!(pad_zero(s.clone(), 8), b"p\x8cth\0\0\0\0".to_vec());
/// assert_eq!(pad_zero(s.clone(), 6), b"p\x8cth\0\0".to_vec());
/// assert_eq!(pad_zero(s.clone(), 4), b"p\x8cth".to_vec());
/// assert_eq!(pad_zero(s.clone(), 2), b"p\x8cth".to_vec());
/// assert_eq!(pad_zero(s.clone(), 0), b"p\x8cth".to_vec());
/// ```
pub fn pad_zero(mut v: Vec<u8>, n: usize) -> Vec<u8>
{
    // `resize` would truncate a longer vector, so only grow
    if v.len() < n {
        v.resize(n, 0);
    }
    v
}
/// Unicode characters for the upper half (`0x80..=0xFF`) of Mac Roman.
///
/// The entry at index `i` is the character for byte `0x80 + i`: decoding
/// indexes this table with the byte's low seven bits, and encoding searches
/// it for the character and adds `0x80` to the position found.
const TR: [char; 128] =
['\u{00c4}', '\u{00c5}', '\u{00c7}', '\u{00c9}', '\u{00d1}', '\u{00d6}',
'\u{00dc}', '\u{00e1}', '\u{00e0}', '\u{00e2}', '\u{00e4}', '\u{00e3}',
'\u{00e5}', '\u{00e7}', '\u{00e9}', '\u{00e8}', '\u{00ea}', '\u{00eb}',
'\u{00ed}', '\u{00ec}', '\u{00ee}', '\u{00ef}', '\u{00f1}', '\u{00f3}',
'\u{00f2}', '\u{00f4}', '\u{00f6}', '\u{00f5}', '\u{00fa}', '\u{00f9}',
'\u{00fb}', '\u{00fc}', '\u{2020}', '\u{00b0}', '\u{00a2}', '\u{00a3}',
'\u{00a7}', '\u{2022}', '\u{00b6}', '\u{00df}', '\u{00ae}', '\u{00a9}',
'\u{2122}', '\u{00b4}', '\u{00a8}', '\u{2260}', '\u{00c6}', '\u{00d8}',
'\u{221e}', '\u{00b1}', '\u{2264}', '\u{2265}', '\u{00a5}', '\u{00b5}',
'\u{2202}', '\u{2211}', '\u{220f}', '\u{03c0}', '\u{222b}', '\u{00aa}',
'\u{00ba}', '\u{03a9}', '\u{00e6}', '\u{00f8}', '\u{00bf}', '\u{00a1}',
'\u{00ac}', '\u{221a}', '\u{0192}', '\u{2248}', '\u{2206}', '\u{00ab}',
'\u{00bb}', '\u{2026}', '\u{00a0}', '\u{00c0}', '\u{00c3}', '\u{00d5}',
'\u{0152}', '\u{0153}', '\u{2013}', '\u{2014}', '\u{201c}', '\u{201d}',
'\u{2018}', '\u{2019}', '\u{00f7}', '\u{25ca}', '\u{00ff}', '\u{0178}',
'\u{2044}', '\u{20ac}', '\u{2039}', '\u{203a}', '\u{fb01}', '\u{fb02}',
'\u{2021}', '\u{00b7}', '\u{201a}', '\u{201e}', '\u{2030}', '\u{00c2}',
'\u{00ca}', '\u{00c1}', '\u{00cb}', '\u{00c8}', '\u{00cd}', '\u{00ce}',
'\u{00cf}', '\u{00cc}', '\u{00d3}', '\u{00d4}', '\u{f8ff}', '\u{00d2}',
'\u{00da}', '\u{00db}', '\u{00d9}', '\u{0131}', '\u{02c6}', '\u{02dc}',
'\u{00af}', '\u{02d8}', '\u{02d9}', '\u{02da}', '\u{00b8}', '\u{02dd}',
'\u{02db}', '\u{02c7}'];
// Checks `to_binsize` on zero, the byte range, and whole multiples of
// every unit from kB to TB.
#[test]
fn to_binsize_integrals()
{
    // (input, expected rendering)
    const CASES: [(u64, &str); 12] = [
        (0, "empty"),
        (1, "1 byte"),
        (2, "2 bytes"),
        (999, "999 bytes"),
        (1000, "1kB"),
        (1000 * 7, "7kB"),
        (1000 * 1000, "1MB"),
        (1000 * 1000 * 7, "7MB"),
        (1000 * 1000 * 1000, "1GB"),
        (1000 * 1000 * 1000 * 7, "7GB"),
        (1000 * 1000 * 1000 * 1000, "1TB"),
        (1000 * 1000 * 1000 * 1000 * 7, "7TB"),
    ];

    for &(input, expected) in CASES.iter() {
        assert_eq!(to_binsize(input), expected);
    }
}
// EOF