Struct utf8_ranges::Utf8Sequences [] [src]

pub struct Utf8Sequences {
    // some fields omitted
}

An iterator over ranges of matching UTF-8 byte sequences.

The iteration represents an alternation of comprehensive byte sequences that match precisely the set of UTF-8 encoded scalar values in the range given.

A byte sequence corresponds to one of the scalar values in the range given if and only if it completely matches exactly one of the sequences of byte ranges produced by this iterator.

Each sequence of byte ranges matches a unique set of bytes. That is, no two sequences will match the same bytes.

Example

This shows how to match an arbitrary byte sequence against a range of scalar values.

use utf8_ranges::{Utf8Sequences, Utf8Sequence};

// Reports whether `bytes` is matched by at least one of the sequences in
// `seqs`; yields `false` when no sequence matches (including when `seqs`
// is empty).
fn matches(seqs: &[Utf8Sequence], bytes: &[u8]) -> bool {
    seqs.iter().any(|seq| seq.matches(bytes))
}

// Test the Basic Multilingual Plane: collect every byte-range sequence
// produced for the scalar values given by `'\u{0}'` and `'\u{FFFF}'`.
let seqs: Vec<_> = Utf8Sequences::new('\u{0}', '\u{FFFF}').collect();

// UTF-8 encoding of 'a'.
assert!(matches(&seqs, &[0x61]));
// UTF-8 encoding of '☃' (`\u{2603}`).
assert!(matches(&seqs, &[0xE2, 0x98, 0x83]));
// UTF-8 encoding of `\u{10348}` (outside the BMP), so it must not match.
assert!(!matches(&seqs, &[0xF0, 0x90, 0x8D, 0x88]));
// Tries to match against a UTF-8-style encoding of a surrogate codepoint.
// Such a byte sequence is invalid UTF-8, so the match fails even though
// the corresponding codepoint (0xD800) falls in the range given.
assert!(!matches(&seqs, &[0xED, 0xA0, 0x80]));
// And fails against plain old invalid UTF-8.
assert!(!matches(&seqs, &[0xFF, 0xFF]));

If this example seems circuitous, that's because it is! It's meant to be illustrative. In practice, you could just try to decode your byte sequence and compare it with the scalar value range directly. However, this is not always possible (for example, in a byte-based automaton).

Methods

impl Utf8Sequences

fn new(start: char, end: char) -> Self

Create a new iterator over UTF-8 byte ranges for the scalar value range given.

Trait Implementations

impl Iterator for Utf8Sequences

type Item = Utf8Sequence

fn next(&mut self) -> Option<Self::Item>

1.0.0fn size_hint(&self) -> (usize, Option<usize>)

1.0.0fn count(self) -> usize

1.0.0fn last(self) -> Option<Self::Item>

1.0.0fn nth(&mut self, n: usize) -> Option<Self::Item>

1.0.0fn chain<U>(self, other: U) -> Chain<Self, U::IntoIter> where U: IntoIterator<Item=Self::Item>

1.0.0fn zip<U>(self, other: U) -> Zip<Self, U::IntoIter> where U: IntoIterator

1.0.0fn map<B, F>(self, f: F) -> Map<Self, F> where F: FnMut(Self::Item) -> B

1.0.0fn filter<P>(self, predicate: P) -> Filter<Self, P> where P: FnMut(&Self::Item) -> bool

1.0.0fn filter_map<B, F>(self, f: F) -> FilterMap<Self, F> where F: FnMut(Self::Item) -> Option<B>

1.0.0fn enumerate(self) -> Enumerate<Self>

1.0.0fn peekable(self) -> Peekable<Self>

1.0.0fn skip_while<P>(self, predicate: P) -> SkipWhile<Self, P> where P: FnMut(&Self::Item) -> bool

1.0.0fn take_while<P>(self, predicate: P) -> TakeWhile<Self, P> where P: FnMut(&Self::Item) -> bool

1.0.0fn skip(self, n: usize) -> Skip<Self>

1.0.0fn take(self, n: usize) -> Take<Self>

1.0.0fn scan<St, B, F>(self, initial_state: St, f: F) -> Scan<Self, St, F> where F: FnMut(&mut St, Self::Item) -> Option<B>

1.0.0fn flat_map<U, F>(self, f: F) -> FlatMap<Self, U, F> where U: IntoIterator, F: FnMut(Self::Item) -> U

1.0.0fn fuse(self) -> Fuse<Self>

1.0.0fn inspect<F>(self, f: F) -> Inspect<Self, F> where F: FnMut(&Self::Item) -> ()

1.0.0fn by_ref(&mut self) -> &mut Self

1.0.0fn collect<B>(self) -> B where B: FromIterator<Self::Item>

1.0.0fn partition<B, F>(self, f: F) -> (B, B) where F: FnMut(&Self::Item) -> bool, B: Default + Extend<Self::Item>

1.0.0fn fold<B, F>(self, init: B, f: F) -> B where F: FnMut(B, Self::Item) -> B

1.0.0fn all<F>(&mut self, f: F) -> bool where F: FnMut(Self::Item) -> bool

1.0.0fn any<F>(&mut self, f: F) -> bool where F: FnMut(Self::Item) -> bool

1.0.0fn find<P>(&mut self, predicate: P) -> Option<Self::Item> where P: FnMut(&Self::Item) -> bool

1.0.0fn position<P>(&mut self, predicate: P) -> Option<usize> where P: FnMut(Self::Item) -> bool

1.0.0fn rposition<P>(&mut self, predicate: P) -> Option<usize> where P: FnMut(Self::Item) -> bool, Self: ExactSizeIterator + DoubleEndedIterator

1.0.0fn max(self) -> Option<Self::Item> where Self::Item: Ord

1.0.0fn min(self) -> Option<Self::Item> where Self::Item: Ord

1.6.0fn max_by_key<B, F>(self, f: F) -> Option<Self::Item> where B: Ord, F: FnMut(&Self::Item) -> B

1.6.0fn min_by_key<B, F>(self, f: F) -> Option<Self::Item> where F: FnMut(&Self::Item) -> B, B: Ord

1.0.0fn rev(self) -> Rev<Self> where Self: DoubleEndedIterator

1.0.0fn unzip<A, B, FromA, FromB>(self) -> (FromA, FromB) where FromA: Default + Extend<A>, FromB: Default + Extend<B>, Self: Iterator<Item=(A, B)>

1.0.0fn cloned<'a, T>(self) -> Cloned<Self> where Self: Iterator<Item=&'a T>, T: 'a + Clone

1.0.0fn cycle(self) -> Cycle<Self> where Self: Clone

fn sum<S>(self) -> S where S: Add<Self::Item, Output=S> + Zero

fn product<P>(self) -> P where P: Mul<Self::Item, Output=P> + One

1.5.0fn cmp<I>(self, other: I) -> Ordering where I: IntoIterator<Item=Self::Item>, Self::Item: Ord

1.5.0fn partial_cmp<I>(self, other: I) -> Option<Ordering> where I: IntoIterator, Self::Item: PartialOrd<I::Item>

1.5.0fn eq<I>(self, other: I) -> bool where I: IntoIterator, Self::Item: PartialEq<I::Item>

1.5.0fn ne<I>(self, other: I) -> bool where I: IntoIterator, Self::Item: PartialEq<I::Item>

1.5.0fn lt<I>(self, other: I) -> bool where I: IntoIterator, Self::Item: PartialOrd<I::Item>

1.5.0fn le<I>(self, other: I) -> bool where I: IntoIterator, Self::Item: PartialOrd<I::Item>

1.5.0fn gt<I>(self, other: I) -> bool where I: IntoIterator, Self::Item: PartialOrd<I::Item>

1.5.0fn ge<I>(self, other: I) -> bool where I: IntoIterator, Self::Item: PartialOrd<I::Item>