summaryrefslogtreecommitdiffhomepage
path: root/include/unicode/validation.h
blob: b5060c4f57c1efb6dd638e5b7769547e8d01dcef (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
//
// Reichwein.IT Unicode Library
//
// Functions for validation of UTF (Unicode Transformation Format) encodings
//

#pragma once

#include "unicode/endian.h"
#include "unicode/iso.h"
#include "unicode/optimization.h"
#include "unicode/predicate.h"
#include "unicode/types.h"
#include "unicode/type_traits.h"
#include "unicode/utf.h"

#include <algorithm>
#include <array>
#include <cstdint>
#include <iterator>
#include <memory>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <utility>

namespace unicode {

 // is_valid_utf(), variant 1: the encoding is named explicitly as the
 // template argument, e.g.
 //
 //   unicode::UTF_8
 //   unicode::UTF_16
 //   unicode::UTF_32
 //
 // The argument has to be of the encoding's own string type
 // (Encoding::string_type); the code unit type handed on to the validation
 // routine is Encoding::value_type. This overload only takes part in
 // overload resolution if is_encoding_v<Encoding> is true.
 //
 // Returns the result of validate_utf() for the given string.
 //
 // NOTE(review): validate_utf() is not defined in this header; presumably it
 // is provided by one of the included unicode/ headers - TODO confirm.
 //
 // see also type_traits.h and utf.h
 template<typename Encoding, std::enable_if_t<is_encoding_v<Encoding>, bool> = true>
 bool is_valid_utf(const typename Encoding::string_type& s)
 {
  using code_unit_type = typename Encoding::value_type;

  return validate_utf<code_unit_type>(s);
 }

 // is_valid_utf(), variant 2: the encoding is selected via the character
 // type T.
 //
 // T cannot be deduced from the function argument and therefore has to be
 // given explicitly; Container defaults to std::basic_string<T>. This
 // overload only takes part in overload resolution if is_char_v<T> is true.
 //
 // The whole input is walked once with the UTF input iterator and every
 // resulting char32_t is discarded. Returns false if that traversal throws
 // std::invalid_argument, true otherwise. Any other exception propagates to
 // the caller.
 //
 // NOTE(review): the iterator types below are instantiated with T only, i.e.
 // Container is not forwarded to them - confirm this is intended for
 // containers other than std::basic_string<T>.
 //
 // see also type_traits.h for is_char
 template<typename T,
  typename Container=std::basic_string<T>,
  std::enable_if_t<is_char_v<T>, bool> = true>
 bool is_valid_utf(const Container& s)
 {
  using UTF_Trait = UTF<utf_iterator<T>, utf_back_insert_iterator<T>>;

  // Visitor without any effect: only the traversal itself is of interest.
  const auto discard = [](const char32_t&) {};

  try {
   std::for_each(UTF_Trait::begin(s), UTF_Trait::end(s), discard);
   return true;
  } catch (const std::invalid_argument&) {
   return false;
  }
 }

 // is_valid_utf(), variant 3: the encoding is selected via the container
 // type, so the call needs no explicit template arguments.
 //
 // The code unit type is Container::value_type. This overload only takes
 // part in overload resolution if is_container_v<Container> is true.
 //
 // The whole input is walked once with the UTF input iterator and every
 // resulting char32_t is discarded. Returns false if that traversal throws
 // std::invalid_argument, true otherwise. Any other exception propagates to
 // the caller.
 //
 // see also type_traits.h for is_container
 template<typename Container, std::enable_if_t<is_container_v<Container>, bool> = true>
 bool is_valid_utf(const Container& s)
 {
  using code_unit_type = typename Container::value_type;
  using input_iterator = utf_iterator<code_unit_type, Container>;
  using output_iterator = utf_back_insert_iterator<code_unit_type, Container>;
  using UTF_Trait = UTF<input_iterator, output_iterator>;

  // Visitor without any effect: only the traversal itself is of interest.
  const auto discard = [](const char32_t&) {};

  try {
   std::for_each(UTF_Trait::begin(s), UTF_Trait::end(s), discard);
   return true;
  } catch (const std::invalid_argument&) {
   return false;
  }
 }

} // namespace unicode