monotone

monotone Mtn Source Tree

Root/botan/def_char.cpp

1/*************************************************
2* Default Character Set Handling Source File *
3* (C) 1999-2006 The Botan Project *
4*************************************************/
5
6#include <botan/def_char.h>
7#include <botan/exceptn.h>
8#include <botan/parsing.h>
9
10namespace Botan {
11
12namespace {
13
14/*************************************************
15* Convert from UCS-2 to ISO 8859-1 *
16*************************************************/
17std::string ucs2_to_latin1(const std::string& ucs2)
18 {
19 if(ucs2.size() % 2 == 1)
20 throw Decoding_Error("UCS-2 string has an odd number of bytes");
21
22 std::string latin1;
23
24 for(u32bit j = 0; j != ucs2.size(); j += 2)
25 {
26 const byte c1 = ucs2[j];
27 const byte c2 = ucs2[j+1];
28
29 if(c1 != 0)
30 throw Decoding_Error("UCS-2 has non-Latin1 characters");
31
32 latin1 += (char)c2;
33 }
34
35 return latin1;
36 }
37
38/*************************************************
39* Convert from UTF-8 to ISO 8859-1 *
40*************************************************/
41std::string utf8_to_latin1(const std::string& utf8)
42 {
43 std::string iso8859;
44
45 u32bit position = 0;
46 while(position != utf8.size())
47 {
48 const byte c1 = (byte)utf8[position++];
49
50 if(c1 <= 0x7F)
51 iso8859 += (char)c1;
52 else if(c1 >= 0xC0 && c1 <= 0xC7)
53 {
54 if(position == utf8.size())
55 throw Decoding_Error("UTF-8: sequence truncated");
56
57 const byte c2 = (byte)utf8[position++];
58 const byte iso_char = ((c1 & 0x07) << 6) | (c2 & 0x3F);
59
60 if(iso_char <= 0x7F)
61 throw Decoding_Error("UTF-8: sequence longer than needed");
62
63 iso8859 += (char)iso_char;
64 }
65 else
66 throw Decoding_Error("UTF-8: Unicode chars not in Latin1 used");
67 }
68
69 return iso8859;
70 }
71
/*************************************************
* Convert from ISO 8859-1 to UTF-8               *
*************************************************/
/*
* Encodes an ISO 8859-1 string as UTF-8. Bytes 0x00..0x7F are copied
* unchanged; each byte 0x80..0xFF becomes a two-byte sequence
* (110000xx 10xxxxxx). This conversion cannot fail.
*/
std::string latin1_to_utf8(const std::string& iso8859)
   {
   std::string utf8;
   utf8.reserve(iso8859.size());

   // Index with the string's own size type: a 32-bit counter would wrap
   // before reaching size() on inputs of 4 GiB or more.
   for(std::string::size_type j = 0; j != iso8859.size(); ++j)
      {
      const unsigned char c = static_cast<unsigned char>(iso8859[j]);

      if(c <= 0x7F)
         utf8 += static_cast<char>(c);
      else
         {
         // Top two bits go in the lead byte, low six in the trailing byte
         utf8 += static_cast<char>(0xC0 | (c >> 6));
         utf8 += static_cast<char>(0x80 | (c & 0x3F));
         }
      }

   return utf8;
   }
92
93}
94
95/*************************************************
96* Transcode between character sets *
97*************************************************/
98std::string Default_Charset_Transcoder::transcode(const std::string& str,
99 Character_Set to,
100 Character_Set from) const
101 {
102 if(to == LOCAL_CHARSET)
103 to = LATIN1_CHARSET;
104 if(from == LOCAL_CHARSET)
105 from = LATIN1_CHARSET;
106
107 if(to == from)
108 return str;
109
110 if(from == LATIN1_CHARSET && to == UTF8_CHARSET)
111 return latin1_to_utf8(str);
112 if(from == UTF8_CHARSET && to == LATIN1_CHARSET)
113 return utf8_to_latin1(str);
114 if(from == UCS2_CHARSET && to == LATIN1_CHARSET)
115 return ucs2_to_latin1(str);
116
117 throw Invalid_Argument("Unknown transcoding operation from " +
118 to_string(from) + " to " + to_string(to));
119 }
120
121}

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status