Converting text from one encoding to another
To convert text from one text encoding to another, you must use the cahute_convert_text() function. There are several possible approaches you can take.
Using single pass conversion for small buffers
For small blobs of data, such as the name of a program, or the name of a file or directory, you can use a one pass approach with a static buffer.
In this case, you can make a single call to cahute_convert_text(), which, in nominal cases, should return either:
- CAHUTE_OK, if the input data has been read in its entirety, and no sentinels were detected.
- CAHUTE_ERROR_TERMINATED, if a sentinel has been encountered in the source data.
An example implementation is the following:
/* Compile using: gcc convert-simple.c `pkg-config cahute --cflags --libs`. */
#include <stdint.h>
#include <stdio.h>
#include <cahute.h>
/* Sample text to convert: fifteen 16-bit character codes. Fourteen of them
 * are plain ASCII characters; the remaining one, 0xCE, lies outside of the
 * ASCII range and is what makes the conversion worth demonstrating. */
static uint16_t example[] = {
    '\\', '\\', 'f', 'l', 's', '0', '\\', /* "\\fls0\" */
    'a', 'n', 'g', 0xCE,                  /* "ang" + non-ASCII code */
    '.', 't', 'x', 't'                    /* ".txt" */
};
/* Convert ``example`` to UTF-8 with a single cahute_convert_text() call,
 * then print the result as a C string.
 *
 * Returns 0 if the text was converted and printed, 1 otherwise. */
int main(void) {
    cahute_context *context;
    char buf[128];
    /* Working copies of the destination and source cursors: the conversion
     * function receives their addresses and updates them as it progresses,
     * which is what lets us terminate the string at ``dest`` afterwards. */
    void *dest = buf;
    size_t dest_size = sizeof(buf);
    void const *source = example;
    size_t source_size = sizeof(example);
    int err, ret = 1;

    err = cahute_create_context(&context);
    if (err) {
        fprintf(
            stderr,
            "cahute_create_context() has returned error %s.\n",
            cahute_get_error_name(err)
        );
        return 1;
    }

    err = cahute_convert_text(
        context,
        &dest,
        &dest_size,
        &source,
        &source_size,
        CAHUTE_TEXT_ENCODING_UTF8,
        CAHUTE_TEXT_ENCODING_9860_16_HOST
    );
    if ((!err || err == CAHUTE_ERROR_TERMINATED) && !dest_size) {
        /* We need enough space to add a terminating zero here. */
        err = CAHUTE_ERROR_SIZE;
    }

    /* Encountering a sentinel is a nominal way for the conversion to end,
     * so only the other error codes are actual failures. */
    if (err && err != CAHUTE_ERROR_TERMINATED) {
        printf("Conversion has failed: error 0x%04X has occurred.\n", err);
        goto fail;
    }

    *(char *)dest = 0;
    printf("Result: %s\n", buf);
    ret = 0;

fail:
    cahute_destroy_context(context);
    /* Report the failure to the caller instead of always exiting with 0. */
    return ret;
}
This program displays the following output:
Result: \\fls0\angθ.txt
Using multi pass conversion on output
If your source data is larger, you can do multiple passes into a buffer before either placing the result into a stream, or reallocating a buffer progressively using realloc().
For every pass, you need to call cahute_convert_text() with the output set to your buffer, and the input set to your source memory. On every pass, in nominal circumstances, the function will return one of the following:
- CAHUTE_OK, if the conversion has terminated successfully, i.e. if there was no more content to read from the input.
- CAHUTE_ERROR_TERMINATED, if the conversion has been interrupted due to a sentinel being found in the source data.
- CAHUTE_ERROR_SIZE, if the conversion has run out of space in the output buffer, prompting you to make another pass after reading the contents of the output buffer.
An example that places the result of each pass into the standard output is the following:
/* Compile using: gcc convert-multi-out.c `pkg-config cahute --cflags --libs`. */
#include <stdio.h>
#include <cahute.h>
/* Example long buffer: the input that main() below hands to
 * cahute_convert_text() with CAHUTE_TEXT_ENCODING_9860_8 as the source
 * encoding.
 *
 * The literal is deliberately cut into adjacent pieces: a hexadecimal escape
 * such as \x0E extends over every hexadecimal digit that follows it, so a
 * piece has to end right after the escape whenever the next character is one
 * of 0-9, a-f or A-F (e.g. "1\x0E" followed by "C..."). Do not merge the
 * pieces without checking this, or the resulting bytes will change.
 *
 * The data ends with an explicit \x00 byte, after which the string literal
 * adds its own terminating zero; both are counted by sizeof(example). */
static cahute_u8 const example[] =
"4\x0EN\x0D"
"8\x0ES\x0D"
"1\x0E"
"C\x0D\"ENTREZ LES OPERATEURS\"\x0D"
"\xF7\x08N\x0D\xF7\x08\x7F\x8F:\xF7\x09:\xF7\x0A:\x7F\x8F\x0E"
"D\x0D"
"\xF7\x0B"
"D=0\x0D\x0D\xF7\x00"
"D=43:\xF7\x01\x0D\xF7\x10S,4,\"\xA9"
"\"\x0DS\x89"
"1\x0ES:N\x99"
"1\x0EN\x0DN\x11"
"3\x7F\xB0N\x11"
"2\x7F\xB0"
"N\x11"
"0\x13"
"0\x0E"
"C:\xF7\x03\x0D\xF7\x00"
"D=42:\xF7\x01\x0D"
"\xF7\x10S,4,\"\x89\"\x0DS\x89"
"1\x0ES:N\x99"
"1\x0EN\x0D"
"0\x0E"
"C:\xF7\x03\x0D\xF7\x00"
"D=32:\xF7\x01\x0D\xF7\x10S,4,\"\x99\"\x0DS\x89"
"1\x0ES:N\x99"
"1\x0EN\x0DN\x11"
"1\x13"
"0\x0E"
"C:\xF7\x03\x0D\xF7\x00"
"D=33:\xF7\x01\x0D\xF7\x10S,4,\"\xB9\"\x0DS\x89"
"1\x0ES:N\x99"
"1\x0EN"
"\x0D"
"0\x0E"
"C:\xF7\x03\x0D\xF7\x09\x0D\x0D\xF7\x00"
"C=1:\xF7\x01\x0D"
"\"GOLDORAK\"\x0D\xF7\x02\x0D\"INVALIDE\"\x0D\xF7\x03\x00";
/* Convert ``example`` to UTF-8 through a small output buffer, making as many
 * passes as required and writing the result of each pass to standard output.
 *
 * Returns 0 if the whole conversion succeeded, 1 otherwise. */
int main(void) {
    cahute_context *context;
    cahute_u8 buf[64];
    /* The source cursor is kept across passes, so that every pass resumes
     * where the previous one stopped. */
    void const *src = example;
    size_t src_size = sizeof(example);
    void *dest;
    size_t dest_size;
    int i, err, ret = 1;

    err = cahute_create_context(&context);
    if (err) {
        fprintf(
            stderr,
            "cahute_create_context() has returned error %s.\n",
            cahute_get_error_name(err)
        );
        return 1;
    }

    for (i = 0;; i++) {
        size_t converted;

        /* The destination cursor is rewound on every pass, since the
         * buffer has been flushed to standard output by then. */
        dest = buf;
        dest_size = sizeof(buf);

        err = cahute_convert_text(
            context,
            &dest,
            &dest_size,
            &src,
            &src_size,
            CAHUTE_TEXT_ENCODING_UTF8,
            CAHUTE_TEXT_ENCODING_9860_8
        );

        /* ``dest_size`` now holds the space left in the buffer. */
        converted = sizeof(buf) - dest_size;
        printf(
            "Pass %d: %zu bytes converted, error set to 0x%04X:\n",
            i,
            converted,
            err
        );

        if (converted) {
            printf("---\n");
            fwrite(buf, 1, converted, stdout);
            printf("\n---\n");
        }

        if (err == CAHUTE_ERROR_SIZE) {
            /* Not enough bytes in the destination buffer.
             * We want to check that at least one byte has been converted,
             * otherwise it means our buffer is not big enough for the
             * first byte. */
            if (!converted) {
                break;
            }

            continue;
        }

        break;
    }

    if (err && err != CAHUTE_ERROR_TERMINATED) {
        printf("Conversion has failed.\n");
    } else {
        /* Both a fully consumed input and a sentinel are nominal endings. */
        ret = 0;
    }

    cahute_destroy_context(context);
    /* Report the failure to the caller instead of always exiting with 0. */
    return ret;
}
This program displays the following output:
Pass 0: 63 bytes converted, error set to 0x0006:
---
4→N
8→S
1→C
"ENTREZ LES OPERATEURS"
While N
While Getkey:
---
Pass 1: 64 bytes converted, error set to 0x0006:
---
WhileEnd:Do:Getkey→D
LpWhile D=0
If D=43:Then
Locate S,4,"×
---
Pass 2: 64 bytes converted, error set to 0x0006:
---
"
S+1→S:N‐1→N
N≠3 And N≠2 And N≠0⇒0→C:IfEnd
If D
---
Pass 3: 63 bytes converted, error set to 0x0006:
---
=42:Then
Locate S,4,"+"
S+1→S:N‐1→N
0→C:IfEnd
If D=32:
---
Pass 4: 64 bytes converted, error set to 0x0006:
---
Then
Locate S,4,"‐"
S+1→S:N‐1→N
N≠1⇒0→C:IfEnd
If
---
Pass 5: 57 bytes converted, error set to 0x0006:
---
D=33:Then
Locate S,4,"÷"
S+1→S:N‐1→N
0→C:IfEnd
---
Pass 6: 56 bytes converted, error set to 0x0000:
---
WhileEnd
If C=1:Then
"GOLDORAK"
Else
"INVALIDE"
IfEnd
---
Multi pass conversion on input
If you read your source data from a stream, you can do multiple passes on the input.
For every pass, you need to call cahute_convert_text() with the input set to your read buffer. On every pass, in nominal circumstances, the function will return one of the following:
- CAHUTE_ERROR_TERMINATED, if a sentinel was found in the source data.
- CAHUTE_ERROR_TRUNC, if the input was found to be truncated, prompting you to do another pass while keeping the rest of the data.
- CAHUTE_OK, if all of the source data was converted but no sentinel was found, prompting you to do another pass, which must end gracefully rather than crash if no more bytes are available.
An example that reads from a memory area into a read buffer is the following:
/* Compile using: gcc convert-multi-in.c `pkg-config cahute --cflags --libs`. */
#include <stdio.h>
#include <string.h>
#include <cahute.h>
/* Example long buffer: the memory area that main() below reads chunk by
 * chunk into its read buffer before converting it with
 * CAHUTE_TEXT_ENCODING_9860_8 as the source encoding.
 *
 * The literal is deliberately cut into adjacent pieces: a hexadecimal escape
 * such as \x0E extends over every hexadecimal digit that follows it, so a
 * piece has to end right after the escape whenever the next character is one
 * of 0-9, a-f or A-F (e.g. "1\x0E" followed by "C..."). Do not merge the
 * pieces without checking this, or the resulting bytes will change.
 *
 * The data ends with an explicit \x00 byte, after which the string literal
 * adds its own terminating zero; both are counted by sizeof(example). */
static cahute_u8 const example[] =
"4\x0EN\x0D"
"8\x0ES\x0D"
"1\x0E"
"C\x0D\"ENTREZ LES OPERATEURS\"\x0D"
"\xF7\x08N\x0D\xF7\x08\x7F\x8F:\xF7\x09:\xF7\x0A:\x7F\x8F\x0E"
"D\x0D"
"\xF7\x0B"
"D=0\x0D\x0D\xF7\x00"
"D=43:\xF7\x01\x0D\xF7\x10S,4,\"\xA9"
"\"\x0DS\x89"
"1\x0ES:N\x99"
"1\x0EN\x0DN\x11"
"3\x7F\xB0N\x11"
"2\x7F\xB0"
"N\x11"
"0\x13"
"0\x0E"
"C:\xF7\x03\x0D\xF7\x00"
"D=42:\xF7\x01\x0D"
"\xF7\x10S,4,\"\x89\"\x0DS\x89"
"1\x0ES:N\x99"
"1\x0EN\x0D"
"0\x0E"
"C:\xF7\x03\x0D\xF7\x00"
"D=32:\xF7\x01\x0D\xF7\x10S,4,\"\x99\"\x0DS\x89"
"1\x0ES:N\x99"
"1\x0EN\x0DN\x11"
"1\x13"
"0\x0E"
"C:\xF7\x03\x0D\xF7\x00"
"D=33:\xF7\x01\x0D\xF7\x10S,4,\"\xB9\"\x0DS\x89"
"1\x0ES:N\x99"
"1\x0EN"
"\x0D"
"0\x0E"
"C:\xF7\x03\x0D\xF7\x09\x0D\x0D\xF7\x00"
"C=1:\xF7\x01\x0D"
"\"GOLDORAK\"\x0D\xF7\x02\x0D\"INVALIDE\"\x0D\xF7\x03\x00";
/* Convert ``example`` to UTF-8 while reading it through a small read buffer,
 * as one would do with a stream, then print the whole result.
 *
 * Bytes left unconverted by a pass because the input was truncated are moved
 * to the start of the read buffer and completed on the next pass.
 *
 * Returns 0 if the conversion succeeded, 1 otherwise. */
int main(void) {
    cahute_context *context;
    char final_buf[1024];
    cahute_u8 read_buf[32];
    size_t read_offset = 0; /* Bytes of ``example`` consumed so far. */
    /* The destination cursor is kept across passes, so that the result
     * accumulates in ``final_buf``. */
    void *dest = final_buf;
    size_t dest_size = sizeof(final_buf);
    void const *src;
    size_t src_size;
    size_t present = 0; /* Leftover bytes at the start of ``read_buf``. */
    int i, err, ret = 1;

    err = cahute_create_context(&context);
    if (err) {
        fprintf(
            stderr,
            "cahute_create_context() has returned error %s.\n",
            cahute_get_error_name(err)
        );
        return 1;
    }

    for (i = 0;; i++) {
        /* If the input is exhausted and no leftover bytes are waiting,
         * there is nothing left to convert: the data had no sentinel, and
         * we must stop here rather than loop over empty passes forever.
         *
         * NOTE: ``read_offset`` never exceeds ``sizeof(example)`` since
         * the read size is clamped below, hence the ``>=`` comparison. */
        if (read_offset >= sizeof(example) && !present) {
            break;
        }

        /* Start by completing the buffer.
         * If there are ``present`` bytes already in the buffer, we want
         * to add ``sizeof(read_buf) - present`` bytes in the buffer. */
        src_size = sizeof(read_buf) - present;
        if (src_size > sizeof(example) - read_offset) {
            /* There may be fewer bytes to read than expected, we want to
             * complete it this way. */
            src_size = sizeof(example) - read_offset;
        }

        memcpy(&read_buf[present], &example[read_offset], src_size);
        read_offset += src_size;

        /* We now want to incorporate the already-present bytes into the
         * buffer, to prepare for the conversion. ``present`` temporarily
         * holds the total, so that we can compute how much was consumed. */
        src = read_buf;
        src_size += present;
        present = src_size;

        /* We now have an ``src`` buffer of ``src_size`` bytes to read,
         * we can operate the conversion. */
        err = cahute_convert_text(
            context,
            &dest,
            &dest_size,
            &src,
            &src_size,
            CAHUTE_TEXT_ENCODING_UTF8,
            CAHUTE_TEXT_ENCODING_9860_8
        );
        printf(
            "Pass %d: %zu bytes read, error set to 0x%04X\n",
            i,
            present - src_size,
            err
        );

        if (err == CAHUTE_ERROR_TERMINATED) {
            break; /* A sentinel was found! */
        }

        if (!err) {
            present = 0;
            continue; /* There may be some more bytes to read. */
        }

        if (err == CAHUTE_ERROR_TRUNC) {
            /* Truncated input, we must check that at least one byte has
             * been read from the source data to avoid an infinite loop. */
            if (src_size == present) {
                goto fail;
            }

            /* Otherwise, we want to copy the leftover bytes at
             * the beginning and complete.
             *
             * NOTE: Both memory areas may overlap, we must use memmove()
             * to avoid overwriting data we're trying to copy! */
            memmove(read_buf, src, src_size);
            present = src_size;
            continue;
        }

        /* Other failure, we must stop! */
        goto fail;
    }

    /* Print the result of the conversion. */
    printf("---\n");
    fwrite(final_buf, 1, sizeof(final_buf) - dest_size, stdout);
    printf("\n---\n");
    ret = 0;

fail:
    cahute_destroy_context(context);
    return ret;
}
This program displays the following output:
Pass 0: 32 bytes read, error set to 0x0000
Pass 1: 31 bytes read, error set to 0x0007
Pass 2: 32 bytes read, error set to 0x0000
Pass 3: 31 bytes read, error set to 0x0007
Pass 4: 32 bytes read, error set to 0x0000
Pass 5: 32 bytes read, error set to 0x0000
Pass 6: 32 bytes read, error set to 0x0000
Pass 7: 32 bytes read, error set to 0x0000
Pass 8: 23 bytes read, error set to 0x000A
---
4→N
8→S
1→C
"ENTREZ LES OPERATEURS"
While N
While Getkey:WhileEnd:Do:Getkey→D
LpWhile D=0
If D=43:Then
Locate S,4,"×"
S+1→S:N‐1→N
N≠3 And N≠2 And N≠0⇒0→C:IfEnd
If D=42:Then
Locate S,4,"+"
S+1→S:N‐1→N
0→C:IfEnd
If D=32:Then
Locate S,4,"‐"
S+1→S:N‐1→N
N≠1⇒0→C:IfEnd
If D=33:Then
Locate S,4,"÷"
S+1→S:N‐1→N
0→C:IfEnd
WhileEnd
If C=1:Then
"GOLDORAK"
Else
"INVALIDE"
IfEnd
---