; =============================================================================
;
; Litos - Character sets
;
; =============================================================================
CODE_SECTION 32
; Optional debug switches (only considered when DEBUG is defined).
%ifdef DEBUG
;%define DEBUG_CODEPAGE ; uncomment this to test code pages
;%define DEBUG_FONTNUM ; uncomment this to display font number
%endif
; CHINV marks an unassigned entry in a "to Unicode" table; CharToUnicode
; reports such an entry as an invalid character.
CHINV EQU 0 ; invalid character (hardcoded)
; CHARSETNUM sizes the code page list built by CharTabInit and searched by
; GetCharSet; the DEBUG_CODEPAGE check compares it with the real table size.
CHARSETNUM EQU 44 ; number of character sets
; ------------- Macro - Continuous area of WORD incremental values
; Emits one DW for every integer from %1 up to and including %2, in ascending
; order, i.e. (%2-%1+1) words in total.
; %1 = start value, %2 = end value
; INCW_INX is the assembly-time running value; it is left at %2+1 afterwards.
%macro INCW 2
%assign INCW_INX %1
; one iteration per value of the range
%rep (%2-%1+1)
dw INCW_INX
%assign INCW_INX INCW_INX + 1
%endrep
%endmacro
; ------------- Macro - Initialized single byte character set (CHARSET)
; Emits one 8-DWORD character set record for a single byte code page.
; %1 = code page (also selects the label CP<%1>ToUniTab of its Unicode table)
; CHSINIS - assembly-time byte offset of this set's area inside CharSetFromUni;
; every expansion advances it by FONTMAP*4, i.e. one DWORD page
; pointer per Unicode page.
; The field order presumably mirrors the CHARSET structure (CHSET_CodePage,
; CHSET_Flags, CHSET_ToUni, CHSET_FromUni, ...) declared elsewhere - confirm.
%macro CHSINI 1
dd %1 ; code page
dd 0 ; flags (single byte set, CHSET_MBYTE not set)
dd CP %+ %1 %+ ToUniTab ; table to Unicode
dd CharSetFromUni+CHSINIS ; table from Unicode (array of page pointers)
dd NULL ; table to capital characters
dd NULL ; table to small characters
dd CharSBRead ; read character from buffer
dd CharSBWrite ; write character into buffer
%assign CHSINIS CHSINIS + FONTMAP*4
%endmacro
; ------------- Macro - Initialized multibyte character set (CHARSET)
; Emits one 8-DWORD character set record for a multibyte encoding. Such a set
; has no conversion tables; all work is done by its read/write functions.
; %1 = code page, %2 = read character, %3 = write character
%macro CHSINI2 3
dd %1 ; code page
dd CHSET_MBYTE ; flags (multibyte set)
dd NULL ; table to Unicode (not used)
dd NULL ; table from Unicode (not used)
dd NULL ; table to capital characters
dd NULL ; table to small characters
dd %2 ; read character from buffer
dd %3 ; write character into buffer
%endmacro
; -----------------------------------------------------------------------------
;                     Initialize character mapping
; -----------------------------------------------------------------------------
; Builds the run-time tables used by the character set functions:
;   1. CharSetCodePage - code page of every CHARSET, one DWORD per set
;   2. entries 0A0h..0FFh of the ISO 8859-1 "to Unicode" table
;   3. "from Unicode" pages of all single byte character sets
;   4. Unicode maps of the fixed fonts F14, F10 and F08
; NOTES: No register is preserved (EBP is used too with DEBUG_CODEPAGE).
;        SysMemAlloc is assumed to take the size in EAX, return the block
;        address in EAX and preserve EDI (defined elsewhere - confirm).
; -----------------------------------------------------------------------------

; ------------- Initialize list of code pages
; The list is written from the last CHARSET to the first one. GetCharSet scans
; it with REPNE SCASD, so every entry must be a full DWORD.

CharTabInit:    mov     edi,CharSetCodePage     ; EDI <- list of code pages
                mov     ebx,CharSetTab+(CHARSETNUM-1)*CHARSET_size ; EBX <- last set
CharTabInit2:   mov     eax,[ebx+CHSET_CodePage] ; EAX <- code page
                stosd                           ; store one code page (DWORD)
                sub     ebx,CHARSET_size        ; EBX <- previous character set
                cmp     ebx,CharSetTab          ; still inside the table?
                jae     CharTabInit2            ; next character set

; ------------- Initialize ISO 8859-1 (Latin 1) table
; Characters 0A0h..0FFh map to the Unicode code of the same value.

                mov     edi,CP28591ToUniTab+32*2 ; EDI <- entry of 0a0h character
                xor     eax,eax                 ; EAX <- 0
                mov     al,0a0h                 ; EAX <- 0a0h, first character
CharTabInit3:   stosw                           ; store one character
                inc     al                      ; increase character
                jnz     CharTabInit3            ; until AL wraps after 0ffh

; ------------- Prepare to initialize conversion table from Unicode

                mov     ebx,CharSetTab          ; EBX <- first character set
CharTabInit5:   test    byte [ebx+CHSET_Flags],CHSET_MBYTE ; multibyte?
                jnz     CharTabInit58           ; charset need not be initialized
                mov     esi,[ebx+CHSET_ToUni]   ; ESI <- table to Unicode
                mov     dl,80h                  ; DL <- 80h, first character

; ------------- Get one character in Unicode (-> AX)

CharTabInit52:  lodsw                           ; AX <- Unicode character
                cmp     ax,CHINV                ; unassigned character?
                je      CharTabInit56           ; nothing to map, skip it
                movzx   edi,ah                  ; EDI <- page index

; ------------- Get page address (-> EDI)

                shl     edi,2                   ; EDI <- offset of page address
                add     edi,[ebx+CHSET_FromUni] ; EDI <- address of page pointer
                cmp     dword [edi],byte 0      ; is page allocated?
                jne     CharTabInit55           ; page is already allocated

; ------------- Create new page (no memory error can occur now)

                push    eax                     ; push EAX (Unicode character)
                push    edi                     ; push EDI (page pointer slot)
                xor     eax,eax                 ; EAX <- 0
                mov     ah,1                    ; EAX <- 256, size of one page
                call    SysMemAlloc             ; allocate memory
                mov     [edi],eax               ; store page address

; ------------- Initialize page

                xchg    eax,edi                 ; EDI <- page address
                xor     ecx,ecx                 ; ECX <- 0
                mov     cl,256/4                ; ECX <- page size / 4
                xor     eax,eax                 ; EAX <- 0
                rep     stosd                   ; clear page
                pop     edi                     ; pop EDI
                pop     eax                     ; pop EAX

; ------------- Store character into page

CharTabInit55:  mov     edi,[edi]               ; EDI <- page memory block
                movzx   ecx,al                  ; ECX <- offset of the character
                mov     [edi+ecx],dl            ; store character

; ------------- Next character

CharTabInit56:  inc     dl                      ; increase character index
                jnz     CharTabInit52           ; until DL wraps after 0ffh

; ------------- Next character set

CharTabInit58:  add     ebx,CHARSET_size        ; EBX <- next character set
                cmp     ebx,CharSetTab2         ; end of table?
                jb      CharTabInit5            ; next table

; ------------- Initialize fonts

                mov     ebx,F14                 ; EBX <- font 8x14
                call    FixFontInit             ; initialize font
                mov     ebx,F10                 ; EBX <- font 8x10
                call    FixFontInit             ; initialize font
                mov     ebx,F08                 ; EBX <- font 8x8
                call    FixFontInit             ; initialize font

; ------------- Test code pages

%ifdef DEBUG_CODEPAGE
                ; *** display number of character sets
                mov     esi,CPTxt1              ; ESI <- text
                call    DebOutText              ; display text
                xor     edx,edx                 ; EDX <- 0
                mov     eax,CharSetTab2         ; EAX <- end of all character sets
                sub     eax,CharSetTab          ; EAX <- size of all character sets
                mov     ecx,CHARSET_size        ; ECX <- size of one character set
                div     ecx                     ; EAX <- number of character sets
                call    DebOutNum               ; display number of character sets

                ; *** check number of character sets
                cmp     eax,CHARSETNUM          ; check number of character sets
                je      CharTabInit81           ; number of character sets is OK
                mov     esi,CPTxt0              ; ESI <- error text
                call    DebOutText              ; display text
CharTabInit81:  call    DebNewLine              ; display new line

                ; *** display code page number
                mov     ebx,CharSetTab          ; EBX <- character set table
                xor     ebp,ebp                 ; EBP <- memory accumulator
                mov     edx,127                 ; EDX <- preset max. character
CharTabInit82:  test    byte [ebx+CHSET_Flags],CHSET_MBYTE ; multibyte?
                jnz     CharTabInit89           ; next charset
                mov     eax,[ebx+CHSET_CodePage] ; EAX <- code page
                call    DebOutNum               ; display code page
                mov     al,":"                  ; AL <- ":" character
                call    DebOutChar              ; display ":" character
                call    DebOutTab               ; display tabulator

                ; *** find maximal Unicode character
                mov     esi,[ebx+CHSET_ToUni]   ; ESI <- table to Unicode
                mov     ecx,128                 ; ECX <- table size
CharTabInit84:  xor     eax,eax                 ; EAX <- 0
                lodsw                           ; EAX <- Unicode character
                cmp     eax,edx                 ; check maximal character
                jb      CharTabInit85           ; character is not bigger
                xchg    eax,edx                 ; store new maximal character
CharTabInit85:  loop    CharTabInit84           ; next character

                ; *** get number of allocated pages
CharTabInit86:  mov     esi,CPTxt3              ; ESI <- text
                call    DebOutText              ; display text
                push    edx                     ; push EDX (maximal character)
                xor     edx,edx                 ; EDX <- 0, pages
                xor     ecx,ecx                 ; ECX <- 0
                mov     cl,FONTMAP              ; ECX <- number of pages
                mov     esi,[ebx+CHSET_FromUni] ; ESI <- from Unicode table
CharTabInit87:  lodsd                           ; load one pointer
                or      eax,eax                 ; valid page?
                jz      CharTabInit88           ; invalid page
                inc     edx                     ; increase number of pages
                add     ebp,256                 ; increase memory size
CharTabInit88:  loop    CharTabInit87           ; next page
                xchg    eax,edx                 ; EAX <- number of pages
                call    DebOutNum               ; display pages
                pop     edx                     ; pop EDX (maximal character)
                call    DebOutTab               ; display tabulator
                call    DebOutTab               ; display tabulator
                call    DebOutTab               ; display tabulator

                ; *** next character set
CharTabInit89:  add     ebx,CHARSET_size        ; EBX <- next character set
                cmp     ebx,CharSetTab2         ; end of table?
                jb      CharTabInit82           ; next table

                ; *** display maximal Unicode character
                mov     esi,CPTxt2              ; ESI <- text
                call    DebOutText              ; display text
                xchg    eax,edx                 ; EAX <- maximal character
                call    DebOutHexW              ; display maximal character

                ; *** allocated memory
                call    DebNewLine              ; new line
                mov     esi,CPTxt4              ; ESI <- text
                call    DebOutText              ; display text
                xchg    eax,ebp                 ; EAX <- memory
                add     eax,3ffh                ; round up
                shr     eax,10                  ; convert to KB
                call    DebOutNum               ; display memory
                mov     esi,CPTxt5              ; ESI <- text
                call    DebOutText              ; display text
%endif

CharTabInit9:   ret
; -----------------------------------------------------------------------------
;                        Initialize one fixed font
; -----------------------------------------------------------------------------
; Walks all glyph records of the font and enters each of them into the font's
; Unicode map. A glyph record consists of a count byte (masked with
; FIXFONT_MASK), that many WORD Unicode codes and FIXFONT_Height bytes of
; glyph data. The map is an array of page pointers indexed by the high byte of
; the Unicode code; every page holds 256 DWORD pointers to glyph records and
; is allocated on first use.
; INPUT:        EBX = font head
; DESTROYS:     EAX, ECX, EDX, ESI, EDI (and EBP if DEBUG_FONTNUM is defined)
; NOTES:        SysMemAlloc is assumed to take the size in EAX, return the
;               block address in EAX and preserve EDI (confirm).
; -----------------------------------------------------------------------------

; ------------- Prepare font data

FixFontInit:    lea     edx,[ebx+FIXFONT_Data]  ; EDX <- start of font data
%ifdef DEBUG_FONTNUM
                xor     ebp,ebp                 ; EBP <- 0, glyph counter
%endif

; ------------- Prepare to initialize one character

FixFontInit2:   mov     esi,edx                 ; ESI <- start of glyph record
                lodsb                           ; AL <- number of codes
                and     al,FIXFONT_MASK         ; mask number of codes
                movzx   ecx,al                  ; ECX <- number of codes
%ifdef DEBUG_FONTNUM
                inc     ebp                     ; increase glyph counter
%endif
                jecxz   FixFontInit8            ; no code, LOOP would not stop

; ------------- Get one code

FixFontInit4:   lodsw                           ; AX <- Unicode code
                movzx   edi,ah                  ; EDI <- page index

; ------------- Get page address (-> EDI)

                shl     edi,2                   ; EDI <- offset of page address
                add     edi,[ebx+FIXFONT_Map]   ; EDI <- address of page pointer
                cmp     dword [edi],byte 0      ; is page allocated?
                jne     FixFontInit6            ; page is already allocated

; ------------- Create new page (no memory error can occur now)

                push    eax                     ; push EAX (Unicode code)
                push    ecx                     ; push ECX (remaining codes)
                push    edi                     ; push EDI (page pointer slot)
                xor     eax,eax                 ; EAX <- 0
                mov     ah,4                    ; EAX <- 256*4, size of one page
                call    SysMemAlloc             ; allocate memory
                mov     [edi],eax               ; store page address
                xchg    eax,edi                 ; EDI <- page address

; ------------- Initialize page

                xor     ecx,ecx                 ; ECX <- 0
                mov     ch,1                    ; ECX <- 256*4 / 4
                xor     eax,eax                 ; EAX <- 0
                rep     stosd                   ; clear page
                pop     edi                     ; pop EDI
                pop     ecx                     ; pop ECX
                pop     eax                     ; pop EAX

; ------------- Store character into page

FixFontInit6:   mov     edi,[edi]               ; EDI <- page memory block
                movzx   eax,al                  ; EAX <- offset of the character
                mov     [edi+eax*4],edx         ; store address of glyph record

; ------------- Next code

                loop    FixFontInit4            ; next code

; ------------- Next character

FixFontInit8:   add     esi,[ebx+FIXFONT_Height] ; ESI <- skip glyph data
                mov     edx,esi                 ; EDX <- next glyph record
                cmp     edx,[ebx+FIXFONT_End]   ; end of data?
                jb      FixFontInit2            ; next character

%ifdef DEBUG_FONTNUM
                mov     eax,ebp                 ; EAX <- number of glyphs
                call    DebOutNum               ; display number of glyphs
                call    DebOutSpc               ; display space character
%endif
                ret
; -----------------------------------------------------------------------------
;                     Get character set structure
; -----------------------------------------------------------------------------
; Searches the code page list CharSetCodePage (CHARSETNUM DWORD entries) for
; the requested code page. The count left in ECX after the scan is used as
; index into the pointer table CharSetAddr.
; INPUT:        EAX = codepage
; OUTPUT:       EBX = character set structure CHARSET (undefined with CY)
;               CY = codepage not found
; -----------------------------------------------------------------------------

GetCharSet:     push    edi                     ; save EDI
                push    ecx                     ; save ECX
                mov     ecx,CHARSETNUM          ; ECX <- number of list entries
                mov     edi,CharSetCodePage     ; EDI <- list of code pages
                repne   scasd                   ; scan list for EAX
                mov     ebx,[CharSetAddr+ecx*4] ; EBX <- CHARSET (flags untouched)
                pop     ecx                     ; restore ECX (flags untouched)
                pop     edi                     ; restore EDI (flags untouched)
                je      GetCharSet8             ; found, the compare left NC
GetCharSet7:    stc                             ; not found, set error flag
GetCharSet8:    ret
; -----------------------------------------------------------------------------
; Read character from UTF-8 buffer
; -----------------------------------------------------------------------------
; Decodes one UTF-8 sequence of 1 to 6 bytes. The multi-byte forms chain into
; each other: every stage loads one continuation byte, merges its 6 data bits
; into EAX and jumps to the stage of the next shorter form.
; INPUT: ECX = remaining bytes in source buffer
; EDX = invalid character (in Unicode)
; ESI = source buffer
; OUTPUT: EAX = Unicode character (or invalid character if no data, if the
; sequence is incomplete or if a continuation byte is wrong)
; ECX = next remaining bytes
; ESI = next source buffer
; NOTES: Lead bytes 0FEh and 0FFh are skipped and the next byte is read.
; A wrong continuation byte is not consumed, it is read again as the
; first byte of the next character.
; The 6-byte form yields only 30 bits: its lead bit is shifted out of
; EAX by the fourth SHL EAX,6.
; -----------------------------------------------------------------------------
; ------------- Read first byte
CharUTF8Read: jecxz CharUTF8Read8 ; no data
xor eax,eax ; EAX <- 0
lodsb ; AL <- load first byte
dec ecx ; decrease number of bytes
; ------------- 1 Byte (7 bits, 0xxxxxxx = 0..7F)
cmp al,7fh ; 1 byte?
jbe CharUTF8Read24 ; data byte ok
; ------------- Invalid bytes (80..BF, FE, FF)
cmp al,0c0h ; continuation byte without lead byte?
jb CharUTF8Read8 ; invalid byte
cmp al,0fdh ; valid code?
ja CharUTF8Read ; detection byte, ignore it
; ------------- 2 Bytes (11 bits, 110xxxxx 10xxxxxx = C0..DF 80..BF)
cmp al,0dfh ; 2-byte code?
ja CharUTF8Read3 ; more bytes
jecxz CharUTF8Read8 ; not enough bytes
and al,1fh ; mask 5 bits
mov ah,al ; AH <- high 5 bits
; Last stage of every form: EAX holds the bits collected so far shifted left
; by 8, AL is free for the last continuation byte.
CharUTF8Read22: lodsb ; AL <- load next byte
cmp al,80h ; below continuation range 80..BF?
jb CharUTF8Read7 ; invalid character
cmp al,0bfh ; above continuation range 80..BF?
ja CharUTF8Read7 ; invalid character
dec ecx ; decrease number of bytes
shl al,2 ; drop the two marker bits, 6 data bits on top of AL
shr eax,2 ; shift to right position
CharUTF8Read24: ret
; ------------- 3 Bytes (16 bits, 1110xxxx 10xxxxxx 10xxxxxx =
; E0..EF 80..BF 80..BF
CharUTF8Read3: cmp al,0efh ; 3-byte code?
ja CharUTF8Read4 ; more bytes
cmp ecx,byte 2 ; check number of bytes
jb CharUTF8Read8 ; not enough bytes
and al,0fh ; mask 4 bits
mov ah,al ; AH <- high 4 bits
CharUTF8Read32: lodsb ; AL <- load next byte
cmp al,80h ; below continuation range 80..BF?
jb CharUTF8Read7 ; invalid character
cmp al,0bfh ; above continuation range 80..BF?
ja CharUTF8Read7 ; invalid character
dec ecx ; decrease number of bytes
shl al,2 ; drop the two marker bits, 6 data bits on top of AL
shl eax,6 ; free AL
jmp short CharUTF8Read22
; ------------- 4 Bytes (21 bits, 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx =
; F0..F7 80..BF 80..BF 80..BF
CharUTF8Read4: cmp al,0f7h ; 4-byte code?
ja CharUTF8Read5 ; more bytes
cmp ecx,byte 3 ; check number of bytes
jb CharUTF8Read8 ; not enough bytes
and al,7 ; mask 3 bits
mov ah,al ; AH <- high 3 bits
CharUTF8Read42: lodsb ; AL <- load next byte
cmp al,80h ; below continuation range 80..BF?
jb CharUTF8Read7 ; invalid character
cmp al,0bfh ; above continuation range 80..BF?
ja CharUTF8Read7 ; invalid character
dec ecx ; decrease number of bytes
shl al,2 ; drop the two marker bits, 6 data bits on top of AL
shl eax,6 ; free AL
jmp short CharUTF8Read32
; ------------- Error
CharUTF8Read7: dec esi ; return last invalid character
CharUTF8Read8: mov eax,edx ; EAX <- invalid character
ret
; ------------- 5 Bytes (26 bits, 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
; = F8..FB 80..BF 80..BF 80..BF 80..BF
CharUTF8Read5: cmp al,0fbh ; 5-byte code?
ja CharUTF8Read6 ; more bytes
cmp ecx,byte 4 ; check number of bytes
jb CharUTF8Read8 ; not enough bytes
and al,3 ; mask 2 bits
mov ah,al ; AH <- high 2 bits
CharUTF8Read52: lodsb ; AL <- load next byte
cmp al,80h ; below continuation range 80..BF?
jb CharUTF8Read7 ; invalid character
cmp al,0bfh ; above continuation range 80..BF?
ja CharUTF8Read7 ; invalid character
dec ecx ; decrease number of bytes
shl al,2 ; drop the two marker bits, 6 data bits on top of AL
shl eax,6 ; free AL
jmp short CharUTF8Read42
; ------------- 6 Bytes (31 bits, 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
; 10xxxxxx = FC..FD 80..BF 80..BF 80..BF 80..BF 80..BF
CharUTF8Read6: cmp ecx,byte 5 ; check number of bytes
jb CharUTF8Read8 ; not enough bytes
and al,1 ; mask 1 bit
mov ah,al ; AH <- high 1 bit
lodsb ; AL <- load next byte
cmp al,80h ; below continuation range 80..BF?
jb CharUTF8Read7 ; invalid character
cmp al,0bfh ; above continuation range 80..BF?
ja CharUTF8Read7 ; invalid character
dec ecx ; decrease number of bytes
shl al,2 ; drop the two marker bits, 6 data bits on top of AL
shl eax,6 ; free AL
jmp short CharUTF8Read52
; -----------------------------------------------------------------------------
;               Read character from UTF-16LE (PC) buffer
; -----------------------------------------------------------------------------
; Decodes one UTF-16 little endian character: either a single word or a
; surrogate pair (high surrogate D800h..DBFFh followed by low surrogate
; DC00h..DFFFh), which encodes the characters 10000h..10FFFFh.
; INPUT:        ECX = remaining bytes in source buffer
;               EDX = invalid character (in Unicode)
;               ESI = source buffer
; OUTPUT:       EAX = Unicode character (or invalid character if no data)
;               ECX = next remaining bytes
;               ESI = next source buffer
; NOTES:        Nothing is consumed if fewer than 2 bytes remain.
;               A word following a high surrogate which is not a low surrogate
;               is not consumed.
;               CharUTF16LER8 is the shared error exit of the UTF-32 readers.
; -----------------------------------------------------------------------------

; ------------- Read first word

CharUTF16LER:   cmp     ecx,byte 2              ; check number of bytes
                jb      CharUTF16LER8           ; no data
                xor     eax,eax                 ; EAX <- 0
                lodsw                           ; AX <- load first word
                dec     ecx                     ; decrease number of bytes
                dec     ecx                     ; decrease number of bytes

; ------------- 1 Word

                cmp     eax,0d800h              ; below surrogate area?
                jb      CharUTF16LER4           ; valid character
                cmp     eax,0e000h              ; above surrogate area?
                jae     CharUTF16LER4           ; valid character
                cmp     eax,0dc00h              ; low surrogate without high one?
                jae     CharUTF16LER8           ; invalid character

; ------------- 2 Words

                cmp     ecx,byte 2              ; check number of bytes
                jb      CharUTF16LER8           ; no data
                and     eax,3ffh                ; mask high 10 bits of character
                shl     eax,16                  ; free AX
                lodsw                           ; AX <- load second word
                cmp     ax,0dc00h               ; check low limit
                jb      CharUTF16LER6           ; invalid character
                cmp     ax,0e000h               ; check high limit
                jae     CharUTF16LER6           ; invalid character
                dec     ecx                     ; decrease number of bytes
                dec     ecx                     ; decrease number of bytes
                shl     ax,6                    ; shift out the 6 marker bits
                shr     eax,6                   ; EAX <- 20 bits of both words
                add     eax,10000h              ; pairs start at character 10000h
CharUTF16LER4:  ret

; ------------- Error

CharUTF16LER6:  dec     esi                     ; return second word
                dec     esi                     ; return second word
CharUTF16LER8:  mov     eax,edx                 ; EAX <- invalid character
                ret
; -----------------------------------------------------------------------------
;               Read character from UTF-16BE (MAC) buffer
; -----------------------------------------------------------------------------
; Decodes one UTF-16 big endian character: either a single word or a
; surrogate pair (high surrogate D800h..DBFFh followed by low surrogate
; DC00h..DFFFh), which encodes the characters 10000h..10FFFFh.
; INPUT:        ECX = remaining bytes in source buffer
;               EDX = invalid character (in Unicode)
;               ESI = source buffer
; OUTPUT:       EAX = Unicode character (or invalid character if no data)
;               ECX = next remaining bytes
;               ESI = next source buffer
; NOTES:        Nothing is consumed if fewer than 2 bytes remain.
;               A word following a high surrogate which is not a low surrogate
;               is not consumed.
; -----------------------------------------------------------------------------

; ------------- Read first word

CharUTF16BER:   cmp     ecx,byte 2              ; check number of bytes
                jb      CharUTF16BER8           ; no data
                xor     eax,eax                 ; EAX <- 0
                lodsw                           ; AX <- load first word
                xchg    al,ah                   ; exchange byte order
                dec     ecx                     ; decrease number of bytes
                dec     ecx                     ; decrease number of bytes

; ------------- 1 Word

                cmp     eax,0d800h              ; below surrogate area?
                jb      CharUTF16BER4           ; valid character
                cmp     eax,0e000h              ; above surrogate area?
                jae     CharUTF16BER4           ; valid character
                cmp     eax,0dc00h              ; low surrogate without high one?
                jae     CharUTF16BER8           ; invalid character

; ------------- 2 Words

                cmp     ecx,byte 2              ; check number of bytes
                jb      CharUTF16BER8           ; no data
                and     eax,3ffh                ; mask high 10 bits of character
                shl     eax,16                  ; free AX
                lodsw                           ; AX <- load second word
                xchg    al,ah                   ; exchange byte order
                cmp     ax,0dc00h               ; check low limit
                jb      CharUTF16BER6           ; invalid character
                cmp     ax,0e000h               ; check high limit
                jae     CharUTF16BER6           ; invalid character
                dec     ecx                     ; decrease number of bytes
                dec     ecx                     ; decrease number of bytes
                shl     ax,6                    ; shift out the 6 marker bits
                shr     eax,6                   ; EAX <- 20 bits of both words
                add     eax,10000h              ; pairs start at character 10000h
CharUTF16BER4:  ret

; ------------- Error

CharUTF16BER6:  dec     esi                     ; return second word
                dec     esi                     ; return second word
CharUTF16BER8:  mov     eax,edx                 ; EAX <- invalid character
                ret
; -----------------------------------------------------------------------------
;               Read character from UTF-32LE (PC) buffer
; -----------------------------------------------------------------------------
; Fetches one DWORD from the buffer without any validity check.
; INPUT:        ECX = remaining bytes in source buffer
;               EDX = invalid character (in Unicode)
;               ESI = source buffer
; OUTPUT:       EAX = Unicode character (or invalid character if no data)
;               ECX = next remaining bytes
;               ESI = next source buffer
; NOTES:        With fewer than 4 bytes left nothing is consumed; the error
;               exit of CharUTF16LER is shared.
; -----------------------------------------------------------------------------

CharUTF32LER:   cmp     ecx,byte 4              ; whole DWORD available?
                jb      CharUTF16LER8           ; no, return invalid character
                sub     ecx,byte 4              ; ECX <- bytes left behind it
                lodsd                           ; EAX <- character
                ret
; -----------------------------------------------------------------------------
;               Read character from UTF-32BE (MAC) buffer
; -----------------------------------------------------------------------------
; Fetches one DWORD from the buffer and reverses its byte order; no validity
; check is done.
; INPUT:        ECX = remaining bytes in source buffer
;               EDX = invalid character (in Unicode)
;               ESI = source buffer
; OUTPUT:       EAX = Unicode character (or invalid character if no data)
;               ECX = next remaining bytes
;               ESI = next source buffer
; NOTES:        With fewer than 4 bytes left nothing is consumed; the error
;               exit of CharUTF16LER is shared.
; -----------------------------------------------------------------------------

CharUTF32BER:   cmp     ecx,byte 4              ; whole DWORD available?
                jb      CharUTF16LER8           ; no, return invalid character
                lodsd                           ; EAX <- bytes B3 B2 B1 B0
                xchg    ah,al                   ; EAX <- B3 B2 B0 B1
                rol     eax,16                  ; EAX <- B0 B1 B3 B2
                xchg    ah,al                   ; EAX <- B0 B1 B2 B3
                sub     ecx,byte 4              ; ECX <- bytes left behind it
                ret
; -----------------------------------------------------------------------------
;               Read character from single byte buffer
; -----------------------------------------------------------------------------
; INPUT:        EBX = character set structure CHARSET
;               ECX = remaining bytes in source buffer
;               EDX = invalid character (in Unicode)
;               ESI = source buffer
; OUTPUT:       EAX = Unicode character (EAX <- EDX on error)
;               ECX = next remaining bytes
;               ESI = next source buffer
;               CY = invalid character or no other char (EAX <- EDX on error)
; -----------------------------------------------------------------------------

CharSBRead:     jecxz   CharToUnicode9          ; buffer is empty
                lodsb                           ; AL <- next byte of buffer
                dec     ecx                     ; one byte less in buffer

; CharSBRead falls through, CharToUnicode must follow.

; -----------------------------------------------------------------------------
;                     Convert character to Unicode
; -----------------------------------------------------------------------------
; Characters 0..7Fh and all characters of a multibyte set are returned as they
; are. Characters 80h..0FFh are looked up in the "to Unicode" table of the
; set, which holds one WORD per character starting with character 80h; an
; entry of 0 marks a character without Unicode code.
; INPUT:        AL = single byte character (0 to 255)
;               EBX = character set structure CHARSET
;               EDX = invalid character (in Unicode)
; OUTPUT:       EAX = Unicode character (EAX <- EDX on error)
;               CY = invalid character (EAX <- EDX on error)
; -----------------------------------------------------------------------------

; ------------- Characters 0 to 7fh have the same Unicode code

CharToUnicode:  movzx   eax,al                  ; EAX <- character
                test    al,al                   ; bit 7 clear?
                jns     CharToUnicode8          ; yes, 0..7fh is not converted
                test    byte [ebx+CHSET_Flags],CHSET_MBYTE ; multibyte?
                jnz     CharToUnicode8          ; multibyte cannot be converted

; ------------- Look up character in table (ECX is used as table pointer)

                push    ecx                     ; save ECX
                mov     ecx,[ebx+CHSET_ToUni]   ; ECX <- table to Unicode
                movzx   eax,word [ecx+eax*2-256] ; EAX <- entry of character-80h
                cmp     eax,byte 1              ; CY only for entry 0
                jae     CharToUnicode6          ; entry is used, here is NC
                mov     eax,edx                 ; EAX <- invalid character, CY kept

; ------------- Restore ECX (flags are not changed)

CharToUnicode6: pop     ecx                     ; restore ECX
                ret

; ------------- Character returned without conversion

CharToUnicode8: clc                             ; clear error flag
                ret

; ------------- No character in buffer

CharToUnicode9: mov     eax,edx                 ; EAX <- invalid character
                stc                             ; set error flag
                ret
; -----------------------------------------------------------------------------
;                  Write character into UTF-8 buffer
; -----------------------------------------------------------------------------
; Encodes one character as UTF-8 sequence of 1 to 6 bytes. Nothing is written
; if the whole sequence does not fit into the remaining space. The longer
; forms store their lead byte and chain into the tail of the next shorter
; form; the character is kept on the stack in between.
; INPUT:        EAX = Unicode character
;               EDI = destination buffer
;               EBP = remaining space in buffer (bytes)
; OUTPUT:       EDI = next destination buffer
;               EBP = next remaining space in buffer
; DESTROYS:     EAX
; -----------------------------------------------------------------------------

; ------------- 1 Byte (7 bits, 0xxxxxxx = 0..7F)

CharUTF8Write:  cmp     eax,byte 7fh            ; check character (7 bits)
                ja      CharUTF8Write2          ; character has more bytes
                or      ebp,ebp                 ; check free space
                jz      CharUTF8Write24         ; buffer full
                stosb                           ; store byte
                dec     ebp                     ; decrease remaining space
                ret

; ------------- 2 Bytes (11 bits, 110xxxxx 10xxxxxx = C0..DF 80..BF)

CharUTF8Write2: cmp     eax,7ffh                ; check character (11 bits)
                ja      CharUTF8Write3          ; character has more bytes
                cmp     ebp,byte 2              ; check free space
                jb      CharUTF8Write24         ; buffer full
                push    eax                     ; push EAX (character)
                shr     eax,6                   ; AL <- get highest 5 bits
                or      al,0c0h                 ; add flags
CharUTF8Write22: stosb                          ; store byte
                dec     ebp                     ; decrease remaining space
                pop     eax                     ; pop EAX (character)
                and     al,3fh                  ; mask low 6 bits
                or      al,80h                  ; add flags
                stosb                           ; store byte
                dec     ebp                     ; decrease remaining space
CharUTF8Write24: ret

; ------------- 3 Bytes (16 bits, 1110xxxx 10xxxxxx 10xxxxxx =
;                                               E0..EF 80..BF 80..BF

CharUTF8Write3: cmp     eax,0ffffh              ; check character (16 bits)
                ja      CharUTF8Write4          ; character has more bytes
                cmp     ebp,byte 3              ; check free space
                jb      CharUTF8Write24         ; buffer full
                push    eax                     ; push EAX (character)
                shr     eax,12                  ; AL <- get highest 4 bits
                or      al,0e0h                 ; add flags
CharUTF8Write32: stosb                          ; store byte
                dec     ebp                     ; decrease remaining space
                pop     eax                     ; pop EAX (character)
                push    eax                     ; push EAX (character)
                shr     eax,6                   ; AL <- get 6 bits
                and     al,3fh                  ; mask middle bits
                or      al,80h                  ; add flags
                jmp     short CharUTF8Write22

; ------------- 4 Bytes (21 bits, 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx =
;                                       F0..F7 80..BF 80..BF 80..BF

CharUTF8Write4: cmp     eax,1fffffh             ; check character (21 bits)
                ja      CharUTF8Write5          ; character has more bytes
                cmp     ebp,byte 4              ; check free space
                jb      CharUTF8Write24         ; buffer full
                push    eax                     ; push EAX (character)
                shr     eax,18                  ; AL <- get highest 3 bits
                or      al,0f0h                 ; add flags
CharUTF8Write42: stosb                          ; store byte
                dec     ebp                     ; decrease remaining space
                pop     eax                     ; pop EAX (character)
                push    eax                     ; push EAX (character)
                shr     eax,12                  ; AL <- get 6 bits
                and     al,3fh                  ; mask 6 bits
                or      al,80h                  ; add flags
                jmp     short CharUTF8Write32

; ------------- 5 Bytes (26 bits, 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
;                               = F8..FB 80..BF 80..BF 80..BF 80..BF

CharUTF8Write5: cmp     eax,3ffffffh            ; check character (26 bits)
                ja      CharUTF8Write6          ; character has more bytes
                cmp     ebp,byte 5              ; check free space
                jb      CharUTF8Write24         ; buffer full
                push    eax                     ; push EAX (character)
                shr     eax,24                  ; AL <- get highest 2 bits
                or      al,0f8h                 ; add flags
CharUTF8Write52: stosb                          ; store byte
                dec     ebp                     ; decrease remaining space
                pop     eax                     ; pop EAX (character)
                push    eax                     ; push EAX (character)
                shr     eax,18                  ; AL <- get 6 bits
                and     al,3fh                  ; mask 6 bits
                or      al,80h                  ; add flags
                jmp     short CharUTF8Write42

; ------------- 6 Bytes (31 bits, 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
;               10xxxxxx = FC..FD 80..BF 80..BF 80..BF 80..BF 80..BF

CharUTF8Write6: cmp     ebp,byte 6              ; check free space
                jb      CharUTF8Write24         ; buffer full
                push    eax                     ; push EAX (character)
                shr     eax,30                  ; AL <- get highest 1 bit
                or      al,0fch                 ; add flags
                stosb                           ; store byte
                dec     ebp                     ; decrease remaining space
                pop     eax                     ; pop EAX (character)
                push    eax                     ; push EAX (character)
                shr     eax,24                  ; AL <- get 6 bits
                and     al,3fh                  ; mask 6 bits
                or      al,80h                  ; add flags
                jmp     short CharUTF8Write52
; -----------------------------------------------------------------------------
;               Write character into UTF-16LE (PC) buffer
; -----------------------------------------------------------------------------
; Characters up to 0FFFFh are stored as one word, higher characters as a
; surrogate pair. Nothing is written if the character does not fit into the
; remaining space.
; INPUT:        EAX = Unicode character
;               EDI = destination buffer
;               EBP = remaining space in buffer (bytes)
; OUTPUT:       EDI = next destination buffer
;               EBP = next remaining space in buffer
; DESTROYS:     EAX
; -----------------------------------------------------------------------------

; ------------- Character fits into one word

CharUTF16LEW:   cmp     eax,0ffffh              ; one word sufficient?
                ja      CharUTF16LEW2           ; no, surrogate pair needed
                cmp     ebp,byte 2              ; space for one word?
                jb      CharUTF16LEW8           ; no, buffer full
                stosw                           ; store character
                dec     ebp                     ; two bytes
                dec     ebp                     ;   less space
                ret

; ------------- Surrogate pair (2 x 10 bits)

CharUTF16LEW2:  cmp     ebp,byte 4              ; space for two words?
                jb      CharUTF16LEW8           ; no, buffer full
                push    eax                     ; keep character for low word
                sub     eax,10000h              ; pairs start at character 10000h
                shr     eax,10                  ; EAX <- high 10 bits
                add     eax,0d800h              ; EAX <- high surrogate
                stosw                           ; store high surrogate
                pop     eax                     ; EAX <- character
                sub     ebp,byte 4              ; four bytes less space
                and     eax,3ffh                ; EAX <- low 10 bits
                add     eax,0dc00h              ; EAX <- low surrogate
                stosw                           ; store low surrogate
CharUTF16LEW8:  ret
; -----------------------------------------------------------------------------
;               Write character into UTF-16BE (MAC) buffer
; -----------------------------------------------------------------------------
; Characters up to 0FFFFh are stored as one word, higher characters as a
; surrogate pair; every word is stored high byte first. Nothing is written if
; the character does not fit into the remaining space.
; INPUT:        EAX = Unicode character
;               EDI = destination buffer
;               EBP = remaining space in buffer (bytes)
; OUTPUT:       EDI = next destination buffer
;               EBP = next remaining space in buffer
; DESTROYS:     EAX
; -----------------------------------------------------------------------------

; ------------- Character fits into one word

CharUTF16BEW:   cmp     eax,0ffffh              ; one word sufficient?
                ja      CharUTF16BEW2           ; no, surrogate pair needed
                cmp     ebp,byte 2              ; space for one word?
                jb      CharUTF16BEW8           ; no, buffer full
                xchg    ah,al                   ; high byte first
                stosw                           ; store character
                dec     ebp                     ; two bytes
                dec     ebp                     ;   less space
                ret

; ------------- Surrogate pair (2 x 10 bits)

CharUTF16BEW2:  cmp     ebp,byte 4              ; space for two words?
                jb      CharUTF16BEW8           ; no, buffer full
                push    eax                     ; keep character for low word
                sub     eax,10000h              ; pairs start at character 10000h
                shr     eax,10                  ; EAX <- high 10 bits
                add     eax,0d800h              ; EAX <- high surrogate
                xchg    ah,al                   ; high byte first
                stosw                           ; store high surrogate
                pop     eax                     ; EAX <- character
                sub     ebp,byte 4              ; four bytes less space
                and     eax,3ffh                ; EAX <- low 10 bits
                add     eax,0dc00h              ; EAX <- low surrogate
                xchg    ah,al                   ; high byte first
                stosw                           ; store low surrogate
CharUTF16BEW8:  ret
; -----------------------------------------------------------------------------
;       Write character into UTF-32LE (PC, native UNICODE) buffer
; -----------------------------------------------------------------------------
; Stores the character as one DWORD; nothing is written if fewer than 4 bytes
; of space remain.
; INPUT:        EAX = Unicode character
;               EDI = destination buffer
;               EBP = remaining space in buffer (bytes)
; OUTPUT:       EDI = next destination buffer
;               EBP = next remaining space in buffer
; DESTROYS:     EAX
; -----------------------------------------------------------------------------

CharUTF32LEW:   cmp     ebp,byte 4              ; space for one DWORD?
                jb      CharUTF32LEW4           ; no, buffer full
                stosd                           ; store character
                sub     ebp,byte 4              ; four bytes less space
CharUTF32LEW4:  ret
; -----------------------------------------------------------------------------
;               Write character into UTF-32BE (MAC) buffer
; -----------------------------------------------------------------------------
; Stores the character as one DWORD with reversed byte order; nothing is
; written if fewer than 4 bytes of space remain.
; INPUT:        EAX = Unicode character
;               EDI = destination buffer
;               EBP = remaining space in buffer (bytes)
; OUTPUT:       EDI = next destination buffer
;               EBP = next remaining space in buffer
; DESTROYS:     EAX
; -----------------------------------------------------------------------------

CharUTF32BEW:   cmp     ebp,byte 4              ; space for one DWORD?
                jb      CharUTF32BEW4           ; no, buffer full
                sub     ebp,byte 4              ; four bytes less space
                xchg    ah,al                   ; EAX <- B3 B2 B0 B1
                rol     eax,16                  ; EAX <- B0 B1 B3 B2
                xchg    ah,al                   ; EAX <- B0 B1 B2 B3
                stosd                           ; store character
CharUTF32BEW4:  ret
; -----------------------------------------------------------------------------
;               Write character into single byte buffer
; -----------------------------------------------------------------------------
; Converts the character with CharFromUnicode and stores the resulting byte.
; Nothing is done if the buffer is full.
; INPUT:        EAX = Unicode character
;               EBX = character set structure CHARSET
;               DL = invalid character
;               EDI = destination buffer
;               EBP = remaining space in buffer (bytes)
; OUTPUT:       EDI = next destination buffer
;               EBP = next remaining space in buffer
; DESTROYS:     EAX
; -----------------------------------------------------------------------------

CharSBWrite:    test    ebp,ebp                 ; any space left?
                jz      CharSBWrite4            ; no, buffer full
                call    CharFromUnicode         ; AL <- single byte character
                stosb                           ; store it
                dec     ebp                     ; one byte less space
CharSBWrite4:   ret
; -----------------------------------------------------------------------------
;                    Convert character from Unicode
; -----------------------------------------------------------------------------
; Characters 0..7Fh and all characters of a multibyte set are returned as they
; are. Other characters are looked up in two steps: the high byte of the
; Unicode code selects a page pointer in the "from Unicode" table, the low
; byte selects one byte of that page. A missing page or a zero byte means
; that the character set does not contain the character.
; INPUT:        EAX = Unicode character
;               EBX = character set structure CHARSET
;               DL = invalid character (single byte character)
; OUTPUT:       EAX = single byte character (0 to 255, EAX <- DL on error)
;               CY = invalid character (EAX <- DL on error)
; -----------------------------------------------------------------------------

; ------------- Characters which are returned without conversion

CharFromUnicode: cmp    eax,byte 7fh            ; character 0..7fh?
                jbe     CharFromUni9            ; yes, code is the same
                test    byte [ebx+CHSET_Flags],CHSET_MBYTE ; multibyte?
                jnz     CharFromUni9            ; multibyte cannot be converted

; ------------- Characters above FONTMAX have no page

                cmp     eax,FONTMAX             ; check max. Unicode character
                ja      CharFromUni8            ; invalid character

; ------------- Find page of the character (-> ECX)

                push    ecx                     ; save ECX
                movzx   ecx,ah                  ; ECX <- page index
                shl     ecx,2                   ; ECX <- offset of page pointer
                add     ecx,[ebx+CHSET_FromUni] ; ECX <- address of page pointer
                mov     ecx,[ecx]               ; ECX <- page
                jecxz   CharFromUni6            ; page does not exist

; ------------- Find character in the page (-> EAX)

                and     eax,0ffh                ; EAX <- offset in page
                movzx   eax,byte [ecx+eax]      ; EAX <- single byte character
                test    eax,eax                 ; is character defined?
                jz      CharFromUni6            ; no, invalid character

; ------------- Character found (TEST left NC)

                pop     ecx                     ; restore ECX
                ret

; ------------- Character not found, restore ECX

CharFromUni6:   pop     ecx                     ; restore ECX

; ------------- Return invalid character

CharFromUni8:   movzx   eax,dl                  ; EAX <- invalid character
                stc                             ; set error flag
                ret

; ------------- Character returned without conversion

CharFromUni9:   clc                             ; clear error flag
                ret
; -----------------------------------------------------------------------------
;                              Convert text
; -----------------------------------------------------------------------------
; Converts text from one code page into another one, character by character,
; using the read function of the source character set and the write function
; of the destination character set.
; INPUT:        EAX = source code page
;               EBX = destination code page
;               ECX = size of source buffer (bytes)
;               EDX = invalid character (passed to the read function as
;                       Unicode character, to the write function in DL)
;               ESI = source buffer
;               EDI = destination buffer
;               EBP = size of destination buffer (bytes)
; OUTPUT:       EAX = size of destination data (bytes, 0=invalid code page)
; NOTES:        All other registers are preserved.
;               Conversion stops when the read function consumes nothing,
;               i.e. the source ends with a fragment shorter than one code
;               unit (odd byte of UTF-16, 1 to 3 bytes of UTF-32). Such a
;               fragment is not converted; without this stop the loop would
;               never end, as ECX could not reach zero.
;               Characters which do not fit into the rest of the destination
;               buffer are dropped by the write functions.
; -----------------------------------------------------------------------------

; ------------- Push registers

CharTrans:      push    ebx                     ; push EBX
                push    ecx                     ; push ECX
                push    esi                     ; push ESI
                push    edi                     ; push EDI (start of destination)
                push    ebp                     ; push EBP

; ------------- Get source character set structure (-> EBX, later -> EAX)

                push    ebx                     ; push EBX (destination code page)
                call    GetCharSet              ; EBX <- source character set
                pop     eax                     ; EAX <- destination code page
                jc      CharTrans9              ; codepage not found

; ------------- Get destination character set structure (-> EBX)

                push    ebx                     ; push EBX (source character set)
                call    GetCharSet              ; EBX <- destination character set
                pop     eax                     ; EAX <- source character set
                jc      CharTrans9              ; codepage not found

; ------------- Convert text

CharTrans4:     jecxz   CharTrans9              ; no source data
                push    eax                     ; push EAX (source character set)
                push    ebx                     ; push EBX (destination char. set)
                push    ecx                     ; push ECX (bytes before reading)
                xchg    eax,ebx                 ; EBX <- source character set
                call    dword [ebx+CHSET_ReadChar] ; read character
                pop     ebx                     ; EBX <- bytes before reading
                cmp     ebx,ecx                 ; has anything been consumed?
                pop     ebx                     ; pop EBX (flags are not changed)
                je      CharTrans8              ; no, incomplete rest of data
                call    dword [ebx+CHSET_WriteChar] ; write character
                pop     eax                     ; pop EAX (source character set)
                jmp     short CharTrans4        ; next character

; ------------- Incomplete rest of source data, clean up stack

CharTrans8:     pop     eax                     ; pop EAX (source character set)

; ------------- Pop registers

CharTrans9:     xchg    eax,edi                 ; EAX <- end of destination data
                pop     ebp                     ; pop EBP
                pop     edi                     ; pop EDI (start of destination)
                pop     esi                     ; pop ESI
                pop     ecx                     ; pop ECX
                pop     ebx                     ; pop EBX
                sub     eax,edi                 ; EAX <- size of data in buffer
                ret
; -----------------------------------------------------------------------------
; Data
; -----------------------------------------------------------------------------
DATA_SECTION
%ifdef DEBUG_CODEPAGE
; Zero-terminated texts, assembled only when DEBUG_CODEPAGE is defined.
; Byte 10 inside a text is a line feed. The code that prints these texts is
; outside this part of the file.
CPTxt0: db '(!)',0
CPTxt1: db 'Number of character sets: ',0
CPTxt2: db 10,'max ',0
CPTxt3: db 'pages ',0
CPTxt4: db 'Allocated memory for conversion tables: ',0
CPTxt5: db ' KB',10,0
%endif
align 4, db 0
; ------------- Character set tables CHARSET
; One CHARSET descriptor per supported code page, emitted by the CHSINI
; (single byte) and CHSINI2 (multibyte) macros. Each descriptor is 8 dwords:
; code page, flags, table to Unicode, table from Unicode, table to capital
; characters, table to small characters, read handler, write handler.
; "CHSINI n" references the label CP<n>ToUniTab and assigns the descriptor
; the next FONTMAP*4 bytes of CharSetFromUni; CHSINIS is the running offset
; and its final value is the size reserved for CharSetFromUni.
; The number of descriptors must equal CHARSETNUM (44): CharSetAddr and
; CharTabInit both step through exactly CHARSETNUM descriptors.
%assign CHSINIS 0 ; init size of tables from Unicode
align 4, db 0
CharSetTab:
; special single byte codes
CHSINI 0 ; ASCII
CharSetDEC: CHSINI 1 ; DEC VT100 graphics
; OEM Codepages (DOS)
CharSet437: CHSINI 437 ; IBM-437 (United States)
CHSINI 720 ; Asmo-720 (Arabic)
CHSINI 737 ; IBM-737 (Greek)
CHSINI 775 ; IBM-775 (Baltic)
CHSINI 850 ; IBM-850 (Latin 1, West Europe)
CHSINI 852 ; IBM-852 (Latin 2, Central European)
CHSINI 855 ; IBM-855 (Cyrillic, primarily Russian)
CHSINI 857 ; IBM-857 (Turkish)
CHSINI 858 ; IBM-858 (Latin 1 + Euro)
CHSINI 860 ; IBM-860 (Portuguese)
CHSINI 861 ; IBM-861 (Icelandic)
CHSINI 862 ; IBM-862 (Hebrew)
CHSINI 863 ; IBM-863 (French Canadian)
CHSINI 865 ; IBM-865 (Nordic)
CHSINI 866 ; IBM-866 (Russian)
CHSINI 869 ; IBM-869 (Modern Greek)
CHSINI 895 ; IBM-895 (Kamenicky encoding, Czech)
; Windows Single Byte Character Set Codepages
CHSINI 874 ; Windows-874 (Thai)
CharSet1250: CHSINI 1250 ; Windows-1250 (Central Europe)
CHSINI 1251 ; Windows-1251 (Cyrillic)
CharSet1252: CHSINI 1252 ; Windows-1252 (Latin 1 Windows)
CHSINI 1253 ; Windows-1253 (Greek)
CHSINI 1254 ; Windows-1254 (Turkish)
CHSINI 1255 ; Windows-1255 (Hebrew)
CHSINI 1256 ; Windows-1256 (Arabic)
CHSINI 1257 ; Windows-1257 (Baltic)
CHSINI 1258 ; Windows-1258 (Vietnam)
; ISO Codepages
CHSINI 28591 ; ISO 8859-1 (Latin 1 Western European)
CHSINI 28592 ; ISO 8859-2 (Latin 2 Central European)
CHSINI 28593 ; ISO 8859-3 (Latin 3)
CHSINI 28594 ; ISO 8859-4 (Baltic)
CHSINI 28595 ; ISO 8859-5 (Cyrillic)
CHSINI 28596 ; ISO 8859-6 (Arabic)
CHSINI 28597 ; ISO 8859-7 (Greek)
CHSINI 28598 ; ISO 8859-8 (Hebrew)
CHSINI 28599 ; ISO 8859-9 (Turkish)
CHSINI 28605 ; ISO 8859-15 (Latin 9)
; multibyte codepages (no translation tables, only read/write handlers)
CHSINI2 1200, CharUTF16LER, CharUTF16LEW ; Unicode UTF-16LE
CHSINI2 1201, CharUTF16BER, CharUTF16BEW ; Unicode UTF-16BE
CHSINI2 12000, CharUTF32LER, CharUTF32LEW ; Unicode UTF-32LE
CHSINI2 12001, CharUTF32BER, CharUTF32BEW ; Unicode UTF-32BE
CHSINI2 65001, CharUTF8Read, CharUTF8Write ; Unicode UTF-8
; end of the descriptor table
CharSetTab2:
; ------------- Address table of character sets
; CHARSETNUM dwords: entry N holds the address of descriptor N of CharSetTab.
align 4, db 0
CharSetAddr:
%assign CHSETA 0 ; CHSETA = index of the descriptor being emitted
%rep CHARSETNUM
dd CharSetTab + CHSETA*CHARSET_size
%assign CHSETA CHSETA + 1
%endrep
%assign CHSETA CHSETA*CHARSET_size ; leave CHSETA = total size of the descriptors, as before
align 4, db 0
; ------------- Translation table from DEC VT100 graphics to Unicode
; It will be used in "Special Graphics Set" escape sequence.
; 128 words, referenced from CharSetTab through "CHSINI 1" (CharSetDEC).
; Entry N is the Unicode code point for graphics code N. Most entries are N
; itself; the exceptions are the arrows at 2Bh..2Eh, the full block at 30h,
; the no-break space at 5Fh and the symbol and line drawing set at 60h..7Fh.
CP1ToUniTab: INCW 0,2ah ; 43 entries, 0..2Ah unchanged
dw 2192h, 2190h, 2191h, 2193h, 2fh ; 2Bh..2Fh: arrows right, left, up, down, then '/'
dw 2588h ; 30h: full block
INCW 31h, 5eh ; 46 entries, 31h..5Eh unchanged
dw 0a0h ; 5Fh: no-break space
dw 25c6h, 2592h, 2409h, 240ch, 240dh, 240ah, 0b0h, 0b1h ; 60h..67h
dw 2424h, 240bh, 2518h, 2510h, 250ch, 2514h, 253ch, 23bah ; 68h..6Fh
dw 23bbh, 2500h, 23bch, 23bdh, 251ch, 2524h, 2534h, 252ch ; 70h..77h
dw 2502h, 2264h, 2265h, 3c0h, 2260h, 0a3h, 0b7h, 2302h ; 78h..7Fh
; ------------- Translation table from IBM-437 (United States) to Unicode
; 128 words in 16 rows of 8, one Unicode code point per character code.
; Referenced from CharSetTab through "CHSINI 437".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP437ToUniTab: dw 0c7h, 0fch, 0e9h, 0e2h, 0e4h, 0e0h, 0e5h, 0e7h
dw 0eah, 0ebh, 0e8h, 0efh, 0eeh, 0ech, 0c4h, 0c5h
dw 0c9h, 0e6h, 0c6h, 0f4h, 0f6h, 0f2h, 0fbh, 0f9h
dw 0ffh, 0d6h, 0dch, 0a2h, 0a3h, 0a5h, 20a7h, 192h
dw 0e1h, 0edh, 0f3h, 0fah, 0f1h, 0d1h, 0aah, 0bah
dw 0bfh, 2310h, 0ach, 0bdh, 0bch, 0a1h, 0abh, 0bbh
dw 2591h, 2592h, 2593h, 2502h, 2524h, 2561h, 2562h, 2556h
dw 2555h, 2563h, 2551h, 2557h, 255dh, 255ch, 255bh, 2510h
dw 2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 255eh, 255fh
dw 255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 2567h
dw 2568h, 2564h, 2565h, 2559h, 2558h, 2552h, 2553h, 256bh
dw 256ah, 2518h, 250ch, 2588h, 2584h, 258ch, 2590h, 2580h
dw 3b1h, 0dfh, 393h, 3c0h, 3a3h, 3c3h, 0b5h, 3c4h
dw 3a6h, 398h, 3a9h, 3b4h, 221eh, 3c6h, 3b5h, 2229h
dw 2261h, 0b1h, 2265h, 2264h, 2320h, 2321h, 0f7h, 2248h
dw 0b0h, 2219h, 0b7h, 221ah, 207fh, 0b2h, 25a0h, 0a0h
; ------------- Translation table from Asmo-720 (Arabic) to Unicode
; 128 words, one Unicode code point per character code; CHINV (=0) marks a
; code without a Unicode equivalent. Referenced from CharSetTab through
; "CHSINI 720".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP720ToUniTab: dw CHINV, CHINV, 0e9h, 0e2h, CHINV, 0e0h, CHINV, 0e7h
dw 0eah, 0ebh, 0e8h, 0efh, 0eeh, CHINV, CHINV, CHINV
dw CHINV, 651h, 652h, 0f4h, 0a4h, 640h, 0fbh, 0f9h
dw 621h, 622h, 623h, 624h, 0a3h ; 5 entries (short row, continued by INCW)
INCW 625h, 635h ; 17 consecutive entries 625h..635h
dw 0abh, 0bbh ; 2 entries, completing 48 entries so far
dw 2591h, 2592h, 2593h, 2502h, 2524h, 2561h, 2562h, 2556h
dw 2555h, 2563h, 2551h, 2557h, 255dh, 255ch, 255bh, 2510h
dw 2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 255eh, 255fh
dw 255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 2567h
dw 2568h, 2564h, 2565h, 2559h, 2558h, 2552h, 2553h, 256bh
dw 256ah, 2518h, 250ch, 2588h, 2584h, 258ch, 2590h, 2580h
dw 636h, 637h, 638h, 639h, 63ah, 641h, 0b5h, 642h
dw 643h, 644h, 645h, 646h, 647h, 648h, 649h, 64ah
dw 2261h, 64bh, 64ch, 64dh, 64eh, 64fh, 650h, 2248h
dw 0b0h, 2219h, 0b7h, 221ah, 207fh, 0b2h, 25a0h, 0a0h
; ------------- Translation table from IBM-737 (Greek) to Unicode
; 128 words, one Unicode code point per character code: 40 entries from the
; three INCW runs followed by 11 rows of 8. Referenced from CharSetTab
; through "CHSINI 737".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP737ToUniTab: INCW 391h, 3a1h ; 17 entries 391h..3A1h
INCW 3a3h, 3a9h ; 7 entries 3A3h..3A9h (3A2h is skipped)
INCW 3b1h, 3c0h ; 16 entries 3B1h..3C0h
dw 3c1h, 3c3h, 3c2h, 3c4h, 3c5h, 3c6h, 3c7h, 3c8h
dw 2591h, 2592h, 2593h, 2502h, 2524h, 2561h, 2562h, 2556h
dw 2555h, 2563h, 2551h, 2557h, 255dh, 255ch, 255bh, 2510h
dw 2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 255eh, 255fh
dw 255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 2567h
dw 2568h, 2564h, 2565h, 2559h, 2558h, 2552h, 2553h, 256bh
dw 256ah, 2518h, 250ch, 2588h, 2584h, 258ch, 2590h, 2580h
dw 3c9h, 3ach, 3adh, 3aeh, 3cah, 3afh, 3cch, 3cdh
dw 3cbh, 3ceh, 386h, 388h, 389h, 38ah, 38ch, 38eh
dw 38fh, 0b1h, 2265h, 2264h, 3aah, 3abh, 0f7h, 2248h
dw 0b0h, 2219h, 0b7h, 221ah, 207fh, 0b2h, 25a0h, 0a0h
; ------------- Translation table from IBM-775 (Baltic) to Unicode
; 128 words in 16 rows of 8, one Unicode code point per character code.
; Referenced from CharSetTab through "CHSINI 775".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP775ToUniTab: dw 106h, 0fch, 0e9h, 101h, 0e4h, 123h, 0e5h, 107h
dw 142h, 113h, 156h, 157h, 12bh, 179h, 0c4h, 0c5h
dw 0c9h, 0e6h, 0c6h, 14dh, 0f6h, 122h, 0a2h, 15ah
dw 15bh, 0d6h, 0dch, 0f8h, 0a3h, 0d8h, 0d7h, 0a4h
dw 100h, 12ah, 0f3h, 17bh, 17ch, 17ah, 201dh, 0a6h
dw 0a9h, 0aeh, 0ach, 0bdh, 0bch, 141h, 0abh, 0bbh
dw 2591h, 2592h, 2593h, 2502h, 2524h, 104h, 10ch, 118h
dw 116h, 2563h, 2551h, 2557h, 255dh, 12eh, 160h, 2510h
dw 2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 172h, 16ah
dw 255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 17dh
dw 105h, 10dh, 119h, 117h, 12fh, 161h, 173h, 16bh
dw 17eh, 2518h, 250ch, 2588h, 2584h, 258ch, 2590h, 2580h
dw 0d3h, 0dfh, 14ch, 143h, 0f5h, 0d5h, 0b5h, 144h
dw 136h, 137h, 13bh, 13ch, 146h, 112h, 145h, 2019h
dw 0adh, 0b1h, 201ch, 0beh, 0b6h, 0a7h, 0f7h, 201eh
dw 0b0h, 2219h, 0b7h, 0b9h, 0b3h, 0b2h, 25a0h, 0a0h
; ------------- Translation table from IBM-850 (Latin 1 West Europe) to Unicode
; 128 words in 16 rows of 8, one Unicode code point per character code.
; Referenced from CharSetTab through "CHSINI 850".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP850ToUniTab: dw 0c7h, 0fch, 0e9h, 0e2h, 0e4h, 0e0h, 0e5h, 0e7h
dw 0eah, 0ebh, 0e8h, 0efh, 0eeh, 0ech, 0c4h, 0c5h
dw 0c9h, 0e6h, 0c6h, 0f4h, 0f6h, 0f2h, 0fbh, 0f9h
dw 0ffh, 0d6h, 0dch, 0f8h, 0a3h, 0d8h, 0d7h, 192h
dw 0e1h, 0edh, 0f3h, 0fah, 0f1h, 0d1h, 0aah, 0bah
dw 0bfh, 0aeh, 0ach, 0bdh, 0bch, 0a1h, 0abh, 0bbh
dw 2591h, 2592h, 2593h, 2502h, 2524h, 0c1h, 0c2h, 0c0h
dw 0a9h, 2563h, 2551h, 2557h, 255dh, 0a2h, 0a5h, 2510h
dw 2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 0e3h, 0c3h
dw 255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 0a4h
dw 0f0h, 0d0h, 0cah, 0cbh, 0c8h, 131h, 0cdh, 0ceh
dw 0cfh, 2518h, 250ch, 2588h, 2584h, 0a6h, 0cch, 2580h
dw 0d3h, 0dfh, 0d4h, 0d2h, 0f5h, 0d5h, 0b5h, 0feh
dw 0deh, 0dah, 0dbh, 0d9h, 0fdh, 0ddh, 0afh, 0b4h
dw 0adh, 0b1h, 2017h, 0beh, 0b6h, 0a7h, 0f7h, 0b8h
dw 0b0h, 0a8h, 0b7h, 0b9h, 0b3h, 0b2h, 25a0h, 0a0h
; ------------- Translation table from IBM-852 (Latin 2 Central Europe) to Unicode
; 128 words in 16 rows of 8, one Unicode code point per character code.
; Referenced from CharSetTab through "CHSINI 852".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP852ToUniTab: dw 0c7h, 0fch, 0e9h, 0e2h, 0e4h, 16fh, 107h, 0e7h
dw 142h, 0ebh, 150h, 151h, 0eeh, 179h, 0c4h, 106h
dw 0c9h, 139h, 13ah, 0f4h, 0f6h, 13dh, 13eh, 15ah
dw 15bh, 0d6h, 0dch, 164h, 165h, 141h, 0d7h, 10dh
dw 0e1h, 0edh, 0f3h, 0fah, 104h, 105h, 17dh, 17eh
dw 118h, 119h, 0ach, 17ah, 10ch, 15fh, 0abh, 0bbh
dw 2591h, 2592h, 2593h, 2502h, 2524h, 0c1h, 0c2h, 11ah
dw 15eh, 2563h, 2551h, 2557h, 255dh, 17bh, 17ch, 2510h
dw 2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 102h, 103h
dw 255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 0a4h
dw 111h, 110h, 10eh, 0cbh, 10fh, 147h, 0cdh, 0ceh
dw 11bh, 2518h, 250ch, 2588h, 2584h, 162h, 16eh, 2580h
dw 0d3h, 0dfh, 0d4h, 143h, 144h, 148h, 160h, 161h
dw 154h, 0dah, 155h, 170h, 0fdh, 0ddh, 163h, 0b4h
dw 0adh, 2ddh, 2dbh, 2c7h, 2d8h, 0a7h, 0f7h, 0b8h
dw 0b0h, 0a8h, 2d9h, 171h, 158h, 159h, 25a0h, 0a0h
; ------------- Translation table from IBM-855 (Cyrillic, Russian) to Unicode
; 128 words in 16 rows of 8, one Unicode code point per character code.
; Referenced from CharSetTab through "CHSINI 855".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP855ToUniTab: dw 452h, 402h, 453h, 403h, 451h, 401h, 454h, 404h
dw 455h, 405h, 456h, 406h, 457h, 407h, 458h, 408h
dw 459h, 409h, 45ah, 40ah, 45bh, 40bh, 45ch, 40ch
dw 45eh, 40eh, 45fh, 40fh, 44eh, 42eh, 44ah, 42ah
dw 430h, 410h, 431h, 411h, 446h, 426h, 434h, 414h
dw 435h, 415h, 444h, 424h, 433h, 413h, 0abh, 0bbh
dw 2591h, 2592h, 2593h, 2502h, 2524h, 445h, 425h, 438h
dw 418h, 2563h, 2551h, 2557h, 255dh, 439h, 419h, 2510h
dw 2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 43ah, 41ah
dw 255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 0a4h
dw 43bh, 41bh, 43ch, 41ch, 43dh, 41dh, 43eh, 41eh
dw 43fh, 2518h, 250ch, 2588h, 2584h, 41fh, 44fh, 2580h
dw 42fh, 440h, 420h, 441h, 421h, 442h, 422h, 443h
dw 423h, 436h, 416h, 432h, 412h, 44ch, 42ch, 2116h
dw 0adh, 44bh, 42bh, 437h, 417h, 448h, 428h, 44dh
dw 42dh, 449h, 429h, 447h, 427h, 0a7h, 25a0h, 0a0h
; ------------- Translation table from IBM-857 (Turkish) to Unicode
; 128 words in 16 rows of 8, one Unicode code point per character code;
; CHINV (=0) marks a code without a Unicode equivalent (three of them here).
; Referenced from CharSetTab through "CHSINI 857".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP857ToUniTab: dw 0c7h, 0fch, 0e9h, 0e2h, 0e4h, 0e0h, 0e5h, 0e7h
dw 0eah, 0ebh, 0e8h, 0efh, 0eeh, 131h, 0c4h, 0c5h
dw 0c9h, 0e6h, 0c6h, 0f4h, 0f6h, 0f2h, 0fbh, 0f9h
dw 130h, 0d6h, 0dch, 0f8h, 0a3h, 0d8h, 15eh, 15fh
dw 0e1h, 0edh, 0f3h, 0fah, 0f1h, 0d1h, 11eh, 11fh
dw 0bfh, 0aeh, 0ach, 0bdh, 0bch, 0a1h, 0abh, 0bbh
dw 2591h, 2592h, 2593h, 2502h, 2524h, 0c1h, 0c2h, 0c0h
dw 0a9h, 2563h, 2551h, 2557h, 255dh, 0a2h, 0a5h, 2510h
dw 2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 0e3h, 0c3h
dw 255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 0a4h
dw 0bah, 0aah, 0cah, 0cbh, 0c8h, CHINV, 0cdh, 0ceh
dw 0cfh, 2518h, 250ch, 2588h, 2584h, 0a6h, 0cch, 2580h
dw 0d3h, 0dfh, 0d4h, 0d2h, 0f5h, 0d5h, 0b5h, CHINV
dw 0d7h, 0dah, 0dbh, 0d9h, 0ech, 0ffh, 0afh, 0b4h
dw 0adh, 0b1h, CHINV, 0beh, 0b6h, 0a7h, 0f7h, 0b8h
dw 0b0h, 0a8h, 0b7h, 0b9h, 0b3h, 0b2h, 25a0h, 0a0h
; ------------- Translation table from IBM-858 (Latin 1 + Euro) to Unicode
; 128 words in 16 rows of 8, one Unicode code point per character code.
; Same data as CP850ToUniTab except the Euro sign 20ach in place of 131h
; (row 11, sixth entry). Referenced from CharSetTab through "CHSINI 858".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP858ToUniTab: dw 0c7h, 0fch, 0e9h, 0e2h, 0e4h, 0e0h, 0e5h, 0e7h
dw 0eah, 0ebh, 0e8h, 0efh, 0eeh, 0ech, 0c4h, 0c5h
dw 0c9h, 0e6h, 0c6h, 0f4h, 0f6h, 0f2h, 0fbh, 0f9h
dw 0ffh, 0d6h, 0dch, 0f8h, 0a3h, 0d8h, 0d7h, 192h
dw 0e1h, 0edh, 0f3h, 0fah, 0f1h, 0d1h, 0aah, 0bah
dw 0bfh, 0aeh, 0ach, 0bdh, 0bch, 0a1h, 0abh, 0bbh
dw 2591h, 2592h, 2593h, 2502h, 2524h, 0c1h, 0c2h, 0c0h
dw 0a9h, 2563h, 2551h, 2557h, 255dh, 0a2h, 0a5h, 2510h
dw 2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 0e3h, 0c3h
dw 255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 0a4h
dw 0f0h, 0d0h, 0cah, 0cbh, 0c8h, 20ach, 0cdh, 0ceh
dw 0cfh, 2518h, 250ch, 2588h, 2584h, 0a6h, 0cch, 2580h
dw 0d3h, 0dfh, 0d4h, 0d2h, 0f5h, 0d5h, 0b5h, 0feh
dw 0deh, 0dah, 0dbh, 0d9h, 0fdh, 0ddh, 0afh, 0b4h
dw 0adh, 0b1h, 2017h, 0beh, 0b6h, 0a7h, 0f7h, 0b8h
dw 0b0h, 0a8h, 0b7h, 0b9h, 0b3h, 0b2h, 25a0h, 0a0h
; ------------- Translation table from IBM-860 (Portuguese) to Unicode
; 128 words in 16 rows of 8, one Unicode code point per character code.
; Referenced from CharSetTab through "CHSINI 860".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP860ToUniTab: dw 0c7h, 0fch, 0e9h, 0e2h, 0e3h, 0e0h, 0c1h, 0e7h
dw 0eah, 0cah, 0e8h, 0cdh, 0d4h, 0ech, 0c3h, 0c2h
dw 0c9h, 0c0h, 0c8h, 0f4h, 0f5h, 0f2h, 0dah, 0f9h
dw 0cch, 0d5h, 0dch, 0a2h, 0a3h, 0d9h, 20a7h, 0d3h
dw 0e1h, 0edh, 0f3h, 0fah, 0f1h, 0d1h, 0aah, 0bah
dw 0bfh, 0d2h, 0ach, 0bdh, 0bch, 0a1h, 0abh, 0bbh
dw 2591h, 2592h, 2593h, 2502h, 2524h, 2561h, 2562h, 2556h
dw 2555h, 2563h, 2551h, 2557h, 255dh, 255ch, 255bh, 2510h
dw 2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 255eh, 255fh
dw 255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 2567h
dw 2568h, 2564h, 2565h, 2559h, 2558h, 2552h, 2553h, 256bh
dw 256ah, 2518h, 250ch, 2588h, 2584h, 258ch, 2590h, 2580h
dw 3b1h, 0dfh, 393h, 3c0h, 3a3h, 3c3h, 0b5h, 3c4h
dw 3a6h, 398h, 3a9h, 3b4h, 221eh, 3c6h, 3b5h, 2229h
dw 2261h, 0b1h, 2265h, 2264h, 2320h, 2321h, 0f7h, 2248h
dw 0b0h, 2219h, 0b7h, 221ah, 207fh, 0b2h, 25a0h, 0a0h
; ------------- Translation table from IBM-861 (Icelandic) to Unicode
; 128 words in 16 rows of 8, one Unicode code point per character code.
; Referenced from CharSetTab through "CHSINI 861".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP861ToUniTab: dw 0c7h, 0fch, 0e9h, 0e2h, 0e4h, 0e0h, 0e5h, 0e7h
dw 0eah, 0ebh, 0e8h, 0d0h, 0f0h, 0deh, 0c4h, 0c5h
dw 0c9h, 0e6h, 0c6h, 0f4h, 0f6h, 0feh, 0fbh, 0ddh
dw 0fdh, 0d6h, 0dch, 0f8h, 0a3h, 0d8h, 20a7h, 192h
dw 0e1h, 0edh, 0f3h, 0fah, 0c1h, 0cdh, 0d3h, 0dah
dw 0bfh, 2310h, 0ach, 0bdh, 0bch, 0a1h, 0abh, 0bbh
dw 2591h, 2592h, 2593h, 2502h, 2524h, 2561h, 2562h, 2556h
dw 2555h, 2563h, 2551h, 2557h, 255dh, 255ch, 255bh, 2510h
dw 2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 255eh, 255fh
dw 255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 2567h
dw 2568h, 2564h, 2565h, 2559h, 2558h, 2552h, 2553h, 256bh
dw 256ah, 2518h, 250ch, 2588h, 2584h, 258ch, 2590h, 2580h
dw 3b1h, 0dfh, 393h, 3c0h, 3a3h, 3c3h, 0b5h, 3c4h
dw 3a6h, 398h, 3a9h, 3b4h, 221eh, 3c6h, 3b5h, 2229h
dw 2261h, 0b1h, 2265h, 2264h, 2320h, 2321h, 0f7h, 2248h
dw 0b0h, 2219h, 0b7h, 221ah, 207fh, 0b2h, 25a0h, 0a0h
; ------------- Translation table from IBM-862 (Hebrew) to Unicode
; 128 words, one Unicode code point per character code: 27 + 5 entries in
; the first two lines, then 12 rows of 8. Referenced from CharSetTab through
; "CHSINI 862".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP862ToUniTab: INCW 5d0h, 5eah ; 27 entries 5D0h..5EAh (Hebrew letters)
dw 0a2h, 0a3h, 0a5h, 20a7h, 192h ; 5 entries, completing 32 entries so far
dw 0e1h, 0edh, 0f3h, 0fah, 0f1h, 0d1h, 0aah, 0bah
dw 0bfh, 2310h, 0ach, 0bdh, 0bch, 0a1h, 0abh, 0bbh
dw 2591h, 2592h, 2593h, 2502h, 2524h, 2561h, 2562h, 2556h
dw 2555h, 2563h, 2551h, 2557h, 255dh, 255ch, 255bh, 2510h
dw 2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 255eh, 255fh
dw 255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 2567h
dw 2568h, 2564h, 2565h, 2559h, 2558h, 2552h, 2553h, 256bh
dw 256ah, 2518h, 250ch, 2588h, 2584h, 258ch, 2590h, 2580h
dw 3b1h, 0dfh, 393h, 3c0h, 3a3h, 3c3h, 0b5h, 3c4h
dw 3a6h, 398h, 3a9h, 3b4h, 221eh, 3c6h, 3b5h, 2229h
dw 2261h, 0b1h, 2265h, 2264h, 2320h, 2321h, 0f7h, 2248h
dw 0b0h, 2219h, 0b7h, 221ah, 207fh, 0b2h, 25a0h, 0a0h
; ------------- Translation table from IBM-863 (French Canadian) to Unicode
; 128 words in 16 rows of 8, one Unicode code point per character code.
; Referenced from CharSetTab through "CHSINI 863".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP863ToUniTab: dw 0c7h, 0fch, 0e9h, 0e2h, 0c2h, 0e0h, 0b6h, 0e7h
dw 0eah, 0ebh, 0e8h, 0efh, 0eeh, 2017h, 0c0h, 0a7h
dw 0c9h, 0c8h, 0cah, 0f4h, 0cbh, 0cfh, 0fbh, 0f9h
dw 0a4h, 0d4h, 0dch, 0a2h, 0a3h, 0d9h, 0dbh, 192h
dw 0a6h, 0b4h, 0f3h, 0fah, 0a8h, 0b8h, 0b3h, 0afh
dw 0ceh, 2310h, 0ach, 0bdh, 0bch, 0beh, 0abh, 0bbh
dw 2591h, 2592h, 2593h, 2502h, 2524h, 2561h, 2562h, 2556h
dw 2555h, 2563h, 2551h, 2557h, 255dh, 255ch, 255bh, 2510h
dw 2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 255eh, 255fh
dw 255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 2567h
dw 2568h, 2564h, 2565h, 2559h, 2558h, 2552h, 2553h, 256bh
dw 256ah, 2518h, 250ch, 2588h, 2584h, 258ch, 2590h, 2580h
dw 3b1h, 0dfh, 393h, 3c0h, 3a3h, 3c3h, 0b5h, 3c4h
dw 3a6h, 398h, 3a9h, 3b4h, 221eh, 3c6h, 3b5h, 2229h
dw 2261h, 0b1h, 2265h, 2264h, 2320h, 2321h, 0f7h, 2248h
dw 0b0h, 2219h, 0b7h, 221ah, 207fh, 0b2h, 25a0h, 0a0h
; ------------- Translation table from IBM-865 (Nordic) to Unicode
; 128 words in 16 rows of 8, one Unicode code point per character code.
; Referenced from CharSetTab through "CHSINI 865".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP865ToUniTab: dw 0c7h, 0fch, 0e9h, 0e2h, 0e4h, 0e0h, 0e5h, 0e7h
dw 0eah, 0ebh, 0e8h, 0efh, 0eeh, 0ech, 0c4h, 0c5h
dw 0c9h, 0e6h, 0c6h, 0f4h, 0f6h, 0f2h, 0fbh, 0f9h
dw 0ffh, 0d6h, 0dch, 0f8h, 0a3h, 0d8h, 20a7h, 192h
dw 0e1h, 0edh, 0f3h, 0fah, 0f1h, 0d1h, 0aah, 0bah
dw 0bfh, 2310h, 0ach, 0bdh, 0bch, 0a1h, 0abh, 0a4h
dw 2591h, 2592h, 2593h, 2502h, 2524h, 2561h, 2562h, 2556h
dw 2555h, 2563h, 2551h, 2557h, 255dh, 255ch, 255bh, 2510h
dw 2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 255eh, 255fh
dw 255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 2567h
dw 2568h, 2564h, 2565h, 2559h, 2558h, 2552h, 2553h, 256bh
dw 256ah, 2518h, 250ch, 2588h, 2584h, 258ch, 2590h, 2580h
dw 3b1h, 0dfh, 393h, 3c0h, 3a3h, 3c3h, 0b5h, 3c4h
dw 3a6h, 398h, 3a9h, 3b4h, 221eh, 3c6h, 3b5h, 2229h
dw 2261h, 0b1h, 2265h, 2264h, 2320h, 2321h, 0f7h, 2248h
dw 0b0h, 2219h, 0b7h, 221ah, 207fh, 0b2h, 25a0h, 0a0h
; ------------- Translation table from IBM-866 (Russian) to Unicode
; 128 words, one Unicode code point per character code:
; 48 (INCW) + 48 (6 rows of 8) + 16 (INCW) + 16 (2 rows of 8).
; Referenced from CharSetTab through "CHSINI 866".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP866ToUniTab: INCW 410h, 43fh ; 48 entries 410h..43Fh
dw 2591h, 2592h, 2593h, 2502h, 2524h, 2561h, 2562h, 2556h
dw 2555h, 2563h, 2551h, 2557h, 255dh, 255ch, 255bh, 2510h
dw 2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 255eh, 255fh
dw 255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 2567h
dw 2568h, 2564h, 2565h, 2559h, 2558h, 2552h, 2553h, 256bh
dw 256ah, 2518h, 250ch, 2588h, 2584h, 258ch, 2590h, 2580h
INCW 440h, 44fh ; 16 entries 440h..44Fh
dw 401h, 451h, 404h, 454h, 407h, 457h, 40eh, 45eh
dw 0b0h, 2219h, 0b7h, 221ah, 2116h, 0a4h, 25a0h, 0a0h
; ------------- Translation table from IBM-869 (Modern Greek) to Unicode
; 128 words in 16 rows of 8, one Unicode code point per character code;
; CHINV (=0) marks a code without a Unicode equivalent. Referenced from
; CharSetTab through "CHSINI 869".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
; NOTE(review): entry 7 repeats 386h and entries 13h and 14h repeat 38ch,
; whereas the published IBM-869 layout leaves those codes unassigned (like
; entries 0..5, which are CHINV here). Confirm whether the duplicates are
; intended; they make two codes translate to the same Unicode character.
CP869ToUniTab: dw CHINV, CHINV, CHINV, CHINV, CHINV, CHINV, 386h, 386h
dw 0b7h, 0ach, 0a6h, 2018h, 2019h, 388h, 2015h, 389h
dw 38ah, 3aah, 38ch, 38ch, 38ch, 38eh, 3abh, 0a9h
dw 38fh, 0b2h, 0b3h, 3ach, 0a3h, 3adh, 3aeh, 3afh
dw 3cah, 390h, 3cch, 3cdh, 391h, 392h, 393h, 394h
dw 395h, 396h, 397h, 0bdh, 398h, 399h, 0abh, 0bbh
dw 2591h, 2592h, 2593h, 2502h, 2524h, 39ah, 39bh, 39ch
dw 39dh, 2563h, 2551h, 2557h, 255dh, 39eh, 39fh, 2510h
dw 2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 3a0h, 3a1h
dw 255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 3a3h
dw 3a4h, 3a5h, 3a6h, 3a7h, 3a8h, 3a9h, 3b1h, 3b2h
dw 3b3h, 2518h, 250ch, 2588h, 2584h, 3b4h, 3b5h, 2580h
dw 3b6h, 3b7h, 3b8h, 3b9h, 3bah, 3bbh, 3bch, 3bdh
dw 3beh, 3bfh, 3c0h, 3c1h, 3c3h, 3c2h, 3c4h, 384h
dw 0adh, 0b1h, 3c5h, 3c6h, 3c7h, 0a7h, 3c8h, 385h
dw 0b0h, 0a8h, 3c9h, 3cbh, 3b0h, 3ceh, 25a0h, 0a0h
; ------------- Translation table from IBM-895 (Kamenicky encoding, Czech) to Unicode
; 128 words in 16 rows of 8, one Unicode code point per character code.
; Referenced from CharSetTab through "CHSINI 895".
; The first 44 entries carry the Czech and Slovak letters; from row 7 on the
; data is identical to CP437ToUniTab.
; Entry 2Dh (row 6, sixth value) is the section sign 0a7h: the Kamenicky
; layout replaces the inverted exclamation mark of IBM-437 at that position.
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP895ToUniTab: dw 10ch, 0fch, 0e9h, 10fh, 0e4h, 10eh, 164h, 10dh
dw 11bh, 11ah, 139h, 0cdh, 13eh, 13ah, 0c4h, 0c1h
dw 0c9h, 17eh, 17dh, 0f4h, 0f6h, 0d3h, 16fh, 0dah
dw 0fdh, 0d6h, 0dch, 160h, 13dh, 0ddh, 158h, 165h
dw 0e1h, 0edh, 0f3h, 0fah, 148h, 147h, 16eh, 0d4h
dw 161h, 159h, 155h, 154h, 0bch, 0a7h, 0abh, 0bbh
dw 2591h, 2592h, 2593h, 2502h, 2524h, 2561h, 2562h, 2556h
dw 2555h, 2563h, 2551h, 2557h, 255dh, 255ch, 255bh, 2510h
dw 2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 255eh, 255fh
dw 255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 2567h
dw 2568h, 2564h, 2565h, 2559h, 2558h, 2552h, 2553h, 256bh
dw 256ah, 2518h, 250ch, 2588h, 2584h, 258ch, 2590h, 2580h
dw 3b1h, 0dfh, 393h, 3c0h, 3a3h, 3c3h, 0b5h, 3c4h
dw 3a6h, 398h, 3a9h, 3b4h, 221eh, 3c6h, 3b5h, 2229h
dw 2261h, 0b1h, 2265h, 2264h, 2320h, 2321h, 0f7h, 2248h
dw 0b0h, 2219h, 0b7h, 221ah, 207fh, 0b2h, 25a0h, 0a0h
; ------------- Translation table from Windows-874 (Thai) to Unicode
; 128 words, one Unicode code point per character code; CHINV (=0) marks a
; code without a Unicode equivalent. Entry counts per line:
; 8 + 8 + 8 + 8 + 1 + 58 + 4 + 29 + 4 = 128.
; Referenced from CharSetTab through "CHSINI 874".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP874ToUniTab: dw 20ach, CHINV, CHINV, CHINV, CHINV, 2026h, CHINV, CHINV
times 8 dw CHINV ; 8 invalid entries
dw CHINV, 2018h, 2019h, 201ch, 201dh, 2022h, 2013h, 2014h
times 8 dw CHINV ; 8 invalid entries
dw 0a0h ; no-break space
INCW 0e01h, 0e3ah ; 58 entries 0E01h..0E3Ah (Thai)
dw CHINV, CHINV, CHINV, CHINV
INCW 0e3fh, 0e5bh ; 29 entries 0E3Fh..0E5Bh (Thai)
dw CHINV, CHINV, CHINV, CHINV
; ------------- Translation table from Windows-1250 (Central Europe) to Unicode
; 128 words in 16 rows of 8, one Unicode code point per character code;
; CHINV (=0) marks a code without a Unicode equivalent. Referenced from
; CharSetTab through "CHSINI 1250" (CharSet1250).
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP1250ToUniTab: dw 20ach, CHINV, 201ah, CHINV, 201eh, 2026h, 2020h, 2021h
dw CHINV, 2030h, 160h, 2039h, 15ah, 164h, 17dh, 179h
dw CHINV, 2018h, 2019h, 201ch, 201dh, 2022h, 2013h, 2014h
dw CHINV, 2122h, 161h, 203ah, 15bh, 165h, 17eh, 17ah
dw 0a0h, 2c7h, 2d8h, 141h, 0a4h, 104h, 0a6h, 0a7h
dw 0a8h, 0a9h, 15eh, 0abh, 0ach, 0adh, 0aeh, 17bh
dw 0b0h, 0b1h, 2dbh, 142h, 0b4h, 0b5h, 0b6h, 0b7h
dw 0b8h, 105h, 15fh, 0bbh, 13dh, 2ddh, 13eh, 17ch
dw 154h, 0c1h, 0c2h, 102h, 0c4h, 139h, 106h, 0c7h
dw 10ch, 0c9h, 118h, 0cbh, 11ah, 0cdh, 0ceh, 10eh
dw 110h, 143h, 147h, 0d3h, 0d4h, 150h, 0d6h, 0d7h
dw 158h, 16eh, 0dah, 170h, 0dch, 0ddh, 162h, 0dfh
dw 155h, 0e1h, 0e2h, 103h, 0e4h, 13ah, 107h, 0e7h
dw 10dh, 0e9h, 119h, 0ebh, 11bh, 0edh, 0eeh, 10fh
dw 111h, 144h, 148h, 0f3h, 0f4h, 151h, 0f6h, 0f7h
dw 159h, 16fh, 0fah, 171h, 0fch, 0fdh, 163h, 2d9h
; ------------- Translation table from Windows-1251 (Cyrillic) to Unicode
; 128 words, one Unicode code point per character code: 8 rows of 8 followed
; by a run of 64 consecutive code points. CHINV (=0) marks a code without a
; Unicode equivalent. Referenced from CharSetTab through "CHSINI 1251".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP1251ToUniTab: dw 402h, 403h, 201ah, 453h, 201eh, 2026h, 2020h, 2021h
dw 20ach, 2030h, 409h, 2039h, 40ah, 40ch, 40bh, 40fh
dw 452h, 2018h, 2019h, 201ch, 201dh, 2022h, 2013h, 2014h
dw CHINV, 2122h, 459h, 203ah, 45ah, 45ch, 45bh, 45fh
dw 0a0h, 40eh, 45eh, 408h, 0a4h, 490h, 0a6h, 0a7h
dw 401h, 0a9h, 404h, 0abh, 0ach, 0adh, 0aeh, 407h
dw 0b0h, 0b1h, 406h, 456h, 491h, 0b5h, 0b6h, 0b7h
dw 451h, 2116h, 454h, 0bbh, 458h, 405h, 455h, 457h
INCW 410h, 44fh ; 64 entries 410h..44Fh
; ------------- Translation table from Windows-1252 (Latin 1 Windows) to Unicode
; 128 words, one Unicode code point per character code: 4 rows of 8 followed
; by a run of 96 code points that equal their own index range 0A0h..0FFh.
; CHINV (=0) marks a code without a Unicode equivalent. Referenced from
; CharSetTab through "CHSINI 1252" (CharSet1252).
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP1252ToUniTab: dw 20ach, CHINV, 201ah, 192h, 201eh, 2026h, 2020h, 2021h
dw 2c6h, 2030h, 160h, 2039h, 152h, CHINV, 17dh, CHINV
dw CHINV, 2018h, 2019h, 201ch, 201dh, 2022h, 2013h, 2014h
dw 2dch, 2122h, 161h, 203ah, 153h, CHINV, 17eh, 178h
INCW 0a0h, 0ffh ; 96 entries 0A0h..0FFh
; ------------- Translation table from Windows-1253 (Greek) to Unicode
; 128 words, one Unicode code point per character code; CHINV (=0) marks a
; code without a Unicode equivalent. Entry counts:
; 7 rows of 8 + 6 + 20 + 1 + 44 + 1 = 128.
; Referenced from CharSetTab through "CHSINI 1253".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP1253ToUniTab: dw 20ach, CHINV, 201ah, 192h, 201eh, 2026h, 2020h, 2021h
dw CHINV, 2030h, CHINV, 2039h, CHINV, CHINV, CHINV, CHINV
dw CHINV, 2018h, 2019h, 201ch, 201dh, 2022h, 2013h, 2014h
dw CHINV, 2122h, CHINV, 203ah, CHINV, CHINV, CHINV, CHINV
dw 0a0h, 385h, 386h, 0a3h, 0a4h, 0a5h, 0a6h, 0a7h
dw 0a8h, 0a9h, CHINV, 0abh, 0ach, 0adh, 0aeh, 2015h
dw 0b0h, 0b1h, 0b2h, 0b3h, 384h, 0b5h, 0b6h, 0b7h
dw 388h, 389h, 38ah, 0bbh, 38ch, 0bdh ; 6 entries (short row, continued by INCW)
INCW 38eh, 3a1h ; 20 entries 38Eh..3A1h
dw CHINV ; stands where 3A2h would be
INCW 3a3h, 3ceh ; 44 entries 3A3h..3CEh
dw CHINV ; last entry is invalid
; ------------- Translation table from Windows-1254 (Turkish) to Unicode
; 128 words, one Unicode code point per character code; CHINV (=0) marks a
; code without a Unicode equivalent. Entry counts:
; 4 rows of 8 + 48 + 1 + 12 + 2 + 17 + 1 + 12 + 3 = 128.
; After the first 32 entries the table is the range 0A0h..0FFh with six
; values replaced by Turkish letters (11eh, 130h, 15eh, 11fh, 131h, 15fh).
; Referenced from CharSetTab through "CHSINI 1254".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP1254ToUniTab: dw 20ach, CHINV, 201ah, 192h, 201eh, 2026h, 2020h, 2021h
dw 2c6h, 2030h, 160h, 2039h, 152h, CHINV, CHINV, CHINV
dw CHINV, 2018h, 2019h, 201ch, 201dh, 2022h, 2013h, 2014h
dw 2dch, 2122h, 161h, 203ah, 153h, CHINV, CHINV, 178h
INCW 0a0h, 0cfh ; 48 entries 0A0h..0CFh
dw 11eh ; replaces 0D0h
INCW 0d1h, 0dch ; 12 entries 0D1h..0DCh
dw 130h, 15eh ; replace 0DDh, 0DEh
INCW 0dfh, 0efh ; 17 entries 0DFh..0EFh
dw 11fh ; replaces 0F0h
INCW 0f1h, 0fch ; 12 entries 0F1h..0FCh
dw 131h, 15fh, 0ffh ; replace 0FDh, 0FEh; 0FFh unchanged
; ------------- Translation table from Windows-1255 (Hebrew) to Unicode
; 128 words, one Unicode code point per character code; CHINV (=0) marks a
; code without a Unicode equivalent. Entry counts:
; 12 rows of 8 + 27 + 5 = 128.
; Referenced from CharSetTab through "CHSINI 1255".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP1255ToUniTab: dw 20ach, CHINV, 201ah, 192h, 201eh, 2026h, 2020h, 2021h
dw 2c6h, 2030h, CHINV, 2039h, CHINV, CHINV, CHINV, CHINV
dw CHINV, 2018h, 2019h, 201ch, 201dh, 2022h, 2013h, 2014h
dw 2dch, 2122h, CHINV, 203ah, CHINV, CHINV, CHINV, CHINV
dw 0a0h, 0a1h, 0a2h, 0a3h, 20aah, 0a5h, 0a6h, 0a7h
dw 0a8h, 0a9h, 0d7h, 0abh, 0ach, 0adh, 0aeh, 0afh
dw 0b0h, 0b1h, 0b2h, 0b3h, 0b4h, 0b5h, 0b6h, 0b7h
dw 0b8h, 0b9h, 0f7h, 0bbh, 0bch, 0bdh, 0beh, 0bfh
dw 5b0h, 5b1h, 5b2h, 5b3h, 5b4h, 5b5h, 5b6h, 5b7h
dw 5b8h, 5b9h, CHINV, 5bbh, 5bch, 5bdh, 5beh, 5bfh
dw 5c0h, 5c1h, 5c2h, 5c3h, 5f0h, 5f1h, 5f2h, 5f3h
dw 5f4h, CHINV, CHINV, CHINV, CHINV, CHINV, CHINV, CHINV
INCW 5d0h, 5eah ; 27 entries 5D0h..5EAh (Hebrew letters)
dw CHINV, CHINV, 200eh, 200fh, CHINV ; last 5 entries
; ------------- Translation table from Windows-1256 (Arabic) to Unicode
; 128 words, one Unicode code point per character code. Entry counts:
; 8 rows of 8 + 1 + 22 + 1 + 5 rows of 8 = 128.
; Referenced from CharSetTab through "CHSINI 1256".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP1256ToUniTab: dw 20ach, 67eh, 201ah, 192h, 201eh, 2026h, 2020h, 2021h
dw 2c6h, 2030h, 679h, 2039h, 152h, 686h, 698h, 688h
dw 6afh, 2018h, 2019h, 201ch, 201dh, 2022h, 2013h, 2014h
dw 6a9h, 2122h, 691h, 203ah, 153h, 200ch, 200dh, 6bah
dw 0a0h, 60ch, 0a2h, 0a3h, 0a4h, 0a5h, 0a6h, 0a7h
dw 0a8h, 0a9h, 6beh, 0abh, 0ach, 0adh, 0aeh, 0afh
dw 0b0h, 0b1h, 0b2h, 0b3h, 0b4h, 0b5h, 0b6h, 0b7h
dw 0b8h, 0b9h, 61bh, 0bbh, 0bch, 0bdh, 0beh, 61fh
dw 6c1h ; single entry before the run
INCW 621h, 636h ; 22 entries 621h..636h
dw 0d7h ; multiplication sign between the Arabic letters
dw 637h, 638h, 639h, 63ah, 640h, 641h, 642h, 643h
dw 0e0h, 644h, 0e2h, 645h, 646h, 647h, 648h, 0e7h
dw 0e8h, 0e9h, 0eah, 0ebh, 649h, 64ah, 0eeh, 0efh
dw 64bh, 64ch, 64dh, 64eh, 0f4h, 64fh, 650h, 0f7h
dw 651h, 0f9h, 652h, 0fbh, 0fch, 200eh, 200fh, 6d2h
; ------------- Translation table from Windows-1257 (Baltic) to Unicode
; 128 words in 16 rows of 8, one Unicode code point per character code;
; CHINV (=0) marks a code without a Unicode equivalent. Referenced from
; CharSetTab through "CHSINI 1257".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP1257ToUniTab: dw 20ach, CHINV, 201ah, CHINV, 201eh, 2026h, 2020h, 2021h
dw CHINV, 2030h, CHINV, 2039h, CHINV, 0a8h, 2c7h, 0b8h
dw CHINV, 2018h, 2019h, 201ch, 201dh, 2022h, 2013h, 2014h
dw CHINV, 2122h, CHINV, 203ah, CHINV, 0afh, 2dbh, CHINV
dw 0a0h, CHINV, 0a2h, 0a3h, 0a4h, CHINV, 0a6h, 0a7h
dw 0d8h, 0a9h, 156h, 0abh, 0ach, 0adh, 0aeh, 0c6h
dw 0b0h, 0b1h, 0b2h, 0b3h, 0b4h, 0b5h, 0b6h, 0b7h
dw 0f8h, 0b9h, 157h, 0bbh, 0bch, 0bdh, 0beh, 0e6h
dw 104h, 12eh, 100h, 106h, 0c4h, 0c5h, 118h, 112h
dw 10ch, 0c9h, 179h, 116h, 122h, 136h, 12ah, 13bh
dw 160h, 143h, 145h, 0d3h, 14ch, 0d5h, 0d6h, 0d7h
dw 172h, 141h, 15ah, 16ah, 0dch, 17bh, 17dh, 0dfh
dw 105h, 12fh, 101h, 107h, 0e4h, 0e5h, 119h, 113h
dw 10dh, 0e9h, 17ah, 117h, 123h, 137h, 12bh, 13ch
dw 161h, 144h, 146h, 0f3h, 14dh, 0f5h, 0f6h, 0f7h
dw 173h, 142h, 15bh, 16bh, 0fch, 17ch, 17eh, 2d9h
; ------------- Translation table from Windows-1258 (Vietnam) to Unicode
; 128 words, one Unicode code point per character code; CHINV (=0) marks a
; code without a Unicode equivalent. Entry counts:
; 4 rows of 8 + 32 + 8 rows of 8 = 128.
; The values 300h, 301h, 303h, 309h and 323h are combining marks.
; Referenced from CharSetTab through "CHSINI 1258".
; NOTE(review): the values match codes 80h..FFh of the code page, so entry 0
; is presumably code 80h - confirm against the lookup code.
CP1258ToUniTab: dw 20ach, CHINV, 201ah, 192h, 201eh, 2026h, 2020h, 2021h
dw 2c6h, 2030h, CHINV, 2039h, 152h, CHINV, CHINV, CHINV
dw CHINV, 2018h, 2019h, 201ch, 201dh, 2022h, 2013h, 2014h
dw 2dch, 2122h, CHINV, 203ah, 153h, CHINV, CHINV, 178h
INCW 0a0h, 0bfh ; 32 entries 0A0h..0BFh
dw 0c0h, 0c1h, 0c2h, 102h, 0c4h, 0c5h, 0c6h, 0c7h
dw 0c8h, 0c9h, 0cah, 0cbh, 300h, 0cdh, 0ceh, 0cfh
dw 110h, 0d1h, 309h, 0d3h, 0d4h, 1a0h, 0d6h, 0d7h
dw 0d8h, 0d9h, 0dah, 0dbh, 0dch, 1afh, 303h, 0dfh
dw 0e0h, 0e1h, 0e2h, 103h, 0e4h, 0e5h, 0e6h, 0e7h
dw 0e8h, 0e9h, 0eah, 0ebh, 301h, 0edh, 0eeh, 0efh
dw 111h, 0f1h, 323h, 0f3h, 0f4h, 1a1h, 0f6h, 0f7h
dw 0f8h, 0f9h, 0fah, 0fbh, 0fch, 1b0h, 20abh, 0ffh
; ------------- Translation table from ISO 8859-2 (Latin 2) to Unicode
; 128 words, one Unicode code point per character code: 32 invalid entries
; (CHINV = 0) followed by 12 rows of 8. Referenced from CharSetTab through
; "CHSINI 28592".
; NOTE(review): the values match codes 80h..FFh of the character set (the
; 32 leading CHINV entries being 80h..9Fh), so entry 0 is presumably code
; 80h - confirm against the lookup code.
CP28592ToUniTab:times 32 dw CHINV
dw 0a0h, 104h, 2d8h, 141h, 0a4h, 13dh, 15ah, 0a7h
dw 0a8h, 160h, 15eh, 164h, 179h, 0adh, 17dh, 17bh
dw 0b0h, 105h, 2dbh, 142h, 0b4h, 13eh, 15bh, 2c7h
dw 0b8h, 161h, 15fh, 165h, 17ah, 2ddh, 17eh, 17ch
dw 154h, 0c1h, 0c2h, 102h, 0c4h, 139h, 106h, 0c7h
dw 10ch, 0c9h, 118h, 0cbh, 11ah, 0cdh, 0ceh, 10eh
dw 110h, 143h, 147h, 0d3h, 0d4h, 150h, 0d6h, 0d7h
dw 158h, 16eh, 0dah, 170h, 0dch, 0ddh, 162h, 0dfh
dw 155h, 0e1h, 0e2h, 103h, 0e4h, 13ah, 107h, 0e7h
dw 10dh, 0e9h, 119h, 0ebh, 11bh, 0edh, 0eeh, 10fh
dw 111h, 144h, 148h, 0f3h, 0f4h, 151h, 0f6h, 0f7h
dw 159h, 16fh, 0fah, 171h, 0fch, 0fdh, 163h, 2d9h
; ------------- Translation table from ISO 8859-3 (Latin 3) to Unicode
; 128 words, one Unicode code point per character code: 32 invalid entries
; (CHINV = 0) followed by 12 rows of 8, some of which also contain CHINV for
; unassigned codes. Referenced from CharSetTab through "CHSINI 28593".
; NOTE(review): the values match codes 80h..FFh of the character set (the
; 32 leading CHINV entries being 80h..9Fh), so entry 0 is presumably code
; 80h - confirm against the lookup code.
CP28593ToUniTab:times 32 dw CHINV
dw 0a0h, 126h, 2d8h, 0a3h, 0a4h, CHINV, 124h, 0a7h
dw 0a8h, 130h, 15eh, 11eh, 134h, 0adh, CHINV, 17bh
dw 0b0h, 127h, 0b2h, 0b3h, 0b4h, 0b5h, 125h, 0b7h
dw 0b8h, 131h, 15fh, 11fh, 135h, 0bdh, CHINV, 17ch
dw 0c0h, 0c1h, 0c2h, CHINV, 0c4h, 10ah, 108h, 0c7h
dw 0c8h, 0c9h, 0cah, 0cbh, 0cch, 0cdh, 0ceh, 0cfh
dw CHINV, 0d1h, 0d2h, 0d3h, 0d4h, 120h, 0d6h, 0d7h
dw 11ch, 0d9h, 0dah, 0dbh, 0dch, 16ch, 15ch, 0dfh
dw 0e0h, 0e1h, 0e2h, CHINV, 0e4h, 10bh, 109h, 0e7h
dw 0e8h, 0e9h, 0eah, 0ebh, 0ech, 0edh, 0eeh, 0efh
dw CHINV, 0f1h, 0f2h, 0f3h, 0f4h, 121h, 0f6h, 0f7h
dw 11dh, 0f9h, 0fah, 0fbh, 0fch, 16dh, 15dh, 2d9h
; ------------- Translation table from ISO 8859-4 (Baltic) to Unicode
; 128 words, one Unicode code point per character code: 32 invalid entries
; (CHINV = 0) followed by 12 rows of 8. Referenced from CharSetTab through
; "CHSINI 28594".
; NOTE(review): the values match codes 80h..FFh of the character set (the
; 32 leading CHINV entries being 80h..9Fh), so entry 0 is presumably code
; 80h - confirm against the lookup code.
CP28594ToUniTab:times 32 dw CHINV
dw 0a0h, 104h, 138h, 156h, 0a4h, 128h, 13bh, 0a7h
dw 0a8h, 160h, 112h, 122h, 166h, 0adh, 17dh, 0afh
dw 0b0h, 105h, 2dbh, 157h, 0b4h, 129h, 13ch, 2c7h
dw 0b8h, 161h, 113h, 123h, 167h, 14ah, 17eh, 14bh
dw 100h, 0c1h, 0c2h, 0c3h, 0c4h, 0c5h, 0c6h, 12eh
dw 10ch, 0c9h, 118h, 0cbh, 116h, 0cdh, 0ceh, 12ah
dw 110h, 145h, 14ch, 136h, 0d4h, 0d5h, 0d6h, 0d7h
dw 0d8h, 172h, 0dah, 0dbh, 0dch, 168h, 16ah, 0dfh
dw 101h, 0e1h, 0e2h, 0e3h, 0e4h, 0e5h, 0e6h, 12fh
dw 10dh, 0e9h, 119h, 0ebh, 117h, 0edh, 0eeh, 12bh
dw 111h, 146h, 14dh, 137h, 0f4h, 0f5h, 0f6h, 0f7h
dw 0f8h, 173h, 0fah, 0fbh, 0fch, 169h, 16bh, 2d9h
; ------------- Translation table from ISO 8859-5 (Cyrillic) to Unicode
; 128 words, one Unicode code point per character code. Entry counts:
; 32 (CHINV = 0, invalid) + 1 + 12 + 1 + 66 + 1 + 12 + 3 = 128.
; Referenced from CharSetTab through "CHSINI 28595".
; NOTE(review): the values match codes 80h..FFh of the character set (the
; 32 leading CHINV entries being 80h..9Fh), so entry 0 is presumably code
; 80h - confirm against the lookup code.
CP28595ToUniTab:times 32 dw CHINV
dw 0a0h ; no-break space
INCW 401h, 40ch ; 12 entries 401h..40Ch
dw 0adh ; soft hyphen, stands where 40Dh would be
INCW 40eh, 44fh ; 66 entries 40Eh..44Fh
dw 2116h ; numero sign
INCW 451h, 45ch ; 12 entries 451h..45Ch
dw 0a7h, 45eh, 45fh ; section sign, then the last two letters
; ------------- Translation table from ISO 8859-6 (Arabic) to Unicode
; 128 words, one Unicode code point per character code; CHINV (=0) marks a
; code without a Unicode equivalent. Entry counts:
; 32 + 8 + 8 + 8 + 8 + 1 + 26 + 5 + 19 + 13 = 128.
; Referenced from CharSetTab through "CHSINI 28596".
; NOTE(review): the values match codes 80h..FFh of the character set (the
; 32 leading CHINV entries being 80h..9Fh), so entry 0 is presumably code
; 80h - confirm against the lookup code.
CP28596ToUniTab:times 32 dw CHINV
dw 0a0h, CHINV, CHINV, CHINV, 0a4h, CHINV, CHINV, CHINV
dw CHINV, CHINV, CHINV, CHINV, 60ch, 0adh, CHINV, CHINV
times 8 dw CHINV ; 8 invalid entries
dw CHINV, CHINV, CHINV, 61bh, CHINV, CHINV, CHINV, 61fh
dw CHINV ; single invalid entry before the run
INCW 621h, 63ah ; 26 entries 621h..63Ah
times 5 dw CHINV ; 5 invalid entries
INCW 640h, 652h ; 19 entries 640h..652h
times 13 dw CHINV ; 13 invalid entries up to the end of the table
; ------------- Translation table from ISO 8859-7 (Greek) to Unicode
; 128 words, one Unicode code point per character code; CHINV (=0) marks a
; code without a Unicode equivalent. Entry counts:
; 32 + 3 rows of 8 + 6 + 20 + 1 + 44 + 1 = 128.
; Referenced from CharSetTab through "CHSINI 28597".
; NOTE(review): the values match codes 80h..FFh of the character set (the
; 32 leading CHINV entries being 80h..9Fh), so entry 0 is presumably code
; 80h - confirm against the lookup code.
CP28597ToUniTab:times 32 dw CHINV
dw 0a0h, 2bdh, 2bch, 0a3h, CHINV, CHINV, 0a6h, 0a7h
dw 0a8h, 0a9h, CHINV, 0abh, 0ach, 0adh, CHINV, 2015h
dw 0b0h, 0b1h, 0b2h, 0b3h, 384h, 385h, 386h, 0b7h
dw 388h, 389h, 38ah, 0bbh, 38ch, 0bdh ; 6 entries (short row, continued by INCW)
INCW 38eh, 3a1h ; 20 entries 38Eh..3A1h
dw CHINV ; stands where 3A2h would be
INCW 3a3h, 3ceh ; 44 entries 3A3h..3CEh
dw CHINV ; last entry is invalid
; ------------- Translation table from ISO 8859-8 (Hebrew) to Unicode
; 128 words, one Unicode code point per character code; CHINV (=0) marks a
; code without a Unicode equivalent. Entry counts:
; 32 + 8 + 8 + 8 + 8 + 31 + 1 + 27 + 5 = 128.
; Referenced from CharSetTab through "CHSINI 28598".
; NOTE(review): the values match codes 80h..FFh of the character set (the
; 32 leading CHINV entries being 80h..9Fh), so entry 0 is presumably code
; 80h - confirm against the lookup code.
CP28598ToUniTab:times 32 dw CHINV
dw 0a0h, CHINV, 0a2h, 0a3h, 0a4h, 0a5h, 0a6h, 0a7h
dw 0a8h, 0a9h, 0d7h, 0abh, 0ach, 0adh, 0aeh, 203eh
INCW 0b0h, 0b7h ; 8 entries 0B0h..0B7h
dw 0b8h, 0b9h, 0f7h, 0bbh, 0bch, 0bdh, 0beh, CHINV
times 31 dw CHINV ; 31 invalid entries
dw 2017h ; double low line
INCW 5d0h, 5eah ; 27 entries 5D0h..5EAh (Hebrew letters)
times 5 dw CHINV ; 5 invalid entries up to the end of the table
; ------------- Translation table from ISO 8859-9 (Turkish) to Unicode
; 128 words, one Unicode code point per character code. Entry counts:
; 32 (CHINV = 0, invalid) + 48 + 8 + 8 + 16 + 8 + 8 = 128.
; After the invalid entries the table is the range 0A0h..0FFh with six
; values replaced by Turkish letters (11eh, 130h, 15eh, 11fh, 131h, 15fh).
; Referenced from CharSetTab through "CHSINI 28599".
; NOTE(review): the values match codes 80h..FFh of the character set (the
; 32 leading CHINV entries being 80h..9Fh), so entry 0 is presumably code
; 80h - confirm against the lookup code.
CP28599ToUniTab:times 32 dw CHINV
INCW 0a0h, 0cfh ; 48 entries 0A0h..0CFh
dw 11eh, 0d1h, 0d2h, 0d3h, 0d4h, 0d5h, 0d6h, 0d7h
dw 0d8h, 0d9h, 0dah, 0dbh, 0dch, 130h, 15eh, 0dfh
INCW 0e0h, 0efh ; 16 entries 0E0h..0EFh
dw 11fh, 0f1h, 0f2h, 0f3h, 0f4h, 0f5h, 0f6h, 0f7h
dw 0f8h, 0f9h, 0fah, 0fbh, 0fch, 131h, 15fh, 0ffh
; ------------- Translation table from ISO 8859-15 (Latin 9) to Unicode
; 128 words, one Unicode code point per character code: 32 invalid entries
; (CHINV = 0), 4 rows of 8, then 64 consecutive code points.
; The four rows are the range 0A0h..0BFh with eight values replaced
; (20ach, 160h, 161h, 17dh, 17eh, 152h, 153h, 178h).
; Referenced from CharSetTab through "CHSINI 28605".
; NOTE(review): the values match codes 80h..FFh of the character set (the
; 32 leading CHINV entries being 80h..9Fh), so entry 0 is presumably code
; 80h - confirm against the lookup code.
CP28605ToUniTab:times 32 dw CHINV
dw 0a0h, 0a1h, 0a2h, 0a3h, 20ach, 0a5h, 160h, 0a7h
dw 161h, 0a9h, 0aah, 0abh, 0ach, 0adh, 0aeh, 0afh
dw 0b0h, 0b1h, 0b2h, 0b3h, 17dh, 0b5h, 0b6h, 0b7h
dw 17eh, 0b9h, 0bah, 0bbh, 152h, 153h, 178h, 0bfh
INCW 0c0h, 0ffh ; 64 entries 0C0h..0FFh
; -----------------------------------------------------------------------------
; Uninitialized data
; -----------------------------------------------------------------------------
BSS_SECTION
; ------------- List of code pages (in reversed order, for quick search)
; Filled by CharTabInit, which starts at the last CHARSET descriptor and
; steps back to the previous one after each store, so the list is in reverse
; CharSetTab order.
; NOTE(review): CharTabInit stores each code page with STOSW (16 bits) while
; one dword per entry is reserved here - confirm which entry size the search
; code expects.
align 4, resb 1
CharSetCodePage:resd CHARSETNUM
; ------------- Tables from Unicode (only empty heads)
; CHSINIS bytes in total: FONTMAP*4 bytes for every CHSINI entry of
; CharSetTab. Each single byte CHARSET descriptor points to its own part
; (CharSetFromUni + its offset). CharSetFromUni2 marks the end of the area.
align 4, resb 1
CharSetFromUni: resb CHSINIS
CharSetFromUni2:
; ------------- Translation table from ASCII to Unicode (=invalid characters)
; Only 96 of the 128 words are reserved here; the remaining 32 overlap the
; start of the next table (see the note after the reservation).
align 4, resb 1
CP0ToUniTab: resw 128 - 32
; 32 characters are shared with CP28591ToUniTab (=invalid characters)
; This relies on 96 words = 192 bytes being a multiple of 4: the "align 4"
; below adds no padding, so CP28591ToUniTab starts exactly where entry 96 of
; CP0ToUniTab would be. Nothing may be inserted between the two tables.
; ------------- Translation table from ISO 8859-1 (Latin 1) to Unicode
; NOTE(review): only reserved here; the contents are presumably generated at
; run time by code outside this part of the file - confirm.
align 4, resb 1
CP28591ToUniTab:resw 128
|