Tvůrce webu je i pro tebe! Postav třeba web. Bez grafika. Bez kodéra. Hned.
wz

CHARSET.ASM

Character Sets


; =============================================================================
;
;                           Litos - Character sets
;
; =============================================================================

		CODE_SECTION	32

%ifdef DEBUG
;%define	DEBUG_CODEPAGE		; uncomment this to test code pages
;%define	DEBUG_FONTNUM		; uncomment this to display font number
%endif

CHINV		EQU	0		; invalid character (hardcoded)

CHARSETNUM	EQU	44		; number of character sets

; ------------- Macro - Continuous area of WORD incremental values
; %1 = start value, %2 = end value

%macro		INCW	2
		; Emits the consecutive WORD values %1, %1+1, ... %2 (inclusive).
		; Both arguments are parenthesized so that expression arguments
		; (e.g. "BASE+1") are evaluated as a unit when the repeat count
		; is computed.
		%assign INCW_INX (%1)
		%rep	((%2)-(%1)+1)
		dw	INCW_INX
		%assign	INCW_INX INCW_INX + 1
		%endrep
%endmacro

; ------------- Macro - Initialized single byte character set (CHARSET)
; %1 = code page
; CHSINIS - size of table from Unicode

%macro		CHSINI	1

		; CHARSET header of a single byte character set:
		; code page number, flags (none set)
		dd	%1, 0
		; conversion tables: to Unicode (CP<codepage>ToUniTab) and
		; this set's slice of the shared from-Unicode page table
		dd	CP %+ %1 %+ ToUniTab
		dd	CharSetFromUni+CHSINIS
		; case tables (to capital, to small) are not provided
		dd	NULL, NULL
		; character handlers: read from buffer, write into buffer
		dd	CharSBRead, CharSBWrite
		; advance the running from-Unicode offset by one page table
%assign	CHSINIS CHSINIS + FONTMAP*4
%endmacro

; ------------- Macro - Initialized multibyte character set (CHARSET)
; %1 = code page, %2 = read character, %3 = write character

%macro		CHSINI2	3

		; CHARSET header of a multibyte character set:
		; code page number, multibyte flag
		dd	%1, CHSET_MBYTE
		; no conversion tables (to Unicode, from Unicode)
		dd	NULL, NULL
		; no case tables (to capital, to small)
		dd	NULL, NULL
		; character handlers: read from buffer, write into buffer
		dd	%2, %3
%endmacro

; -----------------------------------------------------------------------------
;                         Initialize character mapping
; -----------------------------------------------------------------------------

; ------------- Initialize list of code pages

; Builds the run-time character mapping data:
;  1) list of code pages (CharSetCodePage), filled from the LAST character
;     set to the first one, one 16-bit entry per set
;  2) upper part (0a0h..0ffh) of the ISO 8859-1 to-Unicode table
;  3) from-Unicode pages of every single byte character set
;  4) Unicode maps of the fixed fonts F14, F10 and F08
; With DEBUG_CODEPAGE defined it also prints statistics of the tables.

CharTabInit:	mov	edi,CharSetCodePage ; EDI <- list of codepages
		mov	ebx,CharSetTab+(CHARSETNUM-1)*CHARSET_size
					; EBX <- last character set
CharTabInit2:	mov	eax,[ebx+CHSET_CodePage] ; EAX <- code page
		stosw			; store code page (16-bit entry)
		sub	ebx,CHARSET_size ; EBX <- previous character set
		cmp	ebx,CharSetTab	; valid character set?
		jae	CharTabInit2	; next character set

; ------------- Initialize ISO 8859-1 (Latin 1) table
; Characters 0a0h..0ffh have the same code in Unicode.

		mov	edi,CP28591ToUniTab+32*2 ; EDI <- 0a0h character
		xor	eax,eax		; EAX <- 0
		mov	al,0a0h		; EAX <- 0a0h, first character
CharTabInit3:	stosw			; store one character
		inc	al		; increase character
		jnz	CharTabInit3	; initialize table 0a0h..0ffh

; ------------- Prepare to initialize conversion table from Unicode

		mov	ebx,CharSetTab	; EBX <- first character set
CharTabInit5:	test	byte [ebx+CHSET_Flags],CHSET_MBYTE ; multibyte?
		jnz	CharTabInit58	; charset need not be initialized
		mov	esi,[ebx+CHSET_ToUni] ; ESI <- table to Unicode
		mov	dl,80h		; DL <- 80h, first character

; ------------- Get one character in Unicode (-> AX)
; Entry 0 (CHINV) marks an unassigned character. It has no Unicode code,
; so it must not be registered in the table from Unicode.

CharTabInit52:	lodsw			; AX <- Unicode character
		or	ax,ax		; unassigned character (CHINV)?
		jz	CharTabInit56	; yes, there is nothing to map back
		movzx	edi,ah		; EDI <- page index

; ------------- Get page address (-> EDI)

		shl	edi,2		; EDI <- offset of page address
		add	edi,[ebx+CHSET_FromUni] ; EDI <- table from Unicode
		cmp	dword [edi],byte 0 ; is page allocated?
		jne	CharTabInit55	; page is already allocated

; ------------- Create new page (no memory error can occur now)

		push	eax		; push EAX (Unicode character)
		push	edi		; push EDI (address of page pointer)

		xor	eax,eax		; EAX <- 0
		mov	ah,1		; EAX <- 256, size of one page
		call	SysMemAlloc	; allocate memory
		mov	[edi],eax	; store page address

; ------------- Initialize page

		xchg	eax,edi		; EDI <- page address
		xor	ecx,ecx		; ECX <- 0
		mov	cl,256/4	; ECX <- page size / 4
		xor	eax,eax		; EAX <- 0
		rep	stosd		; clear page (0 = no character)

		pop	edi		; pop EDI
		pop	eax		; pop EAX

; ------------- Store character into page

CharTabInit55:	mov	edi,[edi]	; EDI <- page memory block
		movzx	ecx,al		; ECX <- offset of the character
		mov	[edi+ecx],dl	; store character

; ------------- Next character

CharTabInit56:	inc	dl		; increase character index
		jnz	CharTabInit52	; next character (80h..0ffh)

; ------------- Next character set

CharTabInit58:	add	ebx,CHARSET_size ; EBX <- next character set
		cmp	ebx,CharSetTab2	; end of table?
		jb	CharTabInit5	; next table

; ------------- Initialize fonts

		mov	ebx,F14		; EBX <- font 8x14
		call	FixFontInit	; initialize font

		mov	ebx,F10		; EBX <- font 8x10
		call	FixFontInit	; initialize font

		mov	ebx,F08		; EBX <- font 8x8
		call	FixFontInit	; initialize font

; ------------- Test code pages
%ifdef DEBUG_CODEPAGE
				; *** display number of character sets
		mov	esi,CPTxt1	; ESI <- text
		call	DebOutText	; display text

		xor	edx,edx		; EDX <- 0
		mov	eax,CharSetTab2 ; EAX <- end of all character sets
		sub	eax,CharSetTab	; EAX <- size of all character sets
		mov	ecx,CHARSET_size ; ECX <- size of one character set
		div	ecx		; EAX <- number of character sets
		call	DebOutNum	; display number of character sets
				; *** check number of character sets
		cmp	eax,CHARSETNUM	; check number of character sets
		je	CharTabInit81	; number of character sets is OK
		mov	esi,CPTxt0	; ESI <- error text
		call	DebOutText	; display text
CharTabInit81:	call	DebNewLine	; display new line
				; *** display code page number
		mov	ebx,CharSetTab	; EBX <- character set table
		xor	ebp,ebp		; EBP <- memory accumulator
		mov	edx,127		; EDX <- preset max. character
CharTabInit82:	test	byte [ebx+CHSET_Flags],CHSET_MBYTE ; multibyte?
		jnz	CharTabInit89	; next charset

		mov	eax,[ebx+CHSET_CodePage] ; EAX <- code page
		call	DebOutNum	; display code page
		mov	al,":"		; AL <- ":" character
		call	DebOutChar	; display ":" character
		call	DebOutTab	; display tabulator
				; *** find maximal Unicode character
		mov	esi,[ebx+CHSET_ToUni] ; ESI <- table to Unicode
		mov	ecx,128		; ECX <- table size
CharTabInit84:	xor	eax,eax		; EAX <- 0
		lodsw			; EAX <- Unicode character
		cmp	eax,edx		; check maximal character
		jb	CharTabInit85	; character is not bigger
		xchg	eax,edx		; store new maximal character
CharTabInit85:	loop	CharTabInit84	; next character
				; *** get number of allocated pages
CharTabInit86:	mov	esi,CPTxt3	; ESI <- text
		call	DebOutText	; display text
		push	edx		; push EDX (maximal character)
		xor	edx,edx		; EDX <- 0, pages
		xor	ecx,ecx		; ECX <- 0
		mov	cl,FONTMAP	; ECX <- number of pages
		mov	esi,[ebx+CHSET_FromUni] ; ESI <- from Unicode table
CharTabInit87:	lodsd			; load one pointer
		or	eax,eax		; valid page?
		jz	CharTabInit88	; invalid page
		inc	edx		; increase number of pages
		add	ebp,256		; increase memory size
CharTabInit88:	loop	CharTabInit87	; next page
		xchg	eax,edx		; EAX <- number of pages
		call	DebOutNum	; display pages
		pop	edx		; pop EDX (maximal character)
		call	DebOutTab	; display tabulator
		call	DebOutTab	; display tabulator
		call	DebOutTab	; display tabulator
				; *** next character set
CharTabInit89:	add	ebx,CHARSET_size ; EBX <- next character set
		cmp	ebx,CharSetTab2	; end of table?
		jb	CharTabInit82	; next table
				; *** display maximal Unicode character
		mov	esi,CPTxt2	; ESI <- text
		call	DebOutText	; display text
		xchg	eax,edx		; EAX <- maximal character
		call	DebOutHexW	; display maximal character
				; *** allocated memory
		call	DebNewLine	; new line
		mov	esi,CPTxt4	; ESI <- text
		call	DebOutText	; display text
		xchg	eax,ebp		; EAX <- memory
		add	eax,3ffh	; round up
		shr	eax,10		; convert to KB
		call	DebOutNum	; display memory
		mov	esi,CPTxt5	; ESI <- text
		call	DebOutText	; display text
%endif

CharTabInit9:	ret

; -----------------------------------------------------------------------------
;                       Initialize one fixed font
; -----------------------------------------------------------------------------
; INPUT:	EBX = font head
; DESTROYS:	EAX, ECX, EDX, ESI, EDI
; -----------------------------------------------------------------------------

; ------------- Prepare font data

; Walks all character records of the font. Every record starts with a byte
; holding the number of Unicode codes (masked with FIXFONT_MASK), followed by
; that many WORD codes and FIXFONT_Height bytes that are skipped as glyph
; data. For every code the address of the record is stored into the
; two-level map FIXFONT_Map (256 page pointers -> 256 DWORD entries per page).
; NOTE: with DEBUG_FONTNUM defined, EBP is destroyed as well (record counter).

FixFontInit:	lea	edx,[ebx+FIXFONT_Data] ; EDX <- start of font data

%ifdef DEBUG_FONTNUM
		xor	ebp,ebp		; EBP <- 0, font counter
%endif
; ------------- Prepare to initialize one character

FixFontInit2:	mov	esi,edx		; ESI <- start of character record
		lodsb			; AL <- number of codes
		and	al,FIXFONT_MASK	; mask number of codes
		movzx	ecx,al		; ECX <- number of codes
%ifdef DEBUG_FONTNUM
		inc	ebp		; increase font counter
%endif
; ------------- Get one code

FixFontInit4:	lodsw			; AX <- Unicode code
		movzx	edi,ah		; EDI <- page index

; ------------- Get page address (-> EDI)

		shl	edi,2		; EDI <- offset of page address
		add	edi,[ebx+FIXFONT_Map] ; EDI <- address of page pointer
		cmp	dword [edi],byte 0 ; is page allocated?
		jne	FixFontInit6	; page is already allocated

; ------------- Create new page (no memory error can occur now)

		push	eax		; push EAX (Unicode code)
		push	ecx		; push ECX (remaining codes)
		push	edi		; push EDI (address of page pointer)

		xor	eax,eax		; EAX <- 0
		mov	ah,4		; EAX <- 256*4, size of one page
		call	SysMemAlloc	; allocate memory
		mov	[edi],eax	; store page address
		xchg	eax,edi		; EDI <- page address

; ------------- Initialize page

		xor	ecx,ecx		; ECX <- 0
		mov	ch,1		; ECX <- 256*4 / 4
		xor	eax,eax		; EAX <- 0
		rep	stosd		; clear page

		pop	edi		; pop EDI
		pop	ecx		; pop ECX
		pop	eax		; pop EAX

; ------------- Store character into page

FixFontInit6:	mov	edi,[edi]	; EDI <- page memory block
		movzx	eax,al		; EAX <- offset of the character
		mov	[edi+eax*4],edx	; store address of character record

; ------------- Next code

		loop	FixFontInit4	; next code

; ------------- Next character

		add	esi,[ebx+FIXFONT_Height] ; ESI <- skip font data
		mov	edx,esi		; EDX <- next character
		cmp	edx,[ebx+FIXFONT_End] ; end of data?
		jb	FixFontInit2	; next character

%ifdef DEBUG_FONTNUM
		mov	eax,ebp		; EAX <- font number (counted records)
		call	DebOutNum	; display number of fonts
		call	DebOutSpc	; display space character
%endif
		ret

; -----------------------------------------------------------------------------
;                       Get character set structure
; -----------------------------------------------------------------------------
; INPUT:	EAX = codepage
; OUTPUT:	EBX = character set structure CHARSET
;		CY = codepage not found
; -----------------------------------------------------------------------------

; The list CharSetCodePage is filled by CharTabInit with STOSW, i.e. as
; packed 16-bit entries stored from the last character set to the first one.
; It therefore has to be scanned WORD by WORD (only AX takes part in the
; comparison). After the scan ECX = number of entries not yet examined,
; which - because of the reversed order - is the index of the character set
; used to address CharSetAddr.

GetCharSet:	push	ecx		; push ECX
		push	edi		; push EDI

		mov	edi,CharSetCodePage ; EDI <- code pages
		mov	ecx,CHARSETNUM	; ECX <- number of character sets
		repne	scasw		; find code page (16-bit entries)
		mov	ebx,[CharSetAddr+ecx*4] ; EBX <- character set
					; (MOV and POP keep flags of SCASW)

		pop	edi		; pop EDI
		pop	ecx		; pop ECX
		jne	GetCharSet7	; code page not found
		ret			; here is NC (entry was equal)

GetCharSet7:	stc			; set error flag
GetCharSet8:	ret

; -----------------------------------------------------------------------------
;                     Read character from UTF-8 buffer
; -----------------------------------------------------------------------------
; INPUT:	ECX = remaining bytes in source buffer
;		EDX = invalid character (in Unicode)
;		ESI = source buffer
; OUTPUT:	EAX = Unicode character (or invalid character if no data)
;		ECX = next remaining bytes
;		ESI = next source buffer
; -----------------------------------------------------------------------------

; ------------- Read first byte

; Decoding scheme: the lead byte selects the length of the sequence. Every
; longer form loads one continuation byte and then jumps into the loader of
; the next shorter form (6 -> 5 -> 4 -> 3 -> 2 bytes), so the data bits are
; accumulated in EAX 6 bits at a time. Bytes 0FEh and 0FFh are skipped.
; On an invalid continuation byte that byte is returned to the buffer
; (ESI stepped back, ECX not decreased for it) and EDX is returned.
; NOTE(review): overlong forms (e.g. lead bytes 0C0h, 0C1h) are not rejected.

CharUTF8Read:	jecxz	CharUTF8Read8	; no data
		xor	eax,eax		; EAX <- 0
		lodsb			; AL <- load first byte
		dec	ecx		; decrease number of bytes

; ------------- 1 Byte (7 bits, 0xxxxxxx = 0..7F)

		cmp	al,7fh		; 1 byte?
		jbe	CharUTF8Read24	; data byte ok

; ------------- Invalid bytes (80..BF, FE, FF)

		cmp	al,0c0h		; check byte validity
		jb	CharUTF8Read8	; invalid byte (stray continuation byte)
		cmp	al,0fdh		; valid code?
		ja	CharUTF8Read	; detection byte (FE, FF), ignore it
		
; ------------- 2 Bytes (11 bits, 110xxxxx 10xxxxxx = C0..DF 80..BF)

		cmp	al,0dfh		; 2-byte code?
		ja	CharUTF8Read3	; more bytes
		jecxz	CharUTF8Read8	; not enough bytes

		and	al,1fh		; mask 5 bits
		mov	ah,al		; AH <- high 5 bits

; Last continuation byte (shared tail of all multi-byte forms)

CharUTF8Read22:	lodsb			; AL <- load next byte
		cmp	al,80h		; below continuation range 80..BF?
		jb	CharUTF8Read7	; invalid character
		cmp	al,0bfh		; above continuation range 80..BF?
		ja	CharUTF8Read7	; invalid character
		dec	ecx		; decrease number of bytes
		shl	al,2		; discard the 2 marker bits of the byte
		shr	eax,2		; shift to right position
CharUTF8Read24:	ret

; ------------- 3 Bytes (16 bits, 1110xxxx 10xxxxxx 10xxxxxx =
;					E0..EF 80..BF 80..BF
CharUTF8Read3:	cmp	al,0efh		; 3-byte code?
		ja	CharUTF8Read4	; more bytes

		cmp	ecx,byte 2	; check number of bytes
		jb	CharUTF8Read8	; not enough bytes

		and	al,0fh		; mask 4 bits
		mov	ah,al		; AH <- high 4 bits

CharUTF8Read32:	lodsb			; AL <- load next byte
		cmp	al,80h		; below continuation range 80..BF?
		jb	CharUTF8Read7	; invalid character
		cmp	al,0bfh		; above continuation range 80..BF?
		ja	CharUTF8Read7	; invalid character
		dec	ecx		; decrease number of bytes
		shl	al,2		; discard the 2 marker bits of the byte
		shl	eax,6		; free AL (data bits move up to AH)
		jmp	short CharUTF8Read22 ; load last byte

; ------------- 4 Bytes (21 bits, 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx =
;					F0..F7 80..BF 80..BF 80..BF
CharUTF8Read4:	cmp	al,0f7h		; 4-byte code?
		ja	CharUTF8Read5	; more bytes

		cmp	ecx,byte 3	; check number of bytes
		jb	CharUTF8Read8	; not enough bytes

		and	al,7		; mask 3 bits
		mov	ah,al		; AH <- high 3 bits

CharUTF8Read42:	lodsb			; AL <- load next byte
		cmp	al,80h		; below continuation range 80..BF?
		jb	CharUTF8Read7	; invalid character
		cmp	al,0bfh		; above continuation range 80..BF?
		ja	CharUTF8Read7	; invalid character
		dec	ecx		; decrease number of bytes
		shl	al,2		; discard the 2 marker bits of the byte
		shl	eax,6		; free AL (data bits move up to AH)
		jmp	short CharUTF8Read32 ; load remaining 2 bytes

; ------------- Error

CharUTF8Read7:	dec	esi		; return last invalid character
CharUTF8Read8:	mov	eax,edx		; EAX <- invalid character
		ret

; ------------- 5 Bytes (26 bits, 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
;					= F8..FB 80..BF 80..BF 80..BF 80..BF
CharUTF8Read5:	cmp	al,0fbh		; 5-byte code?
		ja	CharUTF8Read6	; more bytes

		cmp	ecx,byte 4	; check number of bytes
		jb	CharUTF8Read8	; not enough bytes

		and	al,3		; mask 2 bits
		mov	ah,al		; AH <- high 2 bits

CharUTF8Read52:	lodsb			; AL <- load next byte
		cmp	al,80h		; below continuation range 80..BF?
		jb	CharUTF8Read7	; invalid character
		cmp	al,0bfh		; above continuation range 80..BF?
		ja	CharUTF8Read7	; invalid character
		dec	ecx		; decrease number of bytes
		shl	al,2		; discard the 2 marker bits of the byte
		shl	eax,6		; free AL (data bits move up to AH)
		jmp	short CharUTF8Read42 ; load remaining 3 bytes

; ------------- 6 Bytes (31 bits, 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
;			 10xxxxxx = FC..FD 80..BF 80..BF 80..BF 80..BF 80..BF
CharUTF8Read6:	cmp	ecx,byte 5	; check number of bytes
		jb	CharUTF8Read8	; not enough bytes

		and	al,1		; mask 1 bit
		mov	ah,al		; AH <- high 1 bit

		lodsb			; AL <- load next byte
		cmp	al,80h		; below continuation range 80..BF?
		jb	CharUTF8Read7	; invalid character
		cmp	al,0bfh		; above continuation range 80..BF?
		ja	CharUTF8Read7	; invalid character
		dec	ecx		; decrease number of bytes
		shl	al,2		; discard the 2 marker bits of the byte
		shl	eax,6		; free AL (data bits move up to AH)
		jmp	short CharUTF8Read52 ; load remaining 4 bytes

; -----------------------------------------------------------------------------
;                   Read character from UTF-16LE (PC) buffer
; -----------------------------------------------------------------------------
; INPUT:	ECX = remaining bytes in source buffer
;		EDX = invalid character (in Unicode)
;		ESI = source buffer
; OUTPUT:	EAX = Unicode character (or invalid character if no data)
;		ECX = next remaining bytes
;		ESI = next source buffer
; -----------------------------------------------------------------------------

; ------------- Read first word

; A word outside 0D800h..0DFFFh is returned as it is. A high surrogate
; (0D800h..0DBFFh) must be followed by a low surrogate (0DC00h..0DFFFh);
; both carry 10 bits of (code point - 10000h). A lone low surrogate, missing
; data or an invalid second word return the invalid character EDX (an invalid
; second word is returned to the buffer, the first word stays consumed).

CharUTF16LER:	cmp	ecx,byte 2	; check number of bytes
		jb	CharUTF16LER8	; no data
		xor	eax,eax		; EAX <- 0
		lodsw			; AX <- load first word
		dec	ecx		; decrease number of bytes
		dec	ecx		; decrease number of bytes

; ------------- 1 Word

		cmp	eax,0d800h	; check low limit
		jb	CharUTF16LER4	; valid character
		cmp	eax,0e000h	; check high limit
		jae	CharUTF16LER4	; valid character
		cmp	eax,0dc00h	; check invalid range
		jae	CharUTF16LER8	; invalid character (lone low surrogate)

; ------------- 2 Words

		cmp	ecx,byte 2	; check number of bytes
		jb	CharUTF16LER8	; no data
		and	eax,3ffh	; mask 10 bits of high surrogate
		shl	eax,16		; free AX
		lodsw			; AX <- load second word

		cmp	ax,0dc00h	; check low limit
		jb	CharUTF16LER6	; invalid character
		cmp	ax,0e000h	; check high limit
		jae	CharUTF16LER6	; invalid character

		dec	ecx		; decrease number of bytes
		dec	ecx		; decrease number of bytes
		shl	ax,6		; destroy 6 marker bits of low surrogate
		shr	eax,6		; EAX <- 20 bits (high 10 : low 10)
		add	eax,10000h	; EAX <- code point (pairs encode
					;  the range starting at 10000h)
CharUTF16LER4:	ret

; ------------- Error

CharUTF16LER6:	dec	esi		; return data
		dec	esi		; return data
CharUTF16LER8:	mov	eax,edx		; EAX <- invalid character
		ret

; -----------------------------------------------------------------------------
;                  Read character from UTF-16BE (MAC) buffer
; -----------------------------------------------------------------------------
; INPUT:	ECX = remaining bytes in source buffer
;		EDX = invalid character (in Unicode)
;		ESI = source buffer
; OUTPUT:	EAX = Unicode character (or invalid character if no data)
;		ECX = next remaining bytes
;		ESI = next source buffer
; -----------------------------------------------------------------------------

; ------------- Read first word

; Same as the little endian reader, but every word is byte-swapped after
; loading. A high surrogate (0D800h..0DBFFh) must be followed by a low
; surrogate (0DC00h..0DFFFh); both carry 10 bits of (code point - 10000h).

CharUTF16BER:	cmp	ecx,byte 2	; check number of bytes
		jb	CharUTF16BER8	; no data
		xor	eax,eax		; EAX <- 0
		lodsw			; AX <- load first word
		xchg	al,ah		; exchange byte order
		dec	ecx		; decrease number of bytes
		dec	ecx		; decrease number of bytes

; ------------- 1 Word

		cmp	eax,0d800h	; check low limit
		jb	CharUTF16BER4	; valid character
		cmp	eax,0e000h	; check high limit
		jae	CharUTF16BER4	; valid character
		cmp	eax,0dc00h	; check invalid range
		jae	CharUTF16BER8	; invalid character (lone low surrogate)

; ------------- 2 Words

		cmp	ecx,byte 2	; check number of bytes
		jb	CharUTF16BER8	; no data
		and	eax,3ffh	; mask 10 bits of high surrogate
		shl	eax,16		; free AX
		lodsw			; AX <- load second word
		xchg	al,ah		; exchange byte order

		cmp	ax,0dc00h	; check low limit
		jb	CharUTF16BER6	; invalid character
		cmp	ax,0e000h	; check high limit
		jae	CharUTF16BER6	; invalid character

		dec	ecx		; decrease number of bytes
		dec	ecx		; decrease number of bytes
		shl	ax,6		; destroy 6 marker bits of low surrogate
		shr	eax,6		; EAX <- 20 bits (high 10 : low 10)
		add	eax,10000h	; EAX <- code point (pairs encode
					;  the range starting at 10000h)
CharUTF16BER4:	ret

; ------------- Error

CharUTF16BER6:	dec	esi		; return data
		dec	esi		; return data
CharUTF16BER8:	mov	eax,edx		; EAX <- invalid character
		ret

; -----------------------------------------------------------------------------
;                   Read character from UTF-32LE (PC) buffer
; -----------------------------------------------------------------------------
; INPUT:	ECX = remaining bytes in source buffer
;		EDX = invalid character (in Unicode)
;		ESI = source buffer
; OUTPUT:	EAX = Unicode character (or invalid character if no data)
;		ECX = next remaining bytes
;		ESI = next source buffer
; -----------------------------------------------------------------------------

CharUTF32LER:	cmp	ecx,byte 4	; is one complete DWORD available?
		jae	.load		; yes, read it
		mov	eax,edx		; no, EAX <- invalid character
		ret

.load:		lodsd			; EAX <- character (native byte order)
		sub	ecx,byte 4	; 4 bytes were consumed
		ret

; -----------------------------------------------------------------------------
;                   Read character from UTF-32BE (MAC) buffer
; -----------------------------------------------------------------------------
; INPUT:	ECX = remaining bytes in source buffer
;		EDX = invalid character (in Unicode)
;		ESI = source buffer
; OUTPUT:	EAX = Unicode character (or invalid character if no data)
;		ECX = next remaining bytes
;		ESI = next source buffer
; -----------------------------------------------------------------------------

CharUTF32BER:	cmp	ecx,byte 4	; is one complete DWORD available?
		jae	.load		; yes, read it
		mov	eax,edx		; no, EAX <- invalid character
		ret

.load:		lodsd			; EAX <- character in big endian order
		ror	ax,8		; swap the two low bytes
		ror	eax,16		; exchange low and high WORD
		ror	ax,8		; swap the two (new) low bytes
		sub	ecx,byte 4	; 4 bytes were consumed
		ret

; -----------------------------------------------------------------------------
;                  Read character from single byte buffer
; -----------------------------------------------------------------------------
; INPUT:	EBX = character set structure CHARSET
;		ECX = remaining characters
;		EDX = invalid character (in Unicode)
;		ESI = source buffer
; OUTPUT:	EAX = Unicode character (EAX <- EDX on error)
;		ECX = next remaining characters
;		ESI = next source buffer
;		CY = invalid character or no other char (EAX <- EDX on error)
; -----------------------------------------------------------------------------

; Loads one byte from the source buffer and falls through into CharToUnicode
; to translate it. With an empty buffer (ECX = 0) it returns EDX with CY.

CharSBRead:	jecxz	CharToUnicode9	; no character
		lodsb			; load character from buffer
		dec	ecx		; decrease remaining characters

; CharToUnicode must follow.

; -----------------------------------------------------------------------------
;                        Convert character to Unicode
; -----------------------------------------------------------------------------
; INPUT:	AL = single byte character (0 to 255)
;		EBX = character set structure CHARSET
;		EDX = invalid character (in Unicode)
; OUTPUT:	EAX = Unicode character (EAX <- EDX on error)
;		CY = invalid character (EAX <- EDX on error)
; -----------------------------------------------------------------------------

; ------------- Characters 0 to 7fh have the same Unicode code

CharToUnicode:	movzx	eax,al		; EAX <- character
		cmp	al,7fh		; ASCII page?
		jbe	CharToUnicode8	; ASCII page, don't convert it
		test	byte [ebx+CHSET_Flags],CHSET_MBYTE ; multibyte?
		jnz	CharToUnicode8	; multibyte cannot be converted
					; (the byte is returned unchanged, NC)

; ------------- Push registers

		push	ecx		; push ECX

; ------------- Convert character and check character validity
; The table holds 128 WORD entries for characters 80h..0ffh, so the index is
; biased by -128. Entry 0 means "no Unicode character". The compare with 1
; leaves CY set exactly for such an entry; MOV and POP keep the flag, so it
; becomes the error flag on return.

		mov	ecx,[ebx+CHSET_ToUni] ; ECX <- table to Unicode
		movzx	eax,word [ecx+eax*2-128*2] ; EAX <- Unicode code
		cmp	eax,byte 1	; invalid character (0 value)?
		jae	CharToUnicode6	; character is OK (here is NC)
		mov	eax,edx		; EAX <- invalid character (here is CY)

; ------------- Pop registers

CharToUnicode6:	pop	ecx		; pop ECX
		ret

; ------------- Character needs no conversion

CharToUnicode8:	clc			; clear error flag
		ret

; ------------- No character in buffer (used by CharSBRead)

CharToUnicode9:	mov	eax,edx		; EAX <- invalid character
		stc			; set error flag
		ret

; -----------------------------------------------------------------------------
;                      Write character into UTF-8 buffer
; -----------------------------------------------------------------------------
; INPUT:	EAX = Unicode character
;		EDI = destination buffer
;		EBP = remaining space in buffer
; OUTPUT:	EDI = next destination buffer
;		EBP = next remaining space in buffer
; DESTROYS:	EAX
; -----------------------------------------------------------------------------

; ------------- 1 Byte (7 bits, 0xxxxxxx = 0..7F)

; The character is stored only if the whole sequence fits into the remaining
; space; otherwise nothing is written and EDI, EBP stay unchanged.
; Encoding scheme: every multi-byte form stores its lead byte, prepares the
; next 6 data bits and jumps into the tail of the next shorter form
; (6 -> 5 -> 4 -> 3 -> 2 bytes). The original character is kept on the stack
; between the stores.

CharUTF8Write:	cmp	eax,byte 7fh	; check character (7 bits)
		ja	CharUTF8Write2	; character has more bytes

		or	ebp,ebp		; check free space
		jz	CharUTF8Write24	; buffer full
		stosb			; store byte
		dec	ebp		; decrease remaining space
		ret

; ------------- 2 Bytes (11 bits, 110xxxxx 10xxxxxx = C0..DF 80..BF)

CharUTF8Write2:	cmp	eax,7ffh	; check character (11 bits)
		ja	CharUTF8Write3	; character has more bytes

		cmp	ebp,byte 2	; check free space
		jb	CharUTF8Write24	; buffer full

		push	eax		; push EAX
		shr	eax,6		; AL <- get highest 5 bits
		or	al,0c0h		; add flags
CharUTF8Write22:stosb			; store byte
		dec	ebp		; decrease remaining space
		pop	eax		; pop EAX

		and	al,3fh		; mask low 6 bits
		or	al,80h		; add flags
		stosb			; store byte
		dec	ebp		; decrease remaining space
CharUTF8Write24:ret

; ------------- 3 Bytes (16 bits, 1110xxxx 10xxxxxx 10xxxxxx =
;					E0..EF 80..BF 80..BF
CharUTF8Write3:	cmp	eax,0ffffh	; check character (16 bits)
		ja	CharUTF8Write4	; character has more bytes

		cmp	ebp,byte 3	; check free space
		jb	CharUTF8Write24	; buffer full

		push	eax		; push EAX
		shr	eax,12		; AL <- get highest 4 bits
		or	al,0e0h		; add flags
CharUTF8Write32:stosb			; store byte
		dec	ebp		; decrease remaining space
		pop	eax		; pop EAX

		push	eax		; push EAX
		shr	eax,6		; AL <- get 6 bits
		and	al,3fh		; mask middle bits
		or	al,80h		; add flags
		jmp	short CharUTF8Write22 ; store last 2 bytes

; ------------- 4 Bytes (21 bits, 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx =
;					F0..F7 80..BF 80..BF 80..BF
CharUTF8Write4:	cmp	eax,1fffffh	; check character (21 bits)
		ja	CharUTF8Write5	; character has more bytes

		cmp	ebp,byte 4	; check free space
		jb	CharUTF8Write24	; buffer full

		push	eax		; push EAX
		shr	eax,18		; AL <- get highest 3 bits
		or	al,0f0h		; add flags
CharUTF8Write42:stosb			; store byte
		dec	ebp		; decrease remaining space
		pop	eax		; pop EAX

		push	eax		; push EAX
		shr	eax,12		; AL <- get 6 bits
		and	al,3fh		; mask 6 bits
		or	al,80h		; add flags
		jmp	short CharUTF8Write32 ; store last 3 bytes

; ------------- 5 Bytes (26 bits, 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
;					= F8..FB 80..BF 80..BF 80..BF 80..BF
CharUTF8Write5:	cmp	eax,3ffffffh	; check character (26 bits)
		ja	CharUTF8Write6	; character has more bytes

		cmp	ebp,byte 5	; check free space
		jb	CharUTF8Write24	; buffer full

		push	eax		; push EAX
		shr	eax,24		; AL <- get highest 2 bits
		or	al,0f8h		; add flags
CharUTF8Write52:stosb			; store byte
		dec	ebp		; decrease remaining space
		pop	eax		; pop EAX

		push	eax		; push EAX
		shr	eax,18		; AL <- get 6 bits
		and	al,3fh		; mask 6 bits
		or	al,80h		; add flags
		jmp	short CharUTF8Write42 ; store last 4 bytes

; ------------- 6 Bytes (31 bits, 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
;			 10xxxxxx = FC..FD 80..BF 80..BF 80..BF 80..BF 80..BF
CharUTF8Write6:	cmp	ebp,byte 6	; check free space
		jb	CharUTF8Write24	; buffer full

		push	eax		; push EAX
		shr	eax,30		; AL <- get highest 1 bit
		or	al,0fch		; add flags
		stosb			; store byte
		dec	ebp		; decrease remaining space
		pop	eax		; pop EAX

		push	eax		; push EAX
		shr	eax,24		; AL <- get 6 bits
		and	al,3fh		; mask 6 bits
		or	al,80h		; add flags
		jmp	short CharUTF8Write52 ; store last 5 bytes

; -----------------------------------------------------------------------------
;                 Write character into UTF-16LE (PC) buffer
; -----------------------------------------------------------------------------
; INPUT:	EAX = Unicode character
;		EDI = destination buffer
;		EBP = remaining space in buffer
; OUTPUT:	EDI = next destination buffer
;		EBP = next remaining space in buffer
; DESTROYS:	EAX
; -----------------------------------------------------------------------------

; ------------- 1 Word

; Characters up to 0FFFFh are stored as one word, larger ones as a surrogate
; pair. If the required space (2 or 4 bytes) is not available, nothing is
; stored and EDI, EBP stay unchanged.
; NOTE(review): no range check is done - characters 0D800h..0DFFFh are stored
; as they are, and for characters above 10FFFFh the high word does not fit
; into the surrogate range; confirm that callers never pass such values.

CharUTF16LEW:	cmp	eax,0ffffh	; 1 word?
		ja	CharUTF16LEW2	; more than 1 word

		cmp	ebp,byte 2	; check free space
		jb	CharUTF16LEW8	; buffer full
		dec	ebp		; decrease remaining space
		dec	ebp		; decrease remaining space

		stosw			; store character
		ret

; ------------- 2 Words (20 bits)

CharUTF16LEW2:	cmp	ebp,byte 4	; check free space
		jb	CharUTF16LEW8	; buffer full
		sub	ebp,byte 4	; decrease remaining space

		push	eax		; push EAX (original character)
		sub	eax,10000h	; subtract 10000h (pairs start there)
		shr	eax,10		; EAX <- high 10 bits
		add	eax,0d800h	; add high surrogate start
		stosw			; store high word
		pop	eax		; pop EAX (original character)

					; low 10 bits are not changed by
					; subtracting 10000h, so the original
					; character can be used here
		and	eax,3ffh	; mask low 10 bits
		add	eax,0dc00h	; add low surrogate start
		stosw			; store low word

CharUTF16LEW8:	ret

; -----------------------------------------------------------------------------
;                  Write character into UTF-16BE (MAC) buffer
; -----------------------------------------------------------------------------
; INPUT:	EAX = Unicode character
;		EDI = destination buffer
;		EBP = remaining space in buffer
; OUTPUT:	EDI = next destination buffer
;		EBP = next remaining space in buffer
; DESTROYS:	EAX
; -----------------------------------------------------------------------------

; ------------- 1 Word

; Characters up to 0FFFFh are stored as one word, larger ones as a surrogate
; pair; every word is byte-swapped before it is stored. If the required space
; (2 or 4 bytes) is not available, nothing is stored and EDI, EBP stay
; unchanged.
; NOTE(review): no range check is done - characters 0D800h..0DFFFh are stored
; as they are, and for characters above 10FFFFh the high word does not fit
; into the surrogate range; confirm that callers never pass such values.

CharUTF16BEW:	cmp	eax,0ffffh	; 1 word?
		ja	CharUTF16BEW2	; more than 1 word

		cmp	ebp,byte 2	; check free space
		jb	CharUTF16BEW8	; buffer full
		dec	ebp		; decrease remaining space
		dec	ebp		; decrease remaining space

		xchg	al,ah		; exchange byte order
		stosw			; store character
		ret

; ------------- 2 Words (20 bits)

CharUTF16BEW2:	cmp	ebp,byte 4	; check free space
		jb	CharUTF16BEW8	; buffer full
		sub	ebp,byte 4	; decrease remaining space

		push	eax		; push EAX (original character)
		sub	eax,10000h	; subtract 10000h (pairs start there)
		shr	eax,10		; EAX <- high 10 bits
		add	eax,0d800h	; add high surrogate start
		xchg	al,ah		; exchange byte order
		stosw			; store high word
		pop	eax		; pop EAX (original character)

					; low 10 bits are not changed by
					; subtracting 10000h, so the original
					; character can be used here
		and	eax,3ffh	; mask low 10 bits
		add	eax,0dc00h	; add low surrogate start
		xchg	al,ah		; exchange byte order
		stosw			; store low word

CharUTF16BEW8:	ret

; -----------------------------------------------------------------------------
;            Write character into UTF-32LE (PC, native UNICODE) buffer
; -----------------------------------------------------------------------------
; INPUT:	EAX = Unicode character
;		EDI = destination buffer
;		EBP = remaining space in buffer
; OUTPUT:	EDI = next destination buffer
;		EBP = next remaining space in buffer
; DESTROYS:	EAX
; -----------------------------------------------------------------------------

CharUTF32LEW:	cmp	ebp,byte 4	; is there room for one DWORD?
		jb	CharUTF32LEW4	; no, leave the buffer untouched
		stosd			; emit character (STOSD keeps flags)
		sub	ebp,byte 4	; account for the 4 bytes written
CharUTF32LEW4:	ret

; -----------------------------------------------------------------------------
;                 Write character into UTF-32BE (MAC) buffer
; -----------------------------------------------------------------------------
; INPUT:	EAX = Unicode character
;		EDI = destination buffer
;		EBP = remaining space in buffer
; OUTPUT:	EDI = next destination buffer
;		EBP = next remaining space in buffer
; DESTROYS:	EAX
; -----------------------------------------------------------------------------

CharUTF32BEW:	cmp	ebp,byte 4	; is there room for one DWORD?
		jb	CharUTF32BEW4	; no, leave the buffer untouched
		sub	ebp,byte 4	; account for the 4 bytes to be written
		ror	ax,8		; swap the two low bytes
		rol	eax,16		; exchange low and high WORD
		xchg	al,ah		; swap the two (new) low bytes
		stosd			; emit character in big endian order
CharUTF32BEW4:	ret

; -----------------------------------------------------------------------------
;                  Write character into single byte buffer
; -----------------------------------------------------------------------------
; INPUT:	EAX = Unicode character
;		EBX = character set structure CHARSET
;		DL = invalid character
;		EDI = destination buffer
;		EBP = remaining space in buffer
; OUTPUT:	EDI = next destination buffer
;		EBP = next remaining space in buffer
; DESTROYS:	EAX
; -----------------------------------------------------------------------------

CharSBWrite:	test	ebp,ebp		; any space left in the buffer?
		jz	CharSBWrite4	; no, character is dropped
		call	CharFromUnicode	; AL <- character in target set
					;  (DL if it cannot be converted)
		dec	ebp		; one byte of space is used
		stosb			; emit the byte
CharSBWrite4:	ret

; -----------------------------------------------------------------------------
;                      Convert character from Unicode
; -----------------------------------------------------------------------------
; INPUT:	EAX = Unicode character
;		EBX = character set structure CHARSET
;		DL = invalid character (single byte character)
; OUTPUT:	EAX = single byte character (0 to 255, EAX <- DL on error)
;		CY = invalid character (EAX <- DL on error)
; -----------------------------------------------------------------------------

; ------------- Characters 0 to 7fh are not converted

; Looks the character up in the two-level table CHSET_FromUni: the high byte
; of the code selects a page pointer (NULL = page not present), the low byte
; selects a byte within the page (0 = character not present).

CharFromUnicode:cmp	eax,byte 7fh	; character have the same code?
		jbe	CharFromUni9	; character will not be converted
		test	byte [ebx+CHSET_Flags],CHSET_MBYTE ; multibyte?
		jnz	CharFromUni9	; multibyte cannot be converted
					; (EAX is returned unchanged, NC)

; ------------- Check maximal allowed character code

		cmp	eax,FONTMAX	; check max. Unicode character
		ja	CharFromUni8	; invalid character

; ------------- Push registers

		push	ecx		; push ECX

; ------------- Get page (-> ECX)

		movzx	ecx,ah		; ECX <- page index
		shl	ecx,2		; ECX <- offset of page address
		add	ecx,[ebx+CHSET_FromUni] ; ECX <- address of page pointer
		mov	ecx,[ecx]	; ECX <- page
		jecxz	CharFromUni6	; page is not valid

; ------------- Get character (-> EAX)

		movzx	eax,al		; EAX <- character offset
		movzx	eax,byte [ecx+eax] ; EAX <- character
		or	eax,eax		; is character valid? (OR clears CF)
		jz	CharFromUni6	; character is not valid

; ------------- OK, pop registers (here is NC)

		pop	ecx		; pop ECX
		ret

; ------------- Error, pop registers

CharFromUni6:	pop	ecx		; pop ECX

; ------------- Error, invalid character

CharFromUni8:	movzx	eax,dl		; EAX <- invalid character
		stc			; set error flag
		ret

; ------------- Character is OK

CharFromUni9:	clc			; clear error flag
		ret

; -----------------------------------------------------------------------------
;                             Convert text
; -----------------------------------------------------------------------------
; Reads characters one by one with the ReadChar routine of the source
; character set and writes them with the WriteChar routine of the destination
; character set.
;
; INPUT:	EAX = source code page
;		EBX = destination code page
;		ECX = size of source buffer (bytes)
;		EDX = invalid character
;		ESI = source buffer
;		EDI = destination buffer
;		EBP = size of destination buffer (bytes)
; OUTPUT:	EAX = size of destination data (bytes, 0=invalid code page)
; NOTES:	EBX, ECX, ESI, EDI and EBP are preserved.
;		Conversion ends when the source is exhausted or when no space
;		remains in the destination buffer.
;		GetCharSet (defined elsewhere) is used here as: EAX = code
;		page -> EBX = CHARSET structure, CY = code page not found.
; -----------------------------------------------------------------------------

; ------------- Push registers

CharTrans:	push	ebx		; push EBX
		push	ecx		; push ECX
		push	esi		; push ESI
		push	edi		; push EDI
		push	ebp		; push EBP

; ------------- Get source character set structure (-> EBX, later -> EAX)

		push	ebx		; push EBX (destination code page)
		call	GetCharSet	; get character set structure
		pop	eax		; EAX <- destination code page
		jc	CharTrans9	; codepage not found

; ------------- Get destination character set structure (-> EBX)

		push	ebx		; push EBX (source character set)
		call	GetCharSet	; get character set structure
		pop	eax		; EAX <- source character set
		jc	CharTrans9	; page not found

; ------------- Convert text (here EAX = source set, EBX = destination set)

CharTrans4:	jecxz	CharTrans9	; no source data
		test	ebp,ebp		; destination buffer full?
		jz	CharTrans9	; yes, nothing more can be stored
		push	eax		; push EAX (source character set)
		push	ebx		; push EBX (destination character set)
		xchg	eax,ebx		; EBX <- source character set
		call	dword [ebx+CHSET_ReadChar] ; read character
		pop	ebx		; pop EBX (destination character set)
		call	dword [ebx+CHSET_WriteChar] ; write character
		pop	eax		; pop EAX (source character set)
		jmp	short CharTrans4 ; next character

; ------------- Pop registers (EDI is unchanged here if a code page was not
; ------------- found, so the size computed below is 0)

CharTrans9:	xchg	eax,edi		; EAX <- new destination buffer pointer
		pop	ebp		; pop EBP
		pop	edi		; pop EDI (start of destination buffer)
		pop	esi		; pop ESI
		pop	ecx		; pop ECX
		pop	ebx		; pop EBX
		sub	eax,edi		; EAX <- size of data in buffer
		ret

; -----------------------------------------------------------------------------
;                                   Data
; -----------------------------------------------------------------------------

		DATA_SECTION

; ------------- Debug messages, assembled only when DEBUG_CODEPAGE is defined.
; Every text is zero-terminated; the byte 10 is a line feed.
; NOTE(review): the code printing these texts is outside this section.

%ifdef DEBUG_CODEPAGE
CPTxt0:		db	'(!)',0
CPTxt1:		db	'Number of character sets: ',0
CPTxt2:		db	10,'max ',0
CPTxt3:		db	'pages ',0
CPTxt4:		db	'Allocated memory for conversion tables: ',0
CPTxt5:		db	' KB',10,0
%endif
		align	4, db 0

; ------------- Character set tables CHARSET
; One CHARSET record per supported code page, 44 records in total (this has
; to agree with CHARSETNUM). CHSINI emits a single byte record (8 DWORDs: code
; page, flags 0, pointer to CPnnnToUniTab, pointer into CharSetFromUni, two
; NULL case tables, CharSBRead, CharSBWrite) and then advances CHSINIS by
; FONTMAP*4, so every single byte set gets its own slice of CharSetFromUni.
; CHSINI2 emits a multibyte record (flag CHSET_MBYTE, no tables, own read and
; write routines). CharSetTab2 marks the end of the table.
%assign CHSINIS 0			; init size of tables from Unicode
		align	4, db 0
CharSetTab:
				; special single byte codes
		CHSINI	0		; ASCII
CharSetDEC:	CHSINI	1		; DEC VT100 graphics
				; OEM Codepages (DOS)
CharSet437:	CHSINI	437		; IBM-437 (United States)
		CHSINI	720		; Asmo-720 (Arabic)
		CHSINI	737		; IBM-737 (Greek)
		CHSINI	775		; IBM-775 (Baltic)
		CHSINI	850		; IBM-850 (Latin 1, West Europe)
		CHSINI	852		; IBM-852 (Latin 2, Central European)
		CHSINI	855		; IBM-855 (Cyrillic, primarily Russian)
		CHSINI	857		; IBM-857 (Turkish)
		CHSINI	858		; IBM-858 (Latin 1 + Euro)
		CHSINI	860		; IBM-860 (Portuguese)
		CHSINI	861		; IBM-861 (Icelandic)
		CHSINI	862		; IBM-862 (Hebrew)
		CHSINI	863		; IBM-863 (French Canadian)
		CHSINI	865		; IBM-865 (Nordic)
		CHSINI	866		; IBM-866 (Russian)
		CHSINI	869		; IBM-869 (Modern Greek)
		CHSINI	895		; IBM-895 (Kamenickych, Czech)
				; Windows Single Byte Character Set Codepages
		CHSINI	874		; Windows-874 (Thai)
CharSet1250:	CHSINI	1250		; Windows-1250 (Central Europe)
		CHSINI	1251		; Windows-1251 (Cyrillic)
CharSet1252:	CHSINI	1252		; Windows-1252 (Latin 1 Windows)
		CHSINI	1253		; Windows-1253 (Greek)
		CHSINI	1254		; Windows-1254 (Turkish)
		CHSINI	1255		; Windows-1255 (Hebrew)
		CHSINI	1256		; Windows-1256 (Arabic)
		CHSINI	1257		; Windows-1257 (Baltic)
		CHSINI	1258		; Windows-1258 (Vietnam)
				; ISO Codepages
		CHSINI	28591		; ISO 8859-1 (Latin 1 Western European)
		CHSINI	28592		; ISO 8859-2 (Latin 2 Central European)
		CHSINI	28593		; ISO 8859-3 (Latin 3)
		CHSINI	28594		; ISO 8859-4 (Baltic)
		CHSINI	28595		; ISO 8859-5 (Cyrillic)
		CHSINI	28596		; ISO 8859-6 (Arabic)
		CHSINI	28597		; ISO 8859-7 (Greek)
		CHSINI	28598		; ISO 8859-8 (Hebrew)
		CHSINI	28599		; ISO 8859-9 (Turkish)
		CHSINI	28605		; ISO 8859-15 (Latin 9)
				; multibyte codepages
		CHSINI2	1200, CharUTF16LER, CharUTF16LEW ; Unicode UTF-16LE
		CHSINI2	1201, CharUTF16BER, CharUTF16BEW ; Unicode UTF-16BE
		CHSINI2	12000, CharUTF32LER, CharUTF32LEW ; Unicode UTF-32LE
		CHSINI2	12001, CharUTF32BER, CharUTF32BEW ; Unicode UTF-32BE
		CHSINI2	65001, CharUTF8Read, CharUTF8Write ; Unicode UTF-8
CharSetTab2:

; ------------- Address table of character sets
; CHARSETNUM pointers; entry i holds CharSetTab + i*CHARSET_size, i.e. the
; address of the i-th CHARSET record. CHSETA is the running byte offset.

		align	4, db 0
CharSetAddr:
%assign	CHSETA	0
%rep	CHARSETNUM
		dd	CharSetTab + CHSETA
%assign	CHSETA	CHSETA + CHARSET_size
%endrep
		align	4, db 0

; ------------- Translation table from DEC VT100 graphics to Unicode
; It will be used in "Special Graphics Set" escape sequence.
; 128 WORD entries (43 + 5 + 1 + 46 + 1 + 32), one Unicode code per entry.
; In both INCW runs the value equals the entry index, so the table
; presumably covers codes 0 to 7Fh (unlike the other code page tables) -
; NOTE(review): confirm against the code that reads this table.

CP1ToUniTab:	INCW	0,2ah
		dw	2192h, 2190h, 2191h, 2193h,   2fh
		dw	2588h
		INCW	31h, 5eh
		dw	0a0h
		dw	25c6h, 2592h, 2409h, 240ch, 240dh, 240ah,  0b0h,  0b1h
		dw	2424h, 240bh, 2518h, 2510h, 250ch, 2514h, 253ch, 23bah
		dw	23bbh, 2500h, 23bch, 23bdh, 251ch, 2524h, 2534h, 252ch
		dw	2502h, 2264h, 2265h,  3c0h, 2260h,  0a3h,  0b7h, 2302h
		                
; ------------- Translation table from IBM-437 (United States) to Unicode
; 128 WORD entries (16 rows of 8), one Unicode code per entry.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP437ToUniTab:	dw	 0c7h,  0fch,  0e9h,  0e2h,  0e4h,  0e0h,  0e5h,  0e7h
		dw	 0eah,  0ebh,  0e8h,  0efh,  0eeh,  0ech,  0c4h,  0c5h
		dw	 0c9h,  0e6h,  0c6h,  0f4h,  0f6h,  0f2h,  0fbh,  0f9h
		dw	 0ffh,  0d6h,  0dch,  0a2h,  0a3h,  0a5h, 20a7h,  192h
		dw	 0e1h,  0edh,  0f3h,  0fah,  0f1h,  0d1h,  0aah,  0bah
		dw	 0bfh, 2310h,  0ach,  0bdh,  0bch,  0a1h,  0abh,  0bbh
		dw	2591h, 2592h, 2593h, 2502h, 2524h, 2561h, 2562h, 2556h
		dw	2555h, 2563h, 2551h, 2557h, 255dh, 255ch, 255bh, 2510h
		dw	2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 255eh, 255fh
		dw	255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 2567h
		dw	2568h, 2564h, 2565h, 2559h, 2558h, 2552h, 2553h, 256bh
		dw	256ah, 2518h, 250ch, 2588h, 2584h, 258ch, 2590h, 2580h
		dw	 3b1h,  0dfh,  393h,  3c0h,  3a3h,  3c3h,  0b5h,  3c4h
		dw	 3a6h,  398h,  3a9h,  3b4h, 221eh,  3c6h,  3b5h, 2229h
		dw	2261h,  0b1h, 2265h, 2264h, 2320h, 2321h,  0f7h, 2248h
		dw	 0b0h, 2219h,  0b7h, 221ah, 207fh,  0b2h, 25a0h,  0a0h

; ------------- Translation table from Asmo-720 (Arabic) to Unicode
; 128 WORD entries, one Unicode code per entry; CHINV marks a code without
; Unicode equivalent. The first 48 entries are 3 rows of 8, then 5 words,
; an INCW run of 17 and 2 words; the rest is 10 rows of 8.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP720ToUniTab:	dw	CHINV, CHINV,  0e9h,  0e2h, CHINV,  0e0h, CHINV,  0e7h
		dw	 0eah,  0ebh,  0e8h,  0efh,  0eeh, CHINV, CHINV, CHINV
		dw	CHINV,  651h,  652h,  0f4h,  0a4h,  640h,  0fbh,  0f9h
		dw	 621h,  622h,  623h,  624h,  0a3h
		INCW	625h, 635h
		dw	0abh, 0bbh
		dw	2591h, 2592h, 2593h, 2502h, 2524h, 2561h, 2562h, 2556h
		dw	2555h, 2563h, 2551h, 2557h, 255dh, 255ch, 255bh, 2510h
		dw	2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 255eh, 255fh
		dw	255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 2567h
		dw	2568h, 2564h, 2565h, 2559h, 2558h, 2552h, 2553h, 256bh
		dw	256ah, 2518h, 250ch, 2588h, 2584h, 258ch, 2590h, 2580h
		dw	 636h,  637h,  638h,  639h,  63ah,  641h,  0b5h,  642h
		dw	 643h,  644h,  645h,  646h,  647h,  648h,  649h,  64ah
		dw	2261h,  64bh,  64ch,  64dh,  64eh,  64fh,  650h, 2248h
		dw	 0b0h, 2219h,  0b7h, 221ah, 207fh,  0b2h, 25a0h,  0a0h

; ------------- Translation table from IBM-737 (Greek) to Unicode
; 128 WORD entries, one Unicode code per entry: three INCW runs (17 + 7 + 16
; entries) followed by 11 rows of 8.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP737ToUniTab:	INCW	391h, 3a1h
		INCW	3a3h, 3a9h
		INCW	3b1h, 3c0h
		dw	 3c1h,  3c3h,  3c2h,  3c4h,  3c5h,  3c6h,  3c7h,  3c8h
		dw	2591h, 2592h, 2593h, 2502h, 2524h, 2561h, 2562h, 2556h
		dw	2555h, 2563h, 2551h, 2557h, 255dh, 255ch, 255bh, 2510h
		dw	2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 255eh, 255fh
		dw	255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 2567h
		dw	2568h, 2564h, 2565h, 2559h, 2558h, 2552h, 2553h, 256bh
		dw	256ah, 2518h, 250ch, 2588h, 2584h, 258ch, 2590h, 2580h
		dw	 3c9h,  3ach,  3adh,  3aeh,  3cah,  3afh,  3cch,  3cdh
		dw	 3cbh,  3ceh,  386h,  388h,  389h,  38ah,  38ch,  38eh
		dw	 38fh,  0b1h, 2265h, 2264h,  3aah,  3abh,  0f7h, 2248h
		dw	 0b0h, 2219h,  0b7h, 221ah, 207fh,  0b2h, 25a0h,  0a0h

; ------------- Translation table from IBM-775 (Baltic) to Unicode
; 128 WORD entries (16 rows of 8), one Unicode code per entry.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP775ToUniTab:	dw	 106h,  0fch,  0e9h,  101h,  0e4h,  123h,  0e5h,  107h
		dw	 142h,  113h,  156h,  157h,  12bh,  179h,  0c4h,  0c5h
		dw	 0c9h,  0e6h,  0c6h,  14dh,  0f6h,  122h,  0a2h,  15ah
		dw	 15bh,  0d6h,  0dch,  0f8h,  0a3h,  0d8h,  0d7h,  0a4h
		dw	 100h,  12ah,  0f3h,  17bh,  17ch,  17ah, 201dh,  0a6h
		dw	 0a9h,  0aeh,  0ach,  0bdh,  0bch,  141h,  0abh,  0bbh
		dw	2591h, 2592h, 2593h, 2502h, 2524h,  104h,  10ch,  118h
		dw	 116h, 2563h, 2551h, 2557h, 255dh,  12eh,  160h, 2510h
		dw	2514h, 2534h, 252ch, 251ch, 2500h, 253ch,  172h,  16ah
		dw	255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch,  17dh
		dw	 105h,  10dh,  119h,  117h,  12fh,  161h,  173h,  16bh
		dw	 17eh, 2518h, 250ch, 2588h, 2584h, 258ch, 2590h, 2580h
		dw	 0d3h,  0dfh,  14ch,  143h,  0f5h,  0d5h,  0b5h,  144h
		dw	 136h,  137h,  13bh,  13ch,  146h,  112h,  145h, 2019h
		dw	 0adh,  0b1h, 201ch,  0beh,  0b6h,  0a7h,  0f7h, 201eh
		dw	 0b0h, 2219h,  0b7h,  0b9h,  0b3h,  0b2h, 25a0h,  0a0h

; ------------- Translation table from IBM-850 (Latin 1 West Europe) to Unicode
; 128 WORD entries (16 rows of 8), one Unicode code per entry.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP850ToUniTab:	dw	 0c7h,  0fch,  0e9h,  0e2h,  0e4h,  0e0h,  0e5h,  0e7h
		dw	 0eah,  0ebh,  0e8h,  0efh,  0eeh,  0ech,  0c4h,  0c5h
		dw	 0c9h,  0e6h,  0c6h,  0f4h,  0f6h,  0f2h,  0fbh,  0f9h
		dw	 0ffh,  0d6h,  0dch,  0f8h,  0a3h,  0d8h,  0d7h,  192h
		dw	 0e1h,  0edh,  0f3h,  0fah,  0f1h,  0d1h,  0aah,  0bah
		dw	 0bfh,  0aeh,  0ach,  0bdh,  0bch,  0a1h,  0abh,  0bbh
		dw	2591h, 2592h, 2593h, 2502h, 2524h,  0c1h,  0c2h,  0c0h
		dw	 0a9h, 2563h, 2551h, 2557h, 255dh,  0a2h,  0a5h, 2510h
		dw	2514h, 2534h, 252ch, 251ch, 2500h, 253ch,  0e3h,  0c3h
		dw	255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch,  0a4h
		dw	 0f0h,  0d0h,  0cah,  0cbh,  0c8h,  131h,  0cdh,  0ceh
		dw	 0cfh, 2518h, 250ch, 2588h, 2584h,  0a6h,  0cch, 2580h
		dw	 0d3h,  0dfh,  0d4h,  0d2h,  0f5h,  0d5h,  0b5h,  0feh
		dw	 0deh,  0dah,  0dbh,  0d9h,  0fdh,  0ddh,  0afh,  0b4h
		dw	 0adh,  0b1h, 2017h,  0beh,  0b6h,  0a7h,  0f7h,  0b8h
		dw	 0b0h,  0a8h,  0b7h,  0b9h,  0b3h,  0b2h, 25a0h,  0a0h

; ------------- Translation table from IBM-852 (Latin 2 Cent.Europe) to Unicode
; 128 WORD entries (16 rows of 8), one Unicode code per entry.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP852ToUniTab:	dw	 0c7h,  0fch,  0e9h,  0e2h,  0e4h,  16fh,  107h,  0e7h
		dw	 142h,  0ebh,  150h,  151h,  0eeh,  179h,  0c4h,  106h
		dw	 0c9h,  139h,  13ah,  0f4h,  0f6h,  13dh,  13eh,  15ah
		dw	 15bh,  0d6h,  0dch,  164h,  165h,  141h,  0d7h,  10dh
		dw	 0e1h,  0edh,  0f3h,  0fah,  104h,  105h,  17dh,  17eh
		dw	 118h,  119h,  0ach,  17ah,  10ch,  15fh,  0abh,  0bbh
		dw	2591h, 2592h, 2593h, 2502h, 2524h,  0c1h,  0c2h,  11ah
		dw	 15eh, 2563h, 2551h, 2557h, 255dh,  17bh,  17ch, 2510h
		dw	2514h, 2534h, 252ch, 251ch, 2500h, 253ch,  102h,  103h
		dw	255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch,  0a4h
		dw	 111h,  110h,  10eh,  0cbh,  10fh,  147h,  0cdh,  0ceh
		dw	 11bh, 2518h, 250ch, 2588h, 2584h,  162h,  16eh, 2580h
		dw	 0d3h,  0dfh,  0d4h,  143h,  144h,  148h,  160h,  161h
		dw	 154h,  0dah,  155h,  170h,  0fdh,  0ddh,  163h,  0b4h
		dw	 0adh,  2ddh,  2dbh,  2c7h,  2d8h,  0a7h,  0f7h,  0b8h
		dw	 0b0h,  0a8h,  2d9h,  171h,  158h,  159h, 25a0h,  0a0h

; ------------- Translation table from IBM-855 (Cyrillic, Russian) to Unicode
; 128 WORD entries (16 rows of 8), one Unicode code per entry.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP855ToUniTab:	dw	 452h,  402h,  453h,  403h,  451h,  401h,  454h,  404h
		dw	 455h,  405h,  456h,  406h,  457h,  407h,  458h,  408h
		dw	 459h,  409h,  45ah,  40ah,  45bh,  40bh,  45ch,  40ch
		dw	 45eh,  40eh,  45fh,  40fh,  44eh,  42eh,  44ah,  42ah
		dw	 430h,  410h,  431h,  411h,  446h,  426h,  434h,  414h
		dw	 435h,  415h,  444h,  424h,  433h,  413h,  0abh,  0bbh
		dw	2591h, 2592h, 2593h, 2502h, 2524h,  445h,  425h,  438h
		dw	 418h, 2563h, 2551h, 2557h, 255dh,  439h,  419h, 2510h
		dw	2514h, 2534h, 252ch, 251ch, 2500h, 253ch,  43ah,  41ah
		dw	255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch,  0a4h
		dw	 43bh,  41bh,  43ch,  41ch,  43dh,  41dh,  43eh,  41eh
		dw	 43fh, 2518h, 250ch, 2588h, 2584h,  41fh,  44fh, 2580h
		dw	 42fh,  440h,  420h,  441h,  421h,  442h,  422h,  443h
		dw	 423h,  436h,  416h,  432h,  412h,  44ch,  42ch, 2116h
		dw	 0adh,  44bh,  42bh,  437h,  417h,  448h,  428h,  44dh
		dw	 42dh,  449h,  429h,  447h,  427h,  0a7h, 25a0h,  0a0h

; ------------- Translation table from IBM-857 (Turkish) to Unicode
; 128 WORD entries (16 rows of 8), one Unicode code per entry; CHINV marks
; a code without Unicode equivalent (entries 55h, 67h and 72h here).
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP857ToUniTab:	dw	 0c7h,  0fch,  0e9h,  0e2h,  0e4h,  0e0h,  0e5h,  0e7h
		dw	 0eah,  0ebh,  0e8h,  0efh,  0eeh,  131h,  0c4h,  0c5h
		dw	 0c9h,  0e6h,  0c6h,  0f4h,  0f6h,  0f2h,  0fbh,  0f9h
		dw	 130h,  0d6h,  0dch,  0f8h,  0a3h,  0d8h,  15eh,  15fh
		dw	 0e1h,  0edh,  0f3h,  0fah,  0f1h,  0d1h,  11eh,  11fh
		dw	 0bfh,  0aeh,  0ach,  0bdh,  0bch,  0a1h,  0abh,  0bbh
		dw	2591h, 2592h, 2593h, 2502h, 2524h,  0c1h,  0c2h,  0c0h
		dw	 0a9h, 2563h, 2551h, 2557h, 255dh,  0a2h,  0a5h, 2510h
		dw	2514h, 2534h, 252ch, 251ch, 2500h, 253ch,  0e3h,  0c3h
		dw	255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch,  0a4h
		dw	 0bah,  0aah,  0cah,  0cbh,  0c8h, CHINV,  0cdh,  0ceh
		dw	 0cfh, 2518h, 250ch, 2588h, 2584h,  0a6h,  0cch, 2580h
		dw	 0d3h,  0dfh,  0d4h,  0d2h,  0f5h,  0d5h,  0b5h, CHINV
		dw	 0d7h,  0dah,  0dbh,  0d9h,  0ech,  0ffh,  0afh,  0b4h
		dw	 0adh,  0b1h, CHINV,  0beh,  0b6h,  0a7h,  0f7h,  0b8h
		dw	 0b0h,  0a8h,  0b7h,  0b9h,  0b3h,  0b2h, 25a0h,  0a0h

; ------------- Translation table from IBM-858 (Latin 1 + Euro) to Unicode
; 128 WORD entries (16 rows of 8), one Unicode code per entry. The data
; differs from CP850ToUniTab only in entry 55h (20ach here, 131h there).
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP858ToUniTab:	dw	 0c7h,  0fch,  0e9h,  0e2h,  0e4h,  0e0h,  0e5h,  0e7h
		dw	 0eah,  0ebh,  0e8h,  0efh,  0eeh,  0ech,  0c4h,  0c5h
		dw	 0c9h,  0e6h,  0c6h,  0f4h,  0f6h,  0f2h,  0fbh,  0f9h
		dw	 0ffh,  0d6h,  0dch,  0f8h,  0a3h,  0d8h,  0d7h,  192h
		dw	 0e1h,  0edh,  0f3h,  0fah,  0f1h,  0d1h,  0aah,  0bah
		dw	 0bfh,  0aeh,  0ach,  0bdh,  0bch,  0a1h,  0abh,  0bbh
		dw	2591h, 2592h, 2593h, 2502h, 2524h,  0c1h,  0c2h,  0c0h
		dw	 0a9h, 2563h, 2551h, 2557h, 255dh,  0a2h,  0a5h, 2510h
		dw	2514h, 2534h, 252ch, 251ch, 2500h, 253ch,  0e3h,  0c3h
		dw	255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch,  0a4h
		dw	 0f0h,  0d0h,  0cah,  0cbh,  0c8h, 20ach,  0cdh,  0ceh
		dw	 0cfh, 2518h, 250ch, 2588h, 2584h,  0a6h,  0cch, 2580h
		dw	 0d3h,  0dfh,  0d4h,  0d2h,  0f5h,  0d5h,  0b5h,  0feh
		dw	 0deh,  0dah,  0dbh,  0d9h,  0fdh,  0ddh,  0afh,  0b4h
		dw	 0adh,  0b1h, 2017h,  0beh,  0b6h,  0a7h,  0f7h,  0b8h
		dw	 0b0h,  0a8h,  0b7h,  0b9h,  0b3h,  0b2h, 25a0h,  0a0h

; ------------- Translation table from IBM-860 (Portuguese) to Unicode
; 128 WORD entries (16 rows of 8), one Unicode code per entry.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP860ToUniTab:	dw	 0c7h,  0fch,  0e9h,  0e2h,  0e3h,  0e0h,  0c1h,  0e7h
		dw	 0eah,  0cah,  0e8h,  0cdh,  0d4h,  0ech,  0c3h,  0c2h
		dw	 0c9h,  0c0h,  0c8h,  0f4h,  0f5h,  0f2h,  0dah,  0f9h
		dw	 0cch,  0d5h,  0dch,  0a2h,  0a3h,  0d9h, 20a7h,  0d3h
		dw	 0e1h,  0edh,  0f3h,  0fah,  0f1h,  0d1h,  0aah,  0bah
		dw	 0bfh,  0d2h,  0ach,  0bdh,  0bch,  0a1h,  0abh,  0bbh
		dw	2591h, 2592h, 2593h, 2502h, 2524h, 2561h, 2562h, 2556h
		dw	2555h, 2563h, 2551h, 2557h, 255dh, 255ch, 255bh, 2510h
		dw	2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 255eh, 255fh
		dw	255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 2567h
		dw	2568h, 2564h, 2565h, 2559h, 2558h, 2552h, 2553h, 256bh
		dw	256ah, 2518h, 250ch, 2588h, 2584h, 258ch, 2590h, 2580h
		dw	 3b1h,  0dfh,  393h,  3c0h,  3a3h,  3c3h,  0b5h,  3c4h
		dw	 3a6h,  398h,  3a9h,  3b4h, 221eh,  3c6h,  3b5h, 2229h
		dw	2261h,  0b1h, 2265h, 2264h, 2320h, 2321h,  0f7h, 2248h
		dw	 0b0h, 2219h,  0b7h, 221ah, 207fh,  0b2h, 25a0h,  0a0h

; ------------- Translation table from IBM-861 (Icelandic) to Unicode
; 128 WORD entries (16 rows of 8), one Unicode code per entry.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP861ToUniTab:	dw	 0c7h,  0fch,  0e9h,  0e2h,  0e4h,  0e0h,  0e5h,  0e7h
		dw	 0eah,  0ebh,  0e8h,  0d0h,  0f0h,  0deh,  0c4h,  0c5h
		dw	 0c9h,  0e6h,  0c6h,  0f4h,  0f6h,  0feh,  0fbh,  0ddh
		dw	 0fdh,  0d6h,  0dch,  0f8h,  0a3h,  0d8h, 20a7h,  192h
		dw	 0e1h,  0edh,  0f3h,  0fah,  0c1h,  0cdh,  0d3h,  0dah
		dw	 0bfh, 2310h,  0ach,  0bdh,  0bch,  0a1h,  0abh,  0bbh
		dw	2591h, 2592h, 2593h, 2502h, 2524h, 2561h, 2562h, 2556h
		dw	2555h, 2563h, 2551h, 2557h, 255dh, 255ch, 255bh, 2510h
		dw	2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 255eh, 255fh
		dw	255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 2567h
		dw	2568h, 2564h, 2565h, 2559h, 2558h, 2552h, 2553h, 256bh
		dw	256ah, 2518h, 250ch, 2588h, 2584h, 258ch, 2590h, 2580h
		dw	 3b1h,  0dfh,  393h,  3c0h,  3a3h,  3c3h,  0b5h,  3c4h
		dw	 3a6h,  398h,  3a9h,  3b4h, 221eh,  3c6h,  3b5h, 2229h
		dw	2261h,  0b1h, 2265h, 2264h, 2320h, 2321h,  0f7h, 2248h
		dw	 0b0h, 2219h,  0b7h, 221ah, 207fh,  0b2h, 25a0h,  0a0h

; ------------- Translation table from IBM-862 (Hebrew) to Unicode
; 128 WORD entries, one Unicode code per entry: an INCW run of 27 entries,
; a short row of 5 (indented so that its columns line up with the full
; rows), then 12 rows of 8.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP862ToUniTab:	INCW	5d0h, 5eah
		dw	                      0a2h,  0a3h,  0a5h, 20a7h,  192h
		dw	 0e1h,  0edh,  0f3h,  0fah,  0f1h,  0d1h,  0aah,  0bah
		dw	 0bfh, 2310h,  0ach,  0bdh,  0bch,  0a1h,  0abh,  0bbh
		dw	2591h, 2592h, 2593h, 2502h, 2524h, 2561h, 2562h, 2556h
		dw	2555h, 2563h, 2551h, 2557h, 255dh, 255ch, 255bh, 2510h
		dw	2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 255eh, 255fh
		dw	255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 2567h
		dw	2568h, 2564h, 2565h, 2559h, 2558h, 2552h, 2553h, 256bh
		dw	256ah, 2518h, 250ch, 2588h, 2584h, 258ch, 2590h, 2580h
		dw	 3b1h,  0dfh,  393h,  3c0h,  3a3h,  3c3h,  0b5h,  3c4h
		dw	 3a6h,  398h,  3a9h,  3b4h, 221eh,  3c6h,  3b5h, 2229h
		dw	2261h,  0b1h, 2265h, 2264h, 2320h, 2321h,  0f7h, 2248h
		dw	 0b0h, 2219h,  0b7h, 221ah, 207fh,  0b2h, 25a0h,  0a0h

; ------------- Translation table from IBM-863 (French Canadian) to Unicode
; 128 WORD entries (16 rows of 8), one Unicode code per entry.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP863ToUniTab:	dw	 0c7h,  0fch,  0e9h,  0e2h,  0c2h,  0e0h,  0b6h,  0e7h
		dw	 0eah,  0ebh,  0e8h,  0efh,  0eeh, 2017h,  0c0h,  0a7h
		dw	 0c9h,  0c8h,  0cah,  0f4h,  0cbh,  0cfh,  0fbh,  0f9h
		dw	 0a4h,  0d4h,  0dch,  0a2h,  0a3h,  0d9h,  0dbh,  192h
		dw	 0a6h,  0b4h,  0f3h,  0fah,  0a8h,  0b8h,  0b3h,  0afh
		dw	 0ceh, 2310h,  0ach,  0bdh,  0bch,  0beh,  0abh,  0bbh
		dw	2591h, 2592h, 2593h, 2502h, 2524h, 2561h, 2562h, 2556h
		dw	2555h, 2563h, 2551h, 2557h, 255dh, 255ch, 255bh, 2510h
		dw	2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 255eh, 255fh
		dw	255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 2567h
		dw	2568h, 2564h, 2565h, 2559h, 2558h, 2552h, 2553h, 256bh
		dw	256ah, 2518h, 250ch, 2588h, 2584h, 258ch, 2590h, 2580h
		dw	 3b1h,  0dfh,  393h,  3c0h,  3a3h,  3c3h,  0b5h,  3c4h
		dw	 3a6h,  398h,  3a9h,  3b4h, 221eh,  3c6h,  3b5h, 2229h
		dw	2261h,  0b1h, 2265h, 2264h, 2320h, 2321h,  0f7h, 2248h
		dw	 0b0h, 2219h,  0b7h, 221ah, 207fh,  0b2h, 25a0h,  0a0h

; ------------- Translation table from IBM-865 (Nordic) to Unicode
; 128 WORD entries (16 rows of 8), one Unicode code per entry.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP865ToUniTab:	dw	 0c7h,  0fch,  0e9h,  0e2h,  0e4h,  0e0h,  0e5h,  0e7h
		dw	 0eah,  0ebh,  0e8h,  0efh,  0eeh,  0ech,  0c4h,  0c5h
		dw	 0c9h,  0e6h,  0c6h,  0f4h,  0f6h,  0f2h,  0fbh,  0f9h
		dw	 0ffh,  0d6h,  0dch,  0f8h,  0a3h,  0d8h, 20a7h,  192h
		dw	 0e1h,  0edh,  0f3h,  0fah,  0f1h,  0d1h,  0aah,  0bah
		dw	 0bfh, 2310h,  0ach,  0bdh,  0bch,  0a1h,  0abh,  0a4h
		dw	2591h, 2592h, 2593h, 2502h, 2524h, 2561h, 2562h, 2556h
		dw	2555h, 2563h, 2551h, 2557h, 255dh, 255ch, 255bh, 2510h
		dw	2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 255eh, 255fh
		dw	255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 2567h
		dw	2568h, 2564h, 2565h, 2559h, 2558h, 2552h, 2553h, 256bh
		dw	256ah, 2518h, 250ch, 2588h, 2584h, 258ch, 2590h, 2580h
		dw	 3b1h,  0dfh,  393h,  3c0h,  3a3h,  3c3h,  0b5h,  3c4h
		dw	 3a6h,  398h,  3a9h,  3b4h, 221eh,  3c6h,  3b5h, 2229h
		dw	2261h,  0b1h, 2265h, 2264h, 2320h, 2321h,  0f7h, 2248h
		dw	 0b0h, 2219h,  0b7h, 221ah, 207fh,  0b2h, 25a0h,  0a0h

; ------------- Translation table from IBM-866 (Russian) to Unicode
; 128 WORD entries, one Unicode code per entry: INCW run of 48, 6 rows of 8,
; INCW run of 16 and 2 rows of 8.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP866ToUniTab:	INCW	410h, 43fh
		dw	2591h, 2592h, 2593h, 2502h, 2524h, 2561h, 2562h, 2556h
		dw	2555h, 2563h, 2551h, 2557h, 255dh, 255ch, 255bh, 2510h
		dw	2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 255eh, 255fh
		dw	255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 2567h
		dw	2568h, 2564h, 2565h, 2559h, 2558h, 2552h, 2553h, 256bh
		dw	256ah, 2518h, 250ch, 2588h, 2584h, 258ch, 2590h, 2580h
		INCW	440h, 44fh
		dw	 401h,  451h,  404h,  454h,  407h,  457h,  40eh,  45eh
		dw	 0b0h, 2219h,  0b7h, 221ah, 2116h,  0a4h, 25a0h,  0a0h

; ------------- Translation table from IBM-869 (Modern Greek) to Unicode
; 128 WORD entries (16 rows of 8), one Unicode code per entry; CHINV marks
; a code without Unicode equivalent.
; NOTE(review): 386h is listed twice (entries 6 and 7) and 38ch three times
; (entries 12h to 14h), so the reverse mapping of these two Unicode codes
; depends on how the "from Unicode" tables are built; published CP869
; tables may leave the extra codes unassigned - confirm that the duplicates
; are intended.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP869ToUniTab:	dw	CHINV, CHINV, CHINV, CHINV, CHINV, CHINV,  386h,  386h
		dw	 0b7h,  0ach,  0a6h, 2018h, 2019h,  388h, 2015h,  389h
		dw	 38ah,  3aah,  38ch,  38ch,  38ch,  38eh,  3abh,  0a9h
		dw	 38fh,  0b2h,  0b3h,  3ach,  0a3h,  3adh,  3aeh,  3afh
		dw	 3cah,  390h,  3cch,  3cdh,  391h,  392h,  393h,  394h
		dw	 395h,  396h,  397h,  0bdh,  398h,  399h,  0abh,  0bbh
		dw	2591h, 2592h, 2593h, 2502h, 2524h,  39ah,  39bh,  39ch
		dw	 39dh, 2563h, 2551h, 2557h, 255dh,  39eh,  39fh, 2510h
		dw	2514h, 2534h, 252ch, 251ch, 2500h, 253ch,  3a0h,  3a1h
		dw	255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch,  3a3h
		dw	 3a4h,  3a5h,  3a6h,  3a7h,  3a8h,  3a9h,  3b1h,  3b2h
		dw	 3b3h, 2518h, 250ch, 2588h, 2584h,  3b4h,  3b5h, 2580h
		dw	 3b6h,  3b7h,  3b8h,  3b9h,  3bah,  3bbh,  3bch,  3bdh
		dw	 3beh,  3bfh,  3c0h,  3c1h,  3c3h,  3c2h,  3c4h,  384h
		dw	 0adh,  0b1h,  3c5h,  3c6h,  3c7h,  0a7h,  3c8h,  385h
		dw	 0b0h,  0a8h,  3c9h,  3cbh,  3b0h,  3ceh, 25a0h,  0a0h

; ------------- Translation table from IBM-895 (Kamenickych, Czech) to Unicode
; 128 WORD entries (16 rows of 8), one Unicode code per entry.
; NOTE(review): entry 2Dh is 0a1h (inverted exclamation mark, the same as
; in CP437ToUniTab); some published Kamenicky tables have the section sign
; (0a7h) at that position - confirm which variant is intended.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP895ToUniTab:	dw	 10ch,  0fch,  0e9h,  10fh,  0e4h,  10eh,  164h,  10dh
		dw	 11bh,  11ah,  139h,  0cdh,  13eh,  13ah,  0c4h,  0c1h
		dw	 0c9h,  17eh,  17dh,  0f4h,  0f6h,  0d3h,  16fh,  0dah
		dw	 0fdh,  0d6h,  0dch,  160h,  13dh,  0ddh,  158h,  165h
		dw	 0e1h,  0edh,  0f3h,  0fah,  148h,  147h,  16eh,  0d4h
		dw	 161h,  159h,  155h,  154h,  0bch,  0a1h,  0abh,  0bbh
		dw	2591h, 2592h, 2593h, 2502h, 2524h, 2561h, 2562h, 2556h
		dw	2555h, 2563h, 2551h, 2557h, 255dh, 255ch, 255bh, 2510h
		dw	2514h, 2534h, 252ch, 251ch, 2500h, 253ch, 255eh, 255fh
		dw	255ah, 2554h, 2569h, 2566h, 2560h, 2550h, 256ch, 2567h
		dw	2568h, 2564h, 2565h, 2559h, 2558h, 2552h, 2553h, 256bh
		dw	256ah, 2518h, 250ch, 2588h, 2584h, 258ch, 2590h, 2580h
		dw	 3b1h,  0dfh,  393h,  3c0h,  3a3h,  3c3h,  0b5h,  3c4h
		dw	 3a6h,  398h,  3a9h,  3b4h, 221eh,  3c6h,  3b5h, 2229h
		dw	2261h,  0b1h, 2265h, 2264h, 2320h, 2321h,  0f7h, 2248h
		dw	 0b0h, 2219h,  0b7h, 221ah, 207fh,  0b2h, 25a0h,  0a0h

; ------------- Translation table from Windows-874 (Thai) to Unicode
; 128 WORD entries, one Unicode code per entry; CHINV marks a code without
; Unicode equivalent. Layout: 4 groups of 8 (two of them all CHINV), one
; word, INCW run of 58, 4 CHINV, INCW run of 29, 4 CHINV.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP874ToUniTab:	dw	20ach, CHINV, CHINV, CHINV, CHINV, 2026h, CHINV, CHINV
		times	8 dw CHINV
		dw	CHINV, 2018h, 2019h, 201ch, 201dh, 2022h, 2013h, 2014h
		times	8 dw CHINV
		dw	 0a0h
		INCW	0e01h, 0e3ah
		dw	CHINV, CHINV, CHINV, CHINV
		INCW	0e3fh, 0e5bh
		dw	CHINV, CHINV, CHINV, CHINV

; ------------- Translation table from Windows-1250 (Central Europe) to Unicode
; 128 WORD entries (16 rows of 8), one Unicode code per entry; CHINV marks
; a code without Unicode equivalent.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP1250ToUniTab:	dw	20ach, CHINV, 201ah, CHINV, 201eh, 2026h, 2020h, 2021h
		dw	CHINV, 2030h,  160h, 2039h,  15ah,  164h,  17dh,  179h
		dw	CHINV, 2018h, 2019h, 201ch, 201dh, 2022h, 2013h, 2014h
		dw	CHINV, 2122h,  161h, 203ah,  15bh,  165h,  17eh,  17ah
		dw	 0a0h,  2c7h,  2d8h,  141h,  0a4h,  104h,  0a6h,  0a7h
		dw	 0a8h,  0a9h,  15eh,  0abh,  0ach,  0adh,  0aeh,  17bh
		dw	 0b0h,  0b1h,  2dbh,  142h,  0b4h,  0b5h,  0b6h,  0b7h
		dw	 0b8h,  105h,  15fh,  0bbh,  13dh,  2ddh,  13eh,  17ch
		dw	 154h,  0c1h,  0c2h,  102h,  0c4h,  139h,  106h,  0c7h
		dw	 10ch,  0c9h,  118h,  0cbh,  11ah,  0cdh,  0ceh,  10eh
		dw	 110h,  143h,  147h,  0d3h,  0d4h,  150h,  0d6h,  0d7h
		dw	 158h,  16eh,  0dah,  170h,  0dch,  0ddh,  162h,  0dfh
		dw	 155h,  0e1h,  0e2h,  103h,  0e4h,  13ah,  107h,  0e7h
		dw	 10dh,  0e9h,  119h,  0ebh,  11bh,  0edh,  0eeh,  10fh
		dw	 111h,  144h,  148h,  0f3h,  0f4h,  151h,  0f6h,  0f7h
		dw	 159h,  16fh,  0fah,  171h,  0fch,  0fdh,  163h,  2d9h

; ------------- Translation table from Windows-1251 (Cyrillic) to Unicode
; 128 WORD entries, one Unicode code per entry: 8 rows of 8 followed by an
; INCW run of 64. CHINV marks a code without Unicode equivalent.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP1251ToUniTab:	dw	 402h,  403h, 201ah,  453h, 201eh, 2026h, 2020h, 2021h
		dw	20ach, 2030h,  409h, 2039h,  40ah,  40ch,  40bh,  40fh
		dw	 452h, 2018h, 2019h, 201ch, 201dh, 2022h, 2013h, 2014h
		dw	CHINV, 2122h,  459h, 203ah,  45ah,  45ch,  45bh,  45fh
		dw	 0a0h,  40eh,  45eh,  408h,  0a4h,  490h,  0a6h,  0a7h
		dw	 401h,  0a9h,  404h,  0abh,  0ach,  0adh,  0aeh,  407h
		dw	 0b0h,  0b1h,  406h,  456h,  491h,  0b5h,  0b6h,  0b7h
		dw	 451h, 2116h,  454h,  0bbh,  458h,  405h,  455h,  457h
		INCW	410h, 44fh

; ------------- Translat. table from Windows-1252 (Latin 1 Windows) to Unicode
; 128 WORD entries, one Unicode code per entry: 4 rows of 8 followed by an
; INCW run of 96 (0a0h to 0ffh). CHINV marks a code without Unicode
; equivalent.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP1252ToUniTab:	dw	20ach, CHINV, 201ah,  192h, 201eh, 2026h, 2020h, 2021h
		dw	 2c6h, 2030h,  160h, 2039h,  152h, CHINV,  17dh, CHINV
		dw	CHINV, 2018h, 2019h, 201ch, 201dh, 2022h, 2013h, 2014h
		dw	 2dch, 2122h,  161h, 203ah,  153h, CHINV,  17eh,  178h
		INCW	0a0h, 0ffh

; ------------- Translation table from Windows-1253 (Greek) to Unicode
; 128 WORD entries, one Unicode code per entry: 7 rows of 8, a row of 6,
; INCW run of 20, CHINV, INCW run of 44, CHINV. CHINV marks a code without
; Unicode equivalent.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP1253ToUniTab:	dw	20ach, CHINV, 201ah,  192h, 201eh, 2026h, 2020h, 2021h
		dw	CHINV, 2030h, CHINV, 2039h, CHINV, CHINV, CHINV, CHINV
		dw	CHINV, 2018h, 2019h, 201ch, 201dh, 2022h, 2013h, 2014h
		dw	CHINV, 2122h, CHINV, 203ah, CHINV, CHINV, CHINV, CHINV
		dw	 0a0h,  385h,  386h,  0a3h,  0a4h,  0a5h,  0a6h,  0a7h
		dw	 0a8h,  0a9h, CHINV,  0abh,  0ach,  0adh,  0aeh, 2015h
		dw	 0b0h,  0b1h,  0b2h,  0b3h,  384h,  0b5h,  0b6h,  0b7h
		dw	 388h,  389h,  38ah,  0bbh,  38ch,  0bdh
                INCW    38eh, 3a1h
		dw	CHINV
		INCW	3a3h, 3ceh
		dw	CHINV

; ------------- Translation table from Windows-1254 (Turkish) to Unicode
; 128 WORD entries, one Unicode code per entry: 4 rows of 8, then identity
; runs 0a0h to 0ffh written with INCW and interrupted by the substituted
; codes 11eh, 130h, 15eh, 11fh, 131h and 15fh. CHINV marks a code without
; Unicode equivalent.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP1254ToUniTab:	dw	20ach, CHINV, 201ah,  192h, 201eh, 2026h, 2020h, 2021h
		dw	 2c6h, 2030h,  160h, 2039h,  152h, CHINV, CHINV, CHINV
		dw	CHINV, 2018h, 2019h, 201ch, 201dh, 2022h, 2013h, 2014h
		dw	 2dch, 2122h,  161h, 203ah,  153h, CHINV, CHINV,  178h
		INCW	0a0h, 0cfh
		dw	11eh
		INCW	0d1h, 0dch
		dw	130h, 15eh
		INCW	0dfh, 0efh
		dw	11fh
		INCW	0f1h, 0fch
		dw	131h, 15fh, 0ffh

; ------------- Translation table from Windows-1255 (Hebrew) to Unicode
; 128 WORD entries, one Unicode code per entry: 12 rows of 8, an INCW run
; of 27 and a final row of 5. CHINV marks a code without Unicode equivalent.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP1255ToUniTab:	dw	20ach, CHINV, 201ah,  192h, 201eh, 2026h, 2020h, 2021h
		dw	 2c6h, 2030h, CHINV, 2039h, CHINV, CHINV, CHINV, CHINV
		dw	CHINV, 2018h, 2019h, 201ch, 201dh, 2022h, 2013h, 2014h
		dw	 2dch, 2122h, CHINV, 203ah, CHINV, CHINV, CHINV, CHINV
		dw	 0a0h,  0a1h,  0a2h,  0a3h, 20aah,  0a5h,  0a6h,  0a7h
		dw	 0a8h,  0a9h,  0d7h,  0abh,  0ach,  0adh,  0aeh,  0afh
		dw	 0b0h,  0b1h,  0b2h,  0b3h,  0b4h,  0b5h,  0b6h,  0b7h
		dw	 0b8h,  0b9h,  0f7h,  0bbh,  0bch,  0bdh,  0beh,  0bfh
		dw	 5b0h,  5b1h,  5b2h,  5b3h,  5b4h,  5b5h,  5b6h,  5b7h
		dw	 5b8h,  5b9h, CHINV,  5bbh,  5bch,  5bdh,  5beh,  5bfh
		dw	 5c0h,  5c1h,  5c2h,  5c3h,  5f0h,  5f1h,  5f2h,  5f3h
		dw	 5f4h, CHINV, CHINV, CHINV, CHINV, CHINV, CHINV, CHINV
		INCW	5d0h, 5eah
		dw	CHINV, CHINV, 200eh, 200fh, CHINV

; ------------- Translation table from Windows-1256 (Arabic) to Unicode
; 128 WORD entries, one Unicode code per entry: 8 rows of 8, then one word,
; an INCW run of 22 and one word (24 entries), then 5 rows of 8.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP1256ToUniTab:	dw	20ach,  67eh, 201ah,  192h, 201eh, 2026h, 2020h, 2021h
		dw	 2c6h, 2030h,  679h, 2039h,  152h,  686h,  698h,  688h
		dw	 6afh, 2018h, 2019h, 201ch, 201dh, 2022h, 2013h, 2014h
		dw	 6a9h, 2122h,  691h, 203ah,  153h, 200ch, 200dh,  6bah
		dw	 0a0h,  60ch,  0a2h,  0a3h,  0a4h,  0a5h,  0a6h,  0a7h
		dw	 0a8h,  0a9h,  6beh,  0abh,  0ach,  0adh,  0aeh,  0afh
		dw	 0b0h,  0b1h,  0b2h,  0b3h,  0b4h,  0b5h,  0b6h,  0b7h
		dw	 0b8h,  0b9h,  61bh,  0bbh,  0bch,  0bdh,  0beh,  61fh
		dw	 6c1h
		INCW	 621h, 636h
		dw	 0d7h
		dw	 637h,  638h,  639h,  63ah,  640h,  641h,  642h,  643h
		dw	 0e0h,  644h,  0e2h,  645h,  646h,  647h,  648h,  0e7h
		dw	 0e8h,  0e9h,  0eah,  0ebh,  649h,  64ah,  0eeh,  0efh
		dw	 64bh,  64ch,  64dh,  64eh,  0f4h,  64fh,  650h,  0f7h
		dw	 651h,  0f9h,  652h,  0fbh,  0fch, 200eh, 200fh,  6d2h

; ------------- Translation table from Windows-1257 (Baltic) to Unicode
; 128 WORD entries (16 rows of 8), one Unicode code per entry; CHINV marks
; a code without Unicode equivalent.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP1257ToUniTab:	dw	20ach, CHINV, 201ah, CHINV, 201eh, 2026h, 2020h, 2021h
		dw	CHINV, 2030h, CHINV, 2039h, CHINV,  0a8h,  2c7h,  0b8h
		dw	CHINV, 2018h, 2019h, 201ch, 201dh, 2022h, 2013h, 2014h
		dw	CHINV, 2122h, CHINV, 203ah, CHINV,  0afh,  2dbh, CHINV
		dw	 0a0h, CHINV,  0a2h,  0a3h,  0a4h, CHINV,  0a6h,  0a7h
		dw	 0d8h,  0a9h,  156h,  0abh,  0ach,  0adh,  0aeh,  0c6h
		dw	 0b0h,  0b1h,  0b2h,  0b3h,  0b4h,  0b5h,  0b6h,  0b7h
		dw	 0f8h,  0b9h,  157h,  0bbh,  0bch,  0bdh,  0beh,  0e6h
		dw	 104h,  12eh,  100h,  106h,  0c4h,  0c5h,  118h,  112h
		dw	 10ch,  0c9h,  179h,  116h,  122h,  136h,  12ah,  13bh
		dw	 160h,  143h,  145h,  0d3h,  14ch,  0d5h,  0d6h,  0d7h
		dw	 172h,  141h,  15ah,  16ah,  0dch,  17bh,  17dh,  0dfh
		dw	 105h,  12fh,  101h,  107h,  0e4h,  0e5h,  119h,  113h
		dw	 10dh,  0e9h,  17ah,  117h,  123h,  137h,  12bh,  13ch
		dw	 161h,  144h,  146h,  0f3h,  14dh,  0f5h,  0f6h,  0f7h
		dw	 173h,  142h,  15bh,  16bh,  0fch,  17ch,  17eh,  2d9h

; ------------- Translation table from Windows-1258 (Vietnam) to Unicode
; 128 WORD entries, one Unicode code per entry: 4 rows of 8, an INCW run of
; 32 (0a0h to 0bfh) and 8 rows of 8. CHINV marks a code without Unicode
; equivalent.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP1258ToUniTab:	dw	20ach, CHINV, 201ah,  192h, 201eh, 2026h, 2020h, 2021h
		dw	 2c6h, 2030h, CHINV, 2039h,  152h, CHINV, CHINV, CHINV
		dw	CHINV, 2018h, 2019h, 201ch, 201dh, 2022h, 2013h, 2014h
		dw	 2dch, 2122h, CHINV, 203ah,  153h, CHINV, CHINV,  178h
		INCW	0a0h, 0bfh
		dw	 0c0h,  0c1h,  0c2h,  102h,  0c4h,  0c5h,  0c6h,  0c7h
		dw	 0c8h,  0c9h,  0cah,  0cbh,  300h,  0cdh,  0ceh,  0cfh
		dw	 110h,  0d1h,  309h,  0d3h,  0d4h,  1a0h,  0d6h,  0d7h
		dw	 0d8h,  0d9h,  0dah,  0dbh,  0dch,  1afh,  303h,  0dfh
		dw	 0e0h,  0e1h,  0e2h,  103h,  0e4h,  0e5h,  0e6h,  0e7h
		dw	 0e8h,  0e9h,  0eah,  0ebh,  301h,  0edh,  0eeh,  0efh
		dw	 111h,  0f1h,  323h,  0f3h,  0f4h,  1a1h,  0f6h,  0f7h
		dw	 0f8h,  0f9h,  0fah,  0fbh,  0fch,  1b0h, 20abh,  0ffh

; ------------- Translation table from ISO 8859-2 (Latin 2) to Unicode
; 128 WORD entries, one Unicode code per entry: the first 32 entries are all
; CHINV (no Unicode equivalent), followed by 12 rows of 8.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP28592ToUniTab:times	32 dw CHINV
		dw	 0a0h,  104h,  2d8h,  141h,  0a4h,  13dh,  15ah,  0a7h
		dw	 0a8h,  160h,  15eh,  164h,  179h,  0adh,  17dh,  17bh
		dw	 0b0h,  105h,  2dbh,  142h,  0b4h,  13eh,  15bh,  2c7h
		dw	 0b8h,  161h,  15fh,  165h,  17ah,  2ddh,  17eh,  17ch
		dw	 154h,  0c1h,  0c2h,  102h,  0c4h,  139h,  106h,  0c7h
		dw	 10ch,  0c9h,  118h,  0cbh,  11ah,  0cdh,  0ceh,  10eh
		dw	 110h,  143h,  147h,  0d3h,  0d4h,  150h,  0d6h,  0d7h
		dw	 158h,  16eh,  0dah,  170h,  0dch,  0ddh,  162h,  0dfh
		dw	 155h,  0e1h,  0e2h,  103h,  0e4h,  13ah,  107h,  0e7h
		dw	 10dh,  0e9h,  119h,  0ebh,  11bh,  0edh,  0eeh,  10fh
		dw	 111h,  144h,  148h,  0f3h,  0f4h,  151h,  0f6h,  0f7h
		dw	 159h,  16fh,  0fah,  171h,  0fch,  0fdh,  163h,  2d9h

; ------------- Translation table from ISO 8859-3 (Latin 3) to Unicode
; 128 WORD entries, one Unicode code per entry: the first 32 entries are all
; CHINV (no Unicode equivalent), followed by 12 rows of 8 that contain
; further CHINV gaps.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP28593ToUniTab:times	32 dw CHINV
		dw	 0a0h,  126h,  2d8h,  0a3h,  0a4h, CHINV,  124h,  0a7h
		dw	 0a8h,  130h,  15eh,  11eh,  134h,  0adh, CHINV,  17bh
		dw	 0b0h,  127h,  0b2h,  0b3h,  0b4h,  0b5h,  125h,  0b7h
		dw	 0b8h,  131h,  15fh,  11fh,  135h,  0bdh, CHINV,  17ch
		dw	 0c0h,  0c1h,  0c2h, CHINV,  0c4h,  10ah,  108h,  0c7h
		dw	 0c8h,  0c9h,  0cah,  0cbh,  0cch,  0cdh,  0ceh,  0cfh
		dw	CHINV,  0d1h,  0d2h,  0d3h,  0d4h,  120h,  0d6h,  0d7h
		dw	 11ch,  0d9h,  0dah,  0dbh,  0dch,  16ch,  15ch,  0dfh
		dw	 0e0h,  0e1h,  0e2h, CHINV,  0e4h,  10bh,  109h,  0e7h
		dw	 0e8h,  0e9h,  0eah,  0ebh,  0ech,  0edh,  0eeh,  0efh
		dw	CHINV,  0f1h,  0f2h,  0f3h,  0f4h,  121h,  0f6h,  0f7h
		dw	 11dh,  0f9h,  0fah,  0fbh,  0fch,  16dh,  15dh,  2d9h

; ------------- Translation table from ISO 8859-4 (Baltic) to Unicode
; 128 WORD entries, one Unicode code per entry: the first 32 entries are all
; CHINV (no Unicode equivalent), followed by 12 rows of 8.
; NOTE(review): presumably entry 0 belongs to character 80h and the last
; entry to 0FFh - confirm against the code that reads this table.

CP28594ToUniTab:times	32 dw CHINV
		dw	 0a0h,  104h,  138h,  156h,  0a4h,  128h,  13bh,  0a7h
		dw	 0a8h,  160h,  112h,  122h,  166h,  0adh,  17dh,  0afh
		dw	 0b0h,  105h,  2dbh,  157h,  0b4h,  129h,  13ch,  2c7h
		dw	 0b8h,  161h,  113h,  123h,  167h,  14ah,  17eh,  14bh
		dw	 100h,  0c1h,  0c2h,  0c3h,  0c4h,  0c5h,  0c6h,  12eh
		dw	 10ch,  0c9h,  118h,  0cbh,  116h,  0cdh,  0ceh,  12ah
		dw	 110h,  145h,  14ch,  136h,  0d4h,  0d5h,  0d6h,  0d7h
		dw	 0d8h,  172h,  0dah,  0dbh,  0dch,  168h,  16ah,  0dfh
		dw	 101h,  0e1h,  0e2h,  0e3h,  0e4h,  0e5h,  0e6h,  12fh
		dw	 10dh,  0e9h,  119h,  0ebh,  117h,  0edh,  0eeh,  12bh
		dw	 111h,  146h,  14dh,  137h,  0f4h,  0f5h,  0f6h,  0f7h
		dw	 0f8h,  173h,  0fah,  0fbh,  0fch,  169h,  16bh,  2d9h

; ------------- Translation table from ISO 8859-5 (Cyrillic) to Unicode
; 128 WORD entries: 32 x CHINV, then one Unicode value per code A0h..FFh.
; Most codes map to consecutive Unicode values, emitted with INCW; only
; A0h, ADh, F0h and FDh break the sequence. Code positions in the comments
; assume the table is indexed by (code - 80h) - lookup routine not shown
; in this listing, verify.

CP28595ToUniTab:
		times	32 dw CHINV		; first 32 entries invalid
		dw	0a0h			; A0h
		INCW	401h, 40ch		; A1h..ACh
		dw	0adh			; ADh
		INCW	40eh, 40fh		; AEh..AFh
		INCW	410h, 42fh		; B0h..CFh
		INCW	430h, 44fh		; D0h..EFh
		dw	2116h			; F0h
		INCW	451h, 45ch		; F1h..FCh
		dw	0a7h			; FDh
		INCW	45eh, 45fh		; FEh..FFh

; ------------- Translation table from ISO 8859-6 (Arabic) to Unicode
; 128 WORD entries: 32 x CHINV, then one entry per code A0h..FFh. This
; code page is sparse, so the table is written as alternating runs of
; CHINV (code without Unicode equivalent) and mapped values. Code positions
; in the comments assume the table is indexed by (code - 80h) - lookup
; routine not shown in this listing, verify.

CP28596ToUniTab:
		times	32 dw CHINV		; first 32 entries invalid
		dw	0a0h			; A0h
		times	3 dw CHINV		; A1h..A3h
		dw	0a4h			; A4h
		times	7 dw CHINV		; A5h..ABh
		dw	60ch, 0adh		; ACh..ADh
		times	13 dw CHINV		; AEh..BAh
		dw	61bh			; BBh
		times	3 dw CHINV		; BCh..BEh
		dw	61fh			; BFh
		dw	CHINV			; C0h
		INCW	621h, 63ah		; C1h..DAh
		times	5 dw CHINV		; DBh..DFh
		INCW	640h, 652h		; E0h..F2h
		times	13 dw CHINV		; F3h..FFh

; ------------- Translation table from ISO 8859-7 (Greek) to Unicode
; 128 WORD entries: 32 x CHINV, then one entry per code A0h..FFh
; (CHINV = code without Unicode equivalent). Code positions in the
; comments assume the table is indexed by (code - 80h) - lookup routine
; not shown in this listing, verify.
; NOTE(review): A1h/A2h map to 2BDh/2BCh and A4h, A5h, AAh are invalid.
; This looks like the older mapping of this code page; newer published
; tables are believed to use 2018h/2019h and to define those three codes.
; Confirm which revision is intended before changing anything.

CP28597ToUniTab:
		times	32 dw CHINV		; first 32 entries invalid
		dw	 0a0h,  2bdh,  2bch,  0a3h, CHINV, CHINV,  0a6h,  0a7h ; A0h
		dw	 0a8h,  0a9h, CHINV,  0abh,  0ach,  0adh, CHINV, 2015h ; A8h
		dw	 0b0h,  0b1h,  0b2h,  0b3h,  384h,  385h,  386h,  0b7h ; B0h
		dw	 388h,  389h,  38ah,  0bbh,  38ch,  0bdh,  38eh,  38fh ; B8h
		INCW	390h, 3a1h		; C0h..D1h
		dw	CHINV			; D2h
		INCW	3a3h, 3ceh		; D3h..FEh
		dw	CHINV			; FFh

; ------------- Translation table from ISO 8859-8 (Hebrew) to Unicode
; 128 WORD entries: 32 x CHINV, then one entry per code A0h..FFh
; (CHINV = code without Unicode equivalent). Code positions in the
; comments assume the table is indexed by (code - 80h) - lookup routine
; not shown in this listing, verify.
; NOTE(review): AFh maps to 203Eh and FDh/FEh are invalid. This looks like
; the older mapping of this code page; newer published tables are believed
; to differ at these codes. Confirm which revision is intended.

CP28598ToUniTab:
		times	32 dw CHINV		; first 32 entries invalid
		dw	 0a0h, CHINV,  0a2h,  0a3h,  0a4h,  0a5h,  0a6h,  0a7h ; A0h
		dw	 0a8h,  0a9h,  0d7h,  0abh,  0ach,  0adh,  0aeh, 203eh ; A8h
		INCW	0b0h, 0b9h		; B0h..B9h: Unicode = code
		dw	0f7h			; BAh
		INCW	0bbh, 0beh		; BBh..BEh: Unicode = code
		times	32 dw CHINV		; BFh..DEh
		dw	2017h			; DFh
		INCW	5d0h, 5eah		; E0h..FAh
		times	5 dw CHINV		; FBh..FFh

; ------------- Translation table from ISO 8859-9 (Turkish) to Unicode
; 128 WORD entries: 32 x CHINV, then one Unicode value per code A0h..FFh.
; Every code maps to the Unicode value equal to the code itself, except
; six codes: D0h, DDh, DEh, F0h, FDh and FEh. Code positions in the
; comments assume the table is indexed by (code - 80h) - lookup routine
; not shown in this listing, verify.

CP28599ToUniTab:
		times	32 dw CHINV		; first 32 entries invalid
		INCW	0a0h, 0cfh		; A0h..CFh: Unicode = code
		dw	11eh			; D0h
		INCW	0d1h, 0dch		; D1h..DCh: Unicode = code
		dw	130h, 15eh, 0dfh	; DDh..DFh
		INCW	0e0h, 0efh		; E0h..EFh: Unicode = code
		dw	11fh			; F0h
		INCW	0f1h, 0fch		; F1h..FCh: Unicode = code
		dw	131h, 15fh, 0ffh	; FDh..FFh

; ------------- Translation table from ISO 8859-15 (Latin 9) to Unicode
; 128 WORD entries: 32 x CHINV, then one Unicode value per code A0h..FFh.
; Every code maps to the Unicode value equal to the code itself, except
; eight codes: A4h, A6h, A8h, B4h, B8h, BCh, BDh and BEh. Code positions
; in the comments assume the table is indexed by (code - 80h) - lookup
; routine not shown in this listing, verify.

CP28605ToUniTab:
		times	32 dw CHINV		; first 32 entries invalid
		INCW	0a0h, 0a3h		; A0h..A3h: Unicode = code
		dw	20ach, 0a5h, 160h, 0a7h	; A4h..A7h
		dw	161h			; A8h
		INCW	0a9h, 0b3h		; A9h..B3h: Unicode = code
		dw	17dh			; B4h
		INCW	0b5h, 0b7h		; B5h..B7h: Unicode = code
		dw	17eh			; B8h
		INCW	0b9h, 0bbh		; B9h..BBh: Unicode = code
		dw	152h, 153h, 178h	; BCh..BEh
		INCW	0bfh, 0ffh		; BFh..FFh: Unicode = code

; -----------------------------------------------------------------------------
;                            Uninitialized data
; -----------------------------------------------------------------------------
; Everything below only reserves space (resb/resw/resd); no values are
; emitted here, so the contents have to be set up by code at run time.
; The order of the reservations matters - see the notes on CP0ToUniTab.

		BSS_SECTION

; ------------- List of code pages (reversible, for quick search)
; CHARSETNUM DWORD entries, one per character set. CharTabInit loads the
; address of this list into EDI ("list of codepages").

		align	4, resb 1
CharSetCodePage:resd	CHARSETNUM

; ------------- Tables from Unicode (only empty heads)
; CHSINIS is advanced by FONTMAP*4 on every CHSINI macro expansion, and each
; expansion stores CharSetFromUni+CHSINIS (value before the advance) as its
; "table from Unicode" pointer. Reserving CHSINIS bytes here therefore gives
; every single byte character set its own FONTMAP*4 byte head; this needs all
; CHSINI expansions to precede this line.
; CharSetFromUni2 reserves nothing; it labels the first byte after the heads.

		align	4, resb 1
CharSetFromUni:	resb	CHSINIS
CharSetFromUni2:

; ------------- Translation table from ASCII to Unicode (=invalid characters)
; Only 128 - 32 = 96 WORDs are reserved; the remaining 32 entries of this
; 128 entry table are the first 32 entries of CP28591ToUniTab below.
; This relies on CP28591ToUniTab following immediately: 96 WORDs = 192 bytes
; keeps the 4-byte alignment, so the "align" in front of CP28591ToUniTab
; inserts no padding. Do not insert anything between the two tables.

		align	4, resb 1
CP0ToUniTab:	resw	128 - 32

; 32 characters are shared with CP28591ToUniTab (=invalid characters)

; ------------- Translation table from ISO 8859-1 (Latin 1) to Unicode
; 128 WORD entries, same size as the initialized CPxxxxxToUniTab tables.
; The first 32 entries double as the tail of CP0ToUniTab (see above), so
; they are presumably filled with the invalid character at run time - the
; initialization code is not part of this listing, verify.

		align	4, resb 1
CP28591ToUniTab:resw	128

Back to source browser