mirror of
https://github.com/lihop/godot-xterm.git
synced 2025-06-28 18:25:31 +02:00
Add all the files
This commit is contained in:
parent
d7db117f8b
commit
96e9ddcf79
68 changed files with 9064 additions and 7 deletions
269
addons/godot_xterm/input/text_decoder.gd
Normal file
269
addons/godot_xterm/input/text_decoder.gd
Normal file
|
@ -0,0 +1,269 @@
|
|||
# Copyright (c) 2020 The GodotTerm authors.
|
||||
# Copyright (c) 2019 The xterm.js authors. All rights reserved.
|
||||
# License MIT
|
||||
extends Reference
|
||||
|
||||
# Convert a given UTF32 codepoint to a UTF8 PoolByteArray.
|
||||
# The code for this function is based on the stackoverflow
|
||||
# answer by user Schwern https://stackoverflow.com/a/42013984.
|
||||
static func utf32_to_utf8(codepoint: int):
	# Encodes one UTF32 codepoint as UTF8.
	# Returns a PoolByteArray holding 1 to 4 bytes, or an empty PoolByteArray
	# (after pushing a warning) when `codepoint` is negative or greater than
	# 0x10FFFF.
	var utf8 = PoolByteArray([])

	# A negative value would otherwise satisfy the `<= 0x007F` test below and
	# be handed to append() unchanged, so reject it up front together with
	# values above the 4-byte limit.
	if codepoint < 0 or codepoint > 0x10FFFF:
		push_warning("Codepoint " + String(codepoint) + " is out of UTF-8 range")
		return utf8

	if codepoint <= 0x007F:
		# 1 byte: 0xxxxxxx
		utf8.append(codepoint)
	elif codepoint <= 0x07FF:
		# 2 bytes: 110xxxxx 10xxxxxx
		utf8.append(0b11000000 | codepoint >> 6 & 0b00011111)
		utf8.append(0b10000000 | codepoint & 0b00111111)
	elif codepoint <= 0xFFFF:
		# 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
		utf8.append(0b11100000 | codepoint >> 12 & 0b00001111)
		utf8.append(0b10000000 | codepoint >> 6 & 0b00111111)
		utf8.append(0b10000000 | codepoint & 0b00111111)
	else:
		# 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		utf8.append(0b11110000 | codepoint >> 18 & 0b00000111)
		utf8.append(0b10000000 | codepoint >> 12 & 0b00111111)
		utf8.append(0b10000000 | codepoint >> 6 & 0b00111111)
		utf8.append(0b10000000 | codepoint & 0b00111111)

	return utf8
|
||||
|
||||
# Convert UTF32 codepoint into a String.
|
||||
static func string_from_codepoint(codepoint: int):
	# Encode the codepoint as UTF8 bytes, then turn those bytes into a String.
	return utf32_to_utf8(codepoint).get_string_from_utf8()
|
||||
|
||||
# Convert UTF32 char codes into a String.
|
||||
# Basically the same as `string_from_codepoint` but for multiple codepoints
|
||||
# in a loop (which is a lot faster).
|
||||
static func utf32_to_string(data: Array, start: int = 0, end: int = -1):
	# Builds a String from the codepoints in `data[start]` up to, but not
	# including, `data[end]`. An `end` of -1 means "up to data.size()".
	var stop = data.size() if end == -1 else end
	var text = ''
	var index = start
	while index < stop:
		text += string_from_codepoint(data[index])
		index += 1
	return text
|
||||
|
||||
# Utf8Decoder - decodes UTF8 byte sequences into UTF32 codepoints.
|
||||
class Utf8ToUtf32:
	# Bytes of an incomplete multi-byte sequence carried over from the previous
	# `decode` call. A zero in slot 0 means no sequence is pending.
	var interim = PoolByteArray()

	func _init():
		interim.resize(3)
		# Zero the slots explicitly so the "nothing pending" check in `decode`
		# does not depend on what resize() leaves in newly added elements.
		clear()

	# Clears interim bytes and resets decoder to clean state.
	func clear():
		for i in interim.size():
			interim[i] = 0

	# Decodes UTF8 byte sequences in `input` to UTF32 codepoints in `target`.
	# The method assumes stream input and will store partly transmitted bytes
	# and decode them with the next data chunk.
	# `target` is grown to `input.size()` when it is smaller than that. On the
	# normal exit path it is then resized to the number of codepoints written;
	# the early returns taken when the input ends inside a multi-byte sequence
	# leave `target` at its grown size, so rely on the return value.
	# Returns the number of written codepoints in `target`.
	func decode(input: PoolByteArray, target: Array):
		var length = input.size()

		if !length:
			return 0

		if length > target.size():
			target.resize(length)

		var size = 0
		var byte1: int
		var byte2: int
		var byte3: int
		var byte4: int
		var codepoint = 0
		var start_pos = 0

		# handle leftover bytes
		if interim[0]:
			var discard_interim = false
			var cp = interim[0]
			# Strip the length marker bits from the stored lead byte.
			cp &= 0x1F if (cp & 0xE0) == 0xC0 else 0x0F if (cp & 0xF0) == 0xE0 else 0x07
			var pos = 1
			var tmp = interim[pos] & 0x3F
			# Fold every continuation byte already stored into cp.
			while tmp && pos < 4:
				cp <<= 6
				cp |= tmp
				pos += 1
				# Only read slots that exist; past the end there is nothing stored.
				tmp = interim[pos] & 0x3F if pos < interim.size() else 0
			# missing bytes - read from input
			var type = 2 if (interim[0] & 0xE0) == 0xC0 else 3 if (interim[0] & 0xF0) == 0xE0 else 4
			var missing = type - pos
			while start_pos < missing:
				if start_pos >= length:
					return 0
				tmp = input[start_pos]
				start_pos += 1
				if (tmp & 0xC0) != 0x80:
					# wrong continuation, discard interim bytes completely
					start_pos -= 1
					discard_interim = true
					break
				else:
					# need to save so we can continue short inputs in next call
					# Store at `pos` (the next free slot). The final byte of a
					# 4-byte sequence has no slot and does not need one, since
					# the sequence is complete once it has been read.
					if pos < interim.size():
						interim[pos] = tmp
					pos += 1
					cp <<= 6
					cp |= tmp & 0x3F
			if not discard_interim:
				# final test is type dependent
				match type:
					2:
						if cp < 0x80:
							# wrong starter byte
							start_pos -= 1
						else:
							target[size] = cp
							size += 1
					3:
						if cp < 0x0800 or (cp >= 0xD800 and cp <= 0xDFFF):
							# illegal codepoint
							pass
						else:
							target[size] = cp
							size += 1
					_:
						if cp < 0x10000 or cp > 0x10FFFF:
							# illegal codepoint
							pass
						else:
							target[size] = cp
							size += 1
			clear()

		# loop through input
		var four_stop = length - 4
		var i = start_pos
		while i < length:
			# ASCII shortcut with loop unrolled to 4 consecutive ASCII chars.
			# This is a compromise between speed gain for ASCII
			# and penalty for non ASCII:
			# For best ASCII performance the char should be stored directly into target,
			# but even a single attempt to write to target and compare afterwards
			# penalizes non ASCII really bad (-50%), thus we load the char into byteX first,
			# which reduces ASCII performance by ~15%.
			# This trial for ASCII reduces non ASCII performance by ~10% which seems acceptable
			# compared to the gains.
			# Note that this optimization only takes place for 4 consecutive ASCII chars,
			# for any shorter it bails out. Worst case - all 4 bytes being read but
			# thrown away due to the last being a non ASCII char (-10% performance).
			while i < four_stop:
				byte1 = input[i]
				byte2 = input[i + 1]
				byte3 = input[i + 2]
				byte4 = input[i + 3]
				if not (byte1 & 0x80) | (byte2 & 0x80) | (byte3 & 0x80) | (byte4 & 0x80):
					target[size] = byte1
					target[size + 1] = byte2
					target[size + 2] = byte3
					target[size + 3] = byte4
					size += 4
					i += 4
				else:
					break

			# reread byte1
			byte1 = input[i]
			i += 1

			# 1 byte
			if byte1 < 0x80:
				target[size] = byte1
				size += 1

			# 2 bytes
			elif (byte1 & 0xE0) == 0xC0:
				if i >= length:
					interim[0] = byte1
					return size
				byte2 = input[i]
				i += 1
				if (byte2 & 0xC0) != 0x80:
					# wrong continuation
					i -= 1
					continue
				codepoint = (byte1 & 0x1F) << 6 | (byte2 & 0x3F)
				if (codepoint < 0x80):
					# wrong starter byte
					i -= 1
					continue
				target[size] = codepoint
				size += 1

			# 3 bytes
			elif (byte1 & 0xF0) == 0xE0:
				if i >= length:
					interim[0] = byte1
					return size
				byte2 = input[i]
				i += 1
				if (byte2 & 0xC0) != 0x80:
					# wrong continuation
					i -= 1
					continue
				if i >= length:
					interim[0] = byte1
					interim[1] = byte2
					return size
				byte3 = input[i]
				i += 1
				if (byte3 & 0xC0) != 0x80:
					# wrong continuation
					i -= 1
					continue
				codepoint = (byte1 & 0x0F) << 12 | (byte2 & 0x3F) << 6 | (byte3 & 0x3F)
				if codepoint < 0x0800 or (codepoint >= 0xD800 and codepoint <= 0xDFFF):
					# illegal codepoint, no i-- here
					continue
				target[size] = codepoint
				size += 1

			# 4 bytes
			elif (byte1 & 0xF8) == 0xF0:
				if i >= length:
					interim[0] = byte1
					return size
				byte2 = input[i]
				i += 1
				if (byte2 & 0xC0) != 0x80:
					# wrong continuation
					i -= 1
					continue
				if i >= length:
					interim[0] = byte1
					interim[1] = byte2
					return size
				byte3 = input[i]
				i += 1
				if (byte3 & 0xC0) != 0x80:
					# wrong continuation
					i -= 1
					continue
				if i >= length:
					interim[0] = byte1
					interim[1] = byte2
					interim[2] = byte3
					return size
				byte4 = input[i]
				i += 1
				if (byte4 & 0xC0) != 0x80:
					# wrong continuation
					i -= 1
					continue
				codepoint = (byte1 & 0x07) << 18 | (byte2 & 0x3F) << 12 | (byte3 & 0x3F) << 6 | (byte4 & 0x3F)
				if codepoint < 0x010000 or codepoint > 0x10FFFF:
					# illegal codepoint, no i-- here
					continue
				target[size] = codepoint
				size += 1
			else:
				# illegal byte, just skip
				pass

		target.resize(size)
		return size
|
Loading…
Add table
Add a link
Reference in a new issue