Add all the files

2025-06-28 18:25:31 +02:00 · 2020-05-10 00:07:31 +07:00 · 2020-05-10 00:07:31 +07:00 · 96e9ddcf79
commit 96e9ddcf79
parent d7db117f8b
68 changed files with 9064 additions and 7 deletions
--- a/addons/godot_xterm/input/text_decoder.gd
+++ b/addons/godot_xterm/input/text_decoder.gd
@ -0,0 +1,269 @@
+# Copyright (c) 2020 The GodotTerm authors.
+# Copyright (c) 2019 The xterm.js authors. All rights reserved.
+# License MIT
+extends Reference
+
+# Convert a given to a utf8 PoolByteArray.
+# The code for this function is based on the stackoverflow
+# answer by user Schwern https://stackoverflow.com/a/42013984.
+static func utf32_to_utf8(codepoint: int):
+	var utf8 = PoolByteArray([])
+	
+	if codepoint <= 0x007F:
+		utf8.append(codepoint)
+	elif codepoint <= 0x07FF:
+		utf8.append(0b11000000 | codepoint >> 6 & 0b00011111)
+		utf8.append(0b10000000 | codepoint & 0b00111111)
+	elif codepoint <= 0xFFFF:
+		utf8.append(0b11100000 | codepoint >> 12 & 0b00001111)
+		utf8.append(0b10000000 | codepoint >> 6 & 0b00111111)
+		utf8.append(0b10000000 | codepoint & 0b00111111)
+	elif codepoint <= 0x10FFFF:
+		utf8.append(0b11110000 | codepoint >> 18 & 0b00000111)
+		utf8.append(0b10000000 | codepoint >> 12 & 0b00111111)
+		utf8.append(0b10000000 | codepoint >> 6 & 0b00111111)
+		utf8.append(0b10000000 | codepoint & 0b00111111)
+	else:
+		push_warning("Codepoint " + String(codepoint) + " is out of UTF-8 range")
+	
+	return utf8
+
+# Convert UTF32 codepoint into a String.
+static func string_from_codepoint(codepoint: int):
+	var utf8 = utf32_to_utf8(codepoint)
+	return utf8.get_string_from_utf8()
+
+# Covert UTF32 char codes into a String.
+# Basically the same as `string_from_codepoint` but for multiple codepoints
+# in a loop (which is a lot faster).
+static func utf32_to_string(data: Array, start: int = 0, end: int = -1):
+	if end == -1:
+		end = data.size()
+	var result = ''
+	for i in range(start, end):
+		result += string_from_codepoint(data[i])
+	return result
+
+# Utf8Decoder - decodes UTF8 byte sequences into UTF32 codepoints.
+class Utf8ToUtf32:
+	var interim = PoolByteArray()
+	
+	func _init():
+		interim.resize(3)
+	
+	# Clears interim bytes and resets decoder to clean state.
+	func clear():
+		for i in interim.size():
+			interim[i] = 0
+	
+	# Decodes UTF8 byte sequences in `input` to UTF32 codepoints in `target`.
+	# The methods assumes stream input and will store partly transmitted bytes
+	# and decode them with the next data chunk.
+	# Note: The method does no bound checks for target, therefore make sure
+	# the provided data chunk does not exceed the size of `target`.
+	# Returns the number of written codepoints in `target`.
+	func decode(input: PoolByteArray, target: Array):
+		var length = input.size()
+		
+		if !length:
+			return 0
+		
+		if length > target.size():
+			target.resize(length)
+		
+		var size = 0
+		var byte1: int
+		var byte2: int
+		var byte3: int
+		var byte4: int
+		var codepoint = 0
+		var start_pos = 0
+		
+		# handle leftover bytes
+		if interim[0]:
+			var discard_interim = false
+			var cp = interim[0]
+			cp &= 0x1F if (cp & 0xE0) == 0xC0 else 0x0F if (cp & 0xF0) == 0xE0 else 0x07
+			var pos = 1
+			var tmp = interim[pos] & 0x3F
+			while tmp && pos < 4:
+				cp <<= 6
+				cp |= tmp
+				pos += 1
+				tmp = interim[pos] & 0x3F if interim.size() < pos else 0
+			# missing bytes - read from input
+			var type = 2 if (interim[0] & 0xE0) == 0xC0 else 3 if (interim[0] & 0xF0) == 0xE0 else 4
+			var missing = type - pos
+			while start_pos < missing:
+				if start_pos >= length:
+					return 0
+				tmp = input[start_pos]
+				start_pos += 1
+				if (tmp & 0xC0) != 0x80:
+					# wrong continuation, discard interim bytes completely
+					start_pos -= 1
+					discard_interim = true
+					break
+				else:
+					# need to save so we can continue short inputs in next call
+					interim[pos + 1] = tmp
+					pos += 1
+					cp <<= 6
+					cp |= tmp & 0x3F
+			if not discard_interim:
+				# final test is type dependent
+				match type:
+					2:
+						if cp < 0x80:
+							# wrong starter byte
+							start_pos -= 1
+						else:
+							target[size] = cp
+							size += 1
+					3:
+						if cp < 0x0800 or (cp >= 0xD800 and cp <= 0xDFFF):
+							# illegal codepoint
+							pass
+						else:
+							target[size] = cp
+							size += 1
+					_:
+						if cp < 0x10000 or cp > 0x10FFFF:
+							# illegal codepoint
+							pass
+						else:
+							target[size] = cp
+							size += 1
+			clear()
+		
+		# loop through input
+		var four_stop = length - 4
+		var i = start_pos
+		while i < length:
+			# ASCII shortcut with loop unrolled to 4 consecutive ASCII chars.
+			# This is a compromise between speed gain for ASCII
+			# and penalty for non ASCII:
+			# For best ASCII performance the char should be stored directly into target,
+			# but even a single attempt to write to target and compare afterwards
+			# penalizes non ASCII really bad (-50%), thus we load the char into byteX first,
+			# which reduces ASCII performance by ~15%.
+			# This trial for ASCII reduces non ASCII performance by ~10% which seems acceptible
+			# compared to the gains.
+			# Note that this optimization only takes place for 4 consecutive ASCII chars,
+			# for any shorter it bails out. Worst case - all 4 bytes being read but
+			# thrown away due to the last being a non ASCII char (-10% performance).
+			while i < four_stop:
+				byte1 = input[i]
+				byte2 = input[i + 1]
+				byte3 = input[i + 2]
+				byte4 = input[i + 3]
+				if  not (byte1 & 0x80) | (byte2 & 0x80) | (byte3 & 0x80) | (byte4 & 0x80):
+					target[size] = byte1
+					target[size+1] = byte2
+					target[size+2] = byte3
+					target[size+3] = byte4
+					size += 4
+					i += 4
+				else:
+					break
+				
+			# reread byte1
+			byte1 = input[i]
+			i += 1
+			
+			# 1 byte
+			if byte1 < 0x80:
+				target[size] = byte1
+				size += 1
+			
+			# 2 bytes
+			elif (byte1 & 0xE0) == 0xC0:
+				if i >= length:
+					interim[0] = byte1
+					return size
+				byte2 = input[i]
+				i+=1
+				if (byte2 & 0xC0) != 0x80:
+					# wrong continuation
+					i-=1
+					continue
+				codepoint = (byte1 & 0x1F) << 6 | (byte2 & 0x3F)
+				if (codepoint < 0x80):
+					# wrong starter byte
+					i-=1
+					continue
+				target[size] = codepoint
+				size+=1
+			
+			# 3 bytes
+			elif (byte1 & 0xF0) == 0xE0:
+				if i >= length:
+					interim[0] = byte1
+					return size
+				byte2 = input[i]
+				i+=1
+				if (byte2 & 0xC0) != 0x80:
+					# wrong continuation
+					i-=1
+					continue
+				if i >= length:
+					interim[0] = byte1
+					interim[1] = byte2
+					return size
+				byte3 = input[i]
+				i+=1
+				if (byte3 & 0xC0) != 0x80:
+					# wrong continuation
+					i-=1
+					continue
+				codepoint = (byte1 & 0x0F) << 12 | (byte2 & 0x3F) << 6 | (byte3 & 0x3F)
+				if codepoint < 0x0800 or (codepoint >=0xD800 and codepoint <= 0xDFFF):
+					# illegal codepoint, no i-- here
+					continue
+				target[size] = codepoint
+				size+=1
+			
+			# 4 bytes
+			elif (byte1 & 0xF8) == 0xF0:
+				if i >= length:
+					interim[0] = byte1
+					return size
+				byte2 = input[i]
+				i += 1
+				if (byte2 & 0xC0) != 0x80:
+					# wrong continuation
+					i -= 1
+					continue
+				if i >= length:
+					interim[0] = byte1
+					interim[1] = byte2
+					return size
+				byte3 = input[i]
+				i += 1
+				if (byte3 & 0xC0) != 0x80:
+					# wrong continuation
+					i -= 1
+					continue
+				if i >= length:
+					interim[0] = byte1
+					interim[1] = byte2
+					interim[2] = byte3
+					return size
+				byte4 = input[i]
+				i += 1
+				if (byte4 & 0xC0) != 0x80:
+					# wrong continuation
+					i -= 1
+					continue
+				codepoint = (byte1 & 0x07) << 18 | (byte2 & 0x3F) << 12 | (byte3 & 0x3F) << 6 | (byte4 & 0x3F)
+				if codepoint < 0x010000 or codepoint > 0x10FFFF:
+					# illegal codepoint, no i-- here
+					continue
+				target[size] = codepoint
+				size += 1
+			else:
+				# illegal byte, just skip
+				pass
+		
+		target.resize(size)
+		return size