Coqui-Xtts-Demo

Sleeping

Jimmy Vu

Update split_sentence

0210dff 5 months ago

3.13 kB


	def split_sentence(sentence, max_text_length=180, delimiters=",;-!?"):
	    """
	    Recursively split a sentence into chunks shorter than ``max_text_length``.

	    A sentence that is already shorter than ``max_text_length`` is returned
	    unchanged as a one-element list. Otherwise it is cut in two at the
	    delimiter closest to its midpoint (the delimiter character itself is
	    dropped). If it contains no delimiter, it is cut at the space closest to
	    the midpoint so words stay intact; if it contains no space either, it is
	    cut at the midpoint. Both halves are stripped of surrounding whitespace
	    and split again with the same settings.

	    Args:
	        sentence (str): The input sentence to split.
	        max_text_length (int): Length from which a chunk is split further.
	            Chunks of zero or one character are never split, whatever this
	            value is.
	        delimiters (str): Characters preferred as split points
	            (default: ",;-!?").

	    Returns:
	        list[str]: The chunks in their original order. Halves that are empty
	        after stripping are omitted.
	    """
	    # Base case. The `<= 1` guard keeps the recursion finite when
	    # max_text_length is 1 or less: such a string cannot get any shorter.
	    if len(sentence) < max_text_length or len(sentence) <= 1:
	        return [sentence]

	    midpoint = len(sentence) // 2
	    delimiter_indices = [i for i, char in enumerate(sentence) if char in delimiters]

	    if delimiter_indices:
	        # min() keeps the first of equally close candidates, so ties go to
	        # the delimiter on the left.
	        closest_delimiter = min(delimiter_indices, key=lambda x: abs(x - midpoint))
	        first_half = sentence[:closest_delimiter].strip()
	        second_half = sentence[closest_delimiter + 1:].strip()
	    else:
	        # Nearest space on each side of the midpoint; -1 means "none found".
	        left_space = sentence.rfind(" ", 0, midpoint)
	        right_space = sentence.find(" ", midpoint)
	        spaces = [pos for pos in (left_space, right_space) if pos != -1]
	        if spaces:
	            # The left candidate is listed first, so it wins a tie.
	            split_index = min(spaces, key=lambda pos: abs(pos - midpoint))
	        else:
	            # Single unbroken word: a hard cut in the middle is all we can do.
	            split_index = midpoint
	        first_half = sentence[:split_index].strip()
	        second_half = sentence[split_index:].strip()

	    chunks = []
	    for half in (first_half, second_half):
	        # A delimiter or space at either end of the sentence leaves an empty
	        # half; skip it instead of emitting an empty chunk.
	        if half:
	            chunks.extend(
	                split_sentence(half, max_text_length=max_text_length, delimiters=delimiters)
	            )
	    return chunks


	def merge_sentences(sentences, min_words=6):
	    """
	    Merge short sentences into their neighbours.

	    Sentences are scanned left to right. A sentence with ``min_words`` words
	    or fewer absorbs the sentences that follow it (joined with a single
	    space) until it has more than ``min_words`` words or the input runs out.
	    If the final chunk still has fewer than ``min_words`` words, it is
	    appended to the chunk before it.

	    Args:
	        sentences (list[str]): Sentences in reading order. The list is not
	            modified.
	        min_words (int): Word-count threshold, counting whitespace-separated
	            words (default: 6).

	    Returns:
	        list[str]: The merged sentences, in their original order.
	    """
	    merged_sentences = []
	    total = len(sentences)
	    i = 0
	    while i < total:
	        chunk = sentences[i]
	        word_count = len(chunk.split())
	        i += 1
	        # Merge the short sentence with the following ones until long enough.
	        while word_count <= min_words and i < total:
	            chunk += ' ' + sentences[i]
	            # Pieces are joined by a space, so word counts simply add up.
	            word_count += len(sentences[i].split())
	            i += 1
	        merged_sentences.append(chunk)

	    # Only the last chunk can still be short (the input ran out before it
	    # grew long enough); fold it back into the previous chunk.
	    # NOTE(review): this check is strict (`<`) while the forward pass uses
	    # `<=`, so a trailing chunk of exactly min_words words is kept on its
	    # own. Kept as in the original behaviour; confirm whether that is intended.
	    while len(merged_sentences) > 1 and len(merged_sentences[-1].split()) < min_words:
	        tail = merged_sentences.pop()
	        merged_sentences[-1] += ' ' + tail
	    return merged_sentences