# Coqui-Xtts-Demo / utils / sentence.py
# Author: Jimmy Vu
# Last commit: "Update split_sentence" (0210dff)
def split_sentence(sentence, max_text_length=180, delimiters=",;-!?"):
    """
    Recursively split a sentence into chunks shorter than ``max_text_length``.

    Each step cuts the text in two at the delimiter closest to the middle.
    If the text contains no delimiter, it is cut at the space closest to the
    middle so words stay intact; a text without any space is cut at the
    midpoint. Both halves are stripped and split again until every chunk is
    short enough.

    Args:
        sentence (str): The input sentence to split.
        max_text_length (int): A text is split further while its length is
            greater than or equal to this value (default: 180).
        delimiters (str): Characters to prioritize as split points
            (default: ",;-!?"). The delimiter used for a split is not kept
            in the output.

    Returns:
        list: The chunks in their original order. A sentence that is already
        short enough is returned unchanged as a one-element list. Empty
        pieces produced by a split are dropped, so the list can be empty when
        a long input consists only of delimiters and whitespace.
    """
    # Base case: short enough already, or too short to be cut any further.
    # The second test guarantees termination when max_text_length <= 1.
    if len(sentence) < max_text_length or len(sentence) <= 1:
        return [sentence]

    midpoint = len(sentence) // 2
    delimiter_indices = [i for i, char in enumerate(sentence) if char in delimiters]

    if delimiter_indices:
        # Cut at the delimiter closest to the midpoint; the delimiter itself
        # is left out of both halves.
        closest_delimiter = min(delimiter_indices, key=lambda idx: abs(idx - midpoint))
        first_half = sentence[:closest_delimiter].strip()
        second_half = sentence[closest_delimiter + 1:].strip()
    else:
        # No delimiter: look for the nearest space on either side of the midpoint.
        left_space = sentence.rfind(" ", 0, midpoint)
        right_space = sentence.find(" ", midpoint)

        if left_space == -1 and right_space == -1:
            # Single long word: there is no better place than the midpoint.
            split_index = midpoint
        elif left_space == -1:
            split_index = right_space
        elif right_space == -1:
            split_index = left_space
        elif (midpoint - left_space) <= (right_space - midpoint):
            # Equal distances favour the space on the left.
            split_index = left_space
        else:
            split_index = right_space

        first_half = sentence[:split_index].strip()
        second_half = sentence[split_index:].strip()

    chunks = []
    for half in (first_half, second_half):
        # A delimiter or space at the very start/end yields an empty half;
        # skip it instead of emitting an empty chunk.
        if half:
            chunks.extend(
                split_sentence(
                    half,
                    max_text_length=max_text_length,
                    # Forward the caller's delimiters so nested splits follow
                    # the same rules as the top-level one.
                    delimiters=delimiters,
                )
            )
    return chunks
def merge_sentences(sentences):
    """
    Merge short sentences into their neighbours.

    Walking left to right, a sentence of six words or fewer absorbs the
    sentences that follow it (joined with a single space) until the group
    has more than six words or the input runs out. Afterwards, a final entry
    with fewer than six words is appended to the entry before it.

    Args:
        sentences (list): Sentences in reading order; the list is not modified.

    Returns:
        list: The merged sentences, in the original order.
    """
    result = []
    total = len(sentences)
    start = 0
    while start < total:
        # Collect the pieces of one group and join them once at the end.
        pieces = [sentences[start]]
        words = len(pieces[0].split())
        end = start + 1
        while words <= 6 and end < total:
            following = sentences[end]
            pieces.append(following)
            # Pieces are space-separated, so word counts simply add up.
            words += len(following.split())
            end += 1
        result.append(' '.join(pieces))
        start = end

    # Only the last group can still be short (the input ran out while
    # merging), so fold it back into the preceding entry.
    while len(result) > 1 and len(result[-1].split()) < 6:
        tail = result.pop()
        result[-1] += ' ' + tail
    return result