File size: 7,512 Bytes
81b3c9e
2d12348
 
 
 
 
 
 
 
 
 
 
 
81b3c9e
2d12348
 
81b3c9e
2d12348
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7df626b
2d12348
 
 
 
 
7df626b
2d12348
 
 
 
 
7df626b
 
 
2d12348
 
 
 
 
7df626b
 
81b3c9e
 
 
7df626b
81b3c9e
 
 
 
7df626b
 
e56f928
 
 
 
 
 
 
 
 
81b3c9e
 
 
 
 
 
 
 
e56f928
 
81b3c9e
e56f928
81b3c9e
 
e56f928
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81b3c9e
e56f928
 
 
 
 
 
 
 
 
 
 
 
 
81b3c9e
e56f928
 
81b3c9e
e56f928
 
 
 
 
81b3c9e
e56f928
 
 
81b3c9e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
from transformers import pipeline
from datasets import load_dataset
import gradio as gr
import torch
from diffusers import DiffusionPipeline

"""### **Arabic: Text-Generation:**
Generate a poem in Arabic.
"""

# Arabic poem generator: a text-generation pipeline pinned to the PyTorch
# backend (framework='pt'), loading both the model and the tokenizer from the
# 'akhooli/ap2023' checkpoint. Loaded once at import time and reused per request.
pipe_ar = pipeline('text-generation', framework='pt', model='akhooli/ap2023', tokenizer='akhooli/ap2023')

"""### **English: Text-Generation:**
Generate a poem in English.
"""

# English poem generator: a text-generation pipeline on the
# "ashiqabdulkhader/GPT2-Poet" checkpoint. from_tf=True asks transformers to
# load the checkpoint from TensorFlow weights.
# NOTE(review): presumably the repo ships no PyTorch weights — confirm.
pipe_en = pipeline("text-generation", model="ashiqabdulkhader/GPT2-Poet", from_tf=True)

"""### **Arabic and English: Text-To-Speech:**
Convert the Arabic/English poetry to speech.
"""

# Initialize text-to-speech models for Arabic and English.
# Each language gets its own synthesiser pipeline plus one fixed speaker
# embedding, picked by row index from a speaker-embedding dataset.
# Everything here is loaded once at import time.

# Arabic: text-to-speech
synthesiser_arabic = pipeline("text-to-speech", model="MBZUAI/speecht5_tts_clartts_ar")
embeddings_dataset_arabic = load_dataset("herwoww/arabic_xvector_embeddings", split="validation")
# Row 105 supplies the Arabic voice; unsqueeze(0) adds a leading dimension of
# size 1 (presumably the batch axis the model expects — TODO confirm).
speaker_embedding_arabic = torch.tensor(embeddings_dataset_arabic[105]["speaker_embeddings"]).unsqueeze(0)

# English: text-to-speech
synthesiser_english = pipeline("text-to-speech", model="microsoft/speecht5_tts")
embeddings_dataset_english = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
# Row 7306 supplies the English voice; note this dataset stores the vector
# under the "xvector" key rather than "speaker_embeddings".
speaker_embedding_english = torch.tensor(embeddings_dataset_english[7306]["xvector"]).unsqueeze(0)

"""### **English Text-To-Image:**
Convert the starter of the English poetry to an image.
"""

# Text-to-image pipeline loaded from the "CompVis/stable-diffusion-v1-4" checkpoint.
# NOTE(review): no .to(device) call is made anywhere in this file, so inference
# presumably runs on the library's default device — confirm this is intended.
pipe_image = DiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")

"""### **Translator from Arabic to English:**
The text-to-image model doesn't support Arabic, so the starter of the Arabic poem must be translated to English before an image can be generated.
"""

# Arabic -> English translation pipeline ("Helsinki-NLP/opus-mt-ar-en"); used to
# turn an Arabic starter sentence into an English prompt for the image model.
pipe_translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ar-en")

"""### **Primary Function:**
This function receives 2 inputs from the Gradio interface, calls the helper functions defined below, and returns 3 outputs:
1. The generated poem.
2. The audio.
3. The image.
"""

# Generate poem based on language and convert it to audio and image
def generate_poem(selected_language, text):
    """Generate a poem, its spoken audio and an illustrative image.

    Args:
        selected_language: "English" or "Arabic", as chosen in the dropdown.
        text: The starter sentence the poem is generated from.

    Returns:
        On success, a 3-tuple ``(poem, (sampling_rate, audio_data), image)``.
        On failure, ``("Error: <message>", None, None)`` so that all three
        Gradio outputs still receive a value.
    """
    # Validate up front. Previously an unselected or unknown language skipped
    # both branches and surfaced as a confusing "local variable ... poem"
    # error produced by the final return statement.
    if selected_language not in ("English", "Arabic"):
        return (
            f"Error: Unsupported language: {selected_language!r}. "
            "Please select English or Arabic.",
            None,
            None,
        )
    if not isinstance(text, str) or not text.strip():
        return "Error: Please enter a sentence to start the poem.", None, None

    try:
        if selected_language == "English":
            poem = generate_poem_english(text)
            sampling_rate, audio_data = text_to_speech_english(poem)
            image_prompt = text
        else:
            poem = generate_poem_arabic(text)
            sampling_rate, audio_data = text_to_speech_arabic(poem)
            # The image model only understands English, so translate the starter.
            image_prompt = translate_arabic_to_english(text)

        # The image is generated from the starter sentence, not the full poem.
        image = generate_image_from_poem(image_prompt)
        return poem, (sampling_rate, audio_data), image
    except Exception as e:
        # UI boundary: report any model failure in the poem textbox instead of
        # letting the request crash.
        return f"Error: {str(e)}", None, None

"""### **Poem Generation Function:**
This function is responsible for generating a poem (text) in Arabic or English, based on the provided text.
"""

# Poem generation for Arabic
def generate_poem_arabic(text):
    """Generate an Arabic poem from the starter ``text`` using ``pipe_ar``.

    Args:
        text: The Arabic starter sentence.

    Returns:
        The first generated sequence with every "-" character removed.
    """
    generation_options = {
        "max_length": 96,
        "min_length": 64,
        "do_sample": True,
        "temperature": 1.0,
        "top_k": 50,
        "top_p": 0.9,
        "repetition_penalty": 1.2,
        "no_repeat_ngram_size": 3,
        "num_beams": 5,
        "num_return_sequences": 1,
        "return_full_text": True,
    }
    sequences = pipe_ar(text, **generation_options)
    poem = sequences[0]["generated_text"]
    # The model tends to emit dashes; drop them from the final poem.
    return poem.replace("-", "")

# Poem generation for English
def generate_poem_english(text):
    """Generate an English poem from the starter ``text`` using ``pipe_en``.

    Args:
        text: The English starter sentence.

    Returns:
        The generated text with any literal "</s>" marker removed.
    """
    # Only the first sequence is ever used, so request a single sample instead
    # of generating three and discarding two.
    sequences = pipe_en(
        text,
        do_sample=True,
        max_length=100,
        top_k=50,
        top_p=0.9,
        temperature=1.0,
        num_return_sequences=1,
    )
    # The model can emit a literal "</s>" end marker; strip it from the poem.
    return sequences[0]["generated_text"].replace("</s>", "")

"""### **Audio Function:**
This function is responsible for generating audio in Arabic or English, based on the provided text.
"""

# Text-to-speech conversion for Arabic
def text_to_speech_arabic(text):
    """Synthesise Arabic speech for ``text`` with the fixed Arabic speaker embedding.

    Returns:
        A ``(sampling_rate, audio_data)`` tuple read from the pipeline output.
    """
    result = synthesiser_arabic(
        text,
        forward_params={"speaker_embeddings": speaker_embedding_arabic},
    )
    return result["sampling_rate"], result["audio"]

# Text-to-speech conversion for English
def text_to_speech_english(text):
    """Synthesise English speech for ``text`` with the fixed English speaker embedding.

    Returns:
        A ``(sampling_rate, audio_data)`` tuple read from the pipeline output.
    """
    result = synthesiser_english(
        text,
        forward_params={"speaker_embeddings": speaker_embedding_english},
    )
    return result["sampling_rate"], result["audio"]

"""### **Image Function:**
This function is responsible for generating an image based on the provided text.
"""

# Image generation function
def generate_image_from_poem(poem_text):
    """Run the image pipeline on ``poem_text`` and return its first image."""
    generated = pipe_image(poem_text)
    return generated.images[0]

"""### **Translation Function:**
This function is responsible for translating Arabic input to English, to be used for the image function, which accepts only English inputs.
"""

# Translation function from Arabic to English
def translate_arabic_to_english(text):
    """Translate Arabic ``text`` to English and return the first translation."""
    first_result = pipe_translator(text)[0]
    return first_result['translation_text']

"""### **CSS Styling:**"""

# Stylesheet handed to gr.Interface through its `css` argument. It sets a light
# page background, a white rounded container with a soft shadow, and a blue
# (#4A90E2) accent for labels, input borders, buttons and the dropdown.
custom_css = """
body {
    background-color: #f4f4f9;
    color: #333;
}
.gradio-container {
    border-radius: 10px;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
    background-color: #fff;
}
label {
    color: #4A90E2;
    font-weight: bold;
}

input[type="text"],
textarea {
    border: 1px solid #4A90E2;
}
textarea {
    height: 150px;
}

button {
    background-color: #4A90E2;
    color: #fff;
    border-radius: 5px;
    cursor: pointer;
}
button:hover {
    background-color: #357ABD;
}

.dropdown {
    border: 1px solid #4A90E2;
    border-radius: 4px;
}

"""

"""### **Examples for Gradio:**
Provide 4 predefined inputs to demonstrate how the interface works.
"""

examples = [
    # Each row is [language for the dropdown, starter sentence for the textbox].
    [language, starter]
    for language, starter in (
        ("English", "The shining sun rises over the calm ocean"),
        ("Arabic", "الورود تتفتح في الربيع"),
        ("English", "The night sky is filled with stars and dreams"),
        ("Arabic", "اشعة الشمس المشرقة"),
    )
]

"""### **Gradio Interface:**
Creating a Gradio interface to generate a poem, read the poem, and generate an image based on that poem.
"""

# Build the Gradio UI: two inputs (language, starter sentence) are passed to
# generate_poem, whose three return values fill the three outputs in order.
my_model = gr.Interface(
    fn=generate_poem,  # Called as generate_poem(selected_language, text)
    inputs=[
        gr.Dropdown(["English", "Arabic"], label="Select Language"), # Language choice; must match the strings generate_poem checks
        gr.Textbox(label="Enter a sentence")  # Starter sentence the poem is generated from
    ],

    outputs=[
        gr.Textbox(label="Generated Poem", lines=10),  # Poem text, or the "Error: ..." message on failure
        gr.Audio(label="Generated Audio", type="numpy"),  # Receives the (sampling_rate, audio_data) tuple
        gr.Image(label="Generated Image")  # Image generated from the (translated) starter sentence
    ],

    examples=examples,  # Predefined [language, starter] rows shown under the form
    css=custom_css  # Custom stylesheet defined above
)
# Start the Gradio app; this runs whenever the module is executed.
my_model.launch()