Hamed744 committed on
Commit
a2bb79a
·
verified ·
1 Parent(s): 9f44233

Update src/lib/multimodal-live-client.ts

Browse files
Files changed (1) hide show
  1. src/lib/multimodal-live-client.ts +107 -149
src/lib/multimodal-live-client.ts CHANGED
@@ -14,7 +14,7 @@
14
  * limitations under the License.
15
  */
16
 
17
- import { Content, GenerativeContentBlob, Part } from "@google/generative-ai";
18
  import { EventEmitter } from "eventemitter3";
19
  import { difference } from "lodash";
20
  import {
@@ -39,12 +39,37 @@ import {
39
  } from "../multimodal-live-types";
40
  import { blobToJSON, base64ToArrayBuffer } from "./utils";
41
 
42
- /**
43
- * the events that this client will emit
44
- */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  interface MultimodalLiveClientEventTypes {
46
  open: () => void;
47
- log: (log: StreamingLog) => void;
48
  close: (event: CloseEvent) => void;
49
  audio: (data: ArrayBuffer) => void;
50
  content: (data: ServerContent) => void;
@@ -60,34 +85,32 @@ export type MultimodalLiveAPIClientConnection = {
60
  apiKey?: string;
61
  };
62
 
63
- /**
64
- * A event-emitting class that manages the connection to the websocket and emits
65
- * events to the rest of the application.
66
- * If you dont want to use react you can still use this.
67
- */
68
  export class MultimodalLiveClient extends EventEmitter<MultimodalLiveClientEventTypes> {
69
  public ws: WebSocket | null = null;
70
  protected config: LiveConfig | null = null;
71
  public url: string;
 
 
 
 
 
 
72
 
73
  constructor({ url, apiKey }: MultimodalLiveAPIClientConnection = {}) {
74
  super();
75
- console.log('🔧 Initializing MultimodalLiveClient with URL:', url || `${window.location.protocol === 'https:' ? 'wss:' : 'ws:'}//${window.location.host}/ws`);
76
  this.url = url || `${window.location.protocol === 'https:' ? 'wss:' : 'ws:'}//${window.location.host}/ws`;
77
- }
78
-
79
- log(type: string, message: StreamingLog["message"], count?: number) {
80
- const log: StreamingLog = {
81
- date: new Date(),
82
- type,
83
- message,
84
- count,
85
  };
86
- this.emit("log", log);
87
  }
88
 
 
 
89
  connect(config: LiveConfig): Promise<boolean> {
90
- console.log('🔌 Attempting WebSocket connection to:', this.url);
91
  this.config = config;
92
 
93
  if (this.ws) {
@@ -105,16 +128,15 @@ export class MultimodalLiveClient extends EventEmitter<MultimodalLiveClientEvent
105
  this.receiveParsed(response);
106
  } catch (e) {
107
  console.error("Error parsing received binary message:", e);
108
- this.log("error", "Error parsing received binary message");
109
  }
110
  } else {
111
- console.log("Received non-binary message (may be unexpected):", evt.data);
112
  try {
113
  const response: LiveIncomingMessage = JSON.parse(evt.data);
114
  this.receiveParsed(response);
115
  } catch(e) {
116
  console.error("Error parsing received text message:", e, evt.data);
117
- this.log("error", `Error parsing text message: ${evt.data}`);
118
  }
119
  }
120
  });
@@ -123,46 +145,35 @@ export class MultimodalLiveClient extends EventEmitter<MultimodalLiveClientEvent
123
  const onError = (ev: Event) => {
124
  const message = `Could not connect to "${this.url}"`;
125
  console.error("WebSocket connection error:", message, ev);
126
- this.log(`error.connect`, message);
127
  reject(new Error(message));
128
  };
129
  ws.addEventListener("error", onError);
130
 
131
  ws.addEventListener("open", (ev: Event) => {
132
- console.log('✅ WebSocket connection opened successfully');
133
- if (!this.config) {
134
- console.error("❌ Config not set when WebSocket opened!");
135
- reject("Invalid config state during WebSocket open");
136
- return;
137
- }
138
- this.log(`client.${ev.type}`, `connected to socket`);
139
  this.emit("open");
140
 
141
  this.ws = ws;
142
 
143
- // 1. ارسال پیام Setup اولیه
 
 
 
 
 
 
144
  const setupMessage: SetupMessage = {
145
  setup: this.config,
146
  };
147
  this._sendDirect(setupMessage);
148
- this.log("client.send.setup", setupMessage);
149
 
150
- // --- 👇 بخش اضافه شده برای ارسال پیام تحریک کننده 👇 ---
151
- // 2. ارسال پیام تحریک کننده برای شروع خوشامدگویی
152
- const triggerGreetingMessage: ClientContentMessage = {
153
- clientContent: {
154
- turns: [{ role: 'user', parts: [{ text: '__START_GREETING__' }] }],
155
- turnComplete: true // <<--- به true تغییر یافت
156
- }
157
- };
158
- this._sendDirect(triggerGreetingMessage);
159
- this.log("client.send.trigger", triggerGreetingMessage);
160
- // --- 👆 پایان بخش اضافه شده 👆 ---
161
 
162
  ws.removeEventListener("error", onError);
163
 
164
  ws.addEventListener("close", (ev: CloseEvent) => {
165
- console.log('🔌 WebSocket closed:', ev.code, ev.reason);
166
  let reason = ev.reason || "";
167
  if (reason.toLowerCase().includes("error")) {
168
  const prelude = "ERROR]";
@@ -174,8 +185,7 @@ export class MultimodalLiveClient extends EventEmitter<MultimodalLiveClientEvent
174
  );
175
  }
176
  }
177
- console.log('📝 Close reason processed:', reason || 'No reason provided');
178
- this.log(
179
  `server.close`,
180
  `disconnected ${reason ? `with reason: ${reason}` : `(code: ${ev.code})`}`,
181
  ev.code
@@ -189,181 +199,129 @@ export class MultimodalLiveClient extends EventEmitter<MultimodalLiveClientEvent
189
  }
190
 
191
  disconnect(ws?: WebSocket) {
192
- console.log('🔌 Attempting to disconnect WebSocket...');
193
  const wsToClose = ws || this.ws;
194
-
195
  if (wsToClose && wsToClose.readyState !== WebSocket.CLOSED && wsToClose.readyState !== WebSocket.CLOSING) {
196
- console.log(`🔒 Closing WebSocket connection (readyState: ${wsToClose.readyState})`);
197
  wsToClose.close();
198
  if (this.ws === wsToClose) {
199
  this.ws = null;
200
  }
201
  return true;
202
- } else if (wsToClose) {
203
- console.log(`⚠️ WebSocket already closing or closed (readyState: ${wsToClose.readyState})`);
204
- } else {
205
- console.log('⚠️ No active WebSocket reference to disconnect');
206
- }
207
-
208
- if (this.ws === wsToClose) {
209
- this.ws = null;
210
  }
211
-
212
  return false;
213
  }
214
 
215
- // متد برای پردازش پیام‌های parse شده
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  protected receiveParsed(response: LiveIncomingMessage) {
217
- this.log("server.receive", response); // لاگ کردن کل پیام دریافتی
 
218
 
219
  if (isToolCallMessage(response)) {
220
- this.log("server.toolCall", response);
221
  this.emit("toolcall", response.toolCall);
222
  return;
223
  }
224
  if (isToolCallCancellationMessage(response)) {
225
- this.log("server.toolCallCancellation", response);
226
  this.emit("toolcallcancellation", response.toolCallCancellation);
227
  return;
228
  }
229
-
230
  if (isSetupCompleteMessage(response)) {
231
- this.log("server.setupComplete", response);
232
  this.emit("setupcomplete");
233
  return;
234
  }
235
-
236
  if (isServerContentMessage(response)) {
237
  const { serverContent } = response;
238
  if (isInterrupted(serverContent)) {
239
- this.log("server.interrupted", response);
240
  this.emit("interrupted");
241
  return;
242
  }
243
  if (isTurnComplete(serverContent)) {
244
- this.log("server.turnComplete", response);
245
  this.emit("turncomplete");
246
  }
247
-
248
  if (isModelTurn(serverContent)) {
249
  let parts: Part[] = serverContent.modelTurn.parts;
250
-
251
- const audioParts = parts.filter(
252
- (p) => p.inlineData && p.inlineData.mimeType.startsWith("audio/"),
253
- );
254
- const base64s = audioParts.map((p) => p.inlineData?.data);
255
-
256
  const otherParts = difference(parts, audioParts);
257
-
258
  let audioByteLength = 0;
259
- base64s.forEach((b64) => {
260
- if (b64) {
261
  try {
262
- const data = base64ToArrayBuffer(b64);
263
- this.emit("audio", data);
264
  audioByteLength += data.byteLength;
265
  } catch (e) {
266
  console.error("Error decoding base64 audio:", e);
267
- this.log("error", "Error decoding base64 audio");
268
  }
269
  }
270
  });
271
- if (audioByteLength > 0) {
272
- this.log(`server.audio`, `buffer`, audioByteLength);
273
- }
274
-
275
- if (!otherParts.length) {
276
- return;
277
- }
278
 
279
  parts = otherParts;
280
  const modelTurnContent: ModelTurn = { modelTurn: { parts } };
281
  this.emit("content", modelTurnContent);
282
- this.log(`server.content`, response);
 
283
  }
284
  } else {
 
285
  console.log("Received unrecognized message structure:", response);
286
- this.log("server.unknown", response);
287
  }
288
  }
289
 
290
-
291
- /**
292
- * send realtimeInput, this is base64 chunks of "audio/pcm" and/or "image/jpg"
293
- */
294
  sendRealtimeInput(chunks: GenerativeContentBlob[]) {
295
- let hasAudio = false;
296
- let hasVideo = false;
297
- for (let i = 0; i < chunks.length; i++) {
298
- const ch = chunks[i];
299
- if (ch.mimeType.includes("audio")) {
300
- hasAudio = true;
301
- }
302
- if (ch.mimeType.includes("image")) {
303
- hasVideo = true;
304
- }
305
- if (hasAudio && hasVideo) {
306
- break;
307
- }
308
- }
309
- const message =
310
- hasAudio && hasVideo
311
- ? "audio + video"
312
- : hasAudio
313
- ? "audio"
314
- : hasVideo
315
- ? "video"
316
- : "unknown";
317
-
318
  const data: RealtimeInputMessage = {
319
- realtimeInput: {
320
- mediaChunks: chunks,
321
- },
322
  };
323
  this._sendDirect(data);
324
- this.log(`client.realtimeInput.${message}`, data, chunks.length);
325
  }
326
 
327
- /**
328
- * send a response to a function call and provide the id of the functions you are responding to
329
- */
330
  sendToolResponse(toolResponse: ToolResponseMessage["toolResponse"]) {
331
- const message: ToolResponseMessage = {
332
- toolResponse,
333
- };
334
  this._sendDirect(message);
335
- this.log(`client.toolResponse`, message);
336
  }
337
 
338
- /**
339
- * send normal content parts such as { text }
340
- */
341
  send(parts: Part | Part[], turnComplete: boolean = true) {
342
  parts = Array.isArray(parts) ? parts : [parts];
343
- const content: Content = {
344
- role: "user",
345
- parts,
346
- };
347
-
348
  const clientContentRequest: ClientContentMessage = {
349
- clientContent: {
350
- turns: [content],
351
- turnComplete,
352
- },
353
  };
354
-
355
  this._sendDirect(clientContentRequest);
356
- this.log(`client.send.content`, clientContentRequest);
357
  }
358
 
359
- /**
360
- * used internally to send all messages
361
- * don't use directly unless trying to send an unsupported message type
362
- */
363
  _sendDirect(request: object) {
364
  if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
365
  console.error("WebSocket is not connected or not open. Cannot send message:", request);
366
- this.log("error", "Attempted to send message while WebSocket not open");
367
  return;
368
  }
369
  try {
@@ -371,7 +329,7 @@ export class MultimodalLiveClient extends EventEmitter<MultimodalLiveClientEvent
371
  this.ws.send(str);
372
  } catch (error) {
373
  console.error("Error stringifying or sending message:", error, request);
374
- this.log("error", `Error sending message: ${error}`);
375
  }
376
  }
377
  }
 
14
  * limitations under the License.
15
  */
16
 
17
+ import { Content, GenerativeContentBlob, Part, InlineDataPart } from "@google/generative-ai";
18
  import { EventEmitter } from "eventemitter3";
19
  import { difference } from "lodash";
20
  import {
 
39
  } from "../multimodal-live-types";
40
  import { blobToJSON, base64ToArrayBuffer } from "./utils";
41
 
42
/**
 * Encodes the bytes of an ArrayBuffer as a Base64 string.
 *
 * `btoa` is used when the runtime provides it; otherwise the global `Buffer`
 * is used as a fallback.
 *
 * @param buffer - Raw bytes to encode.
 * @returns The Base64 text for `buffer` (an empty string for an empty buffer).
 * @throws Error when neither `btoa` nor `Buffer` is available.
 */
function arrayBufferToBase64(buffer: ArrayBuffer): string {
  if (typeof btoa === 'function') {
    // btoa expects a "binary string" (one char per byte), so build it only on
    // this path instead of paying for it when the Buffer fallback is taken.
    const bytes = new Uint8Array(buffer);
    let binary = '';
    for (let i = 0; i < bytes.byteLength; i++) {
      binary += String.fromCharCode(bytes[i]);
    }
    return btoa(binary);
  }
  if (typeof Buffer !== 'undefined') {
    return Buffer.from(buffer).toString('base64');
  }
  throw new Error("Cannot convert ArrayBuffer to Base64 in this environment.");
}
57
+
58
/**
 * Type guard for parts that carry inline data.
 *
 * A part qualifies when it is a non-null object with an `inlineData` property
 * that is itself a non-null object whose `mimeType` and `data` are both strings.
 *
 * @param part - The part to inspect.
 * @returns `true` when `part` has the inline-data shape described above.
 */
function isInlineDataPart(part: Part): part is InlineDataPart {
  if (typeof part !== 'object' || part === null || !('inlineData' in part)) {
    return false;
  }
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- shape is verified field by field below
  const inline = (part as any).inlineData;
  if (typeof inline !== 'object' || inline === null) {
    return false;
  }
  return typeof inline.mimeType === 'string' && typeof inline.data === 'string';
}
69
+
70
  interface MultimodalLiveClientEventTypes {
71
  open: () => void;
72
+ log: (log: StreamingLog) => void; // این هنوز وجود دارد تا side-panel کار کند
73
  close: (event: CloseEvent) => void;
74
  audio: (data: ArrayBuffer) => void;
75
  content: (data: ServerContent) => void;
 
85
  apiKey?: string;
86
  };
87
 
 
 
 
 
 
88
  export class MultimodalLiveClient extends EventEmitter<MultimodalLiveClientEventTypes> {
89
  public ws: WebSocket | null = null;
90
  protected config: LiveConfig | null = null;
91
  public url: string;
92
+ private readonly AUDIO_SAMPLE_RATE = 16000;
93
+ private readonly AUDIO_MIME_TYPE_BASE = `audio/l16`;
94
+ private readonly AUDIO_MIME_TYPE_WITH_RATE = `${this.AUDIO_MIME_TYPE_BASE};rate=${this.AUDIO_SAMPLE_RATE}`;
95
+
96
+ // --- 👇 لاگر داخلی برای ارسال به پنل لاگ، بدون console.log زیاد 👇 ---
97
+ private logger: ((type: string, message: StreamingLog["message"], count?: number) => void) | null = null;
98
 
99
  constructor({ url, apiKey }: MultimodalLiveAPIClientConnection = {}) {
100
  super();
101
+ console.log('🔧 Initializing MultimodalLiveClient...'); // فقط لاگ اولیه
102
  this.url = url || `${window.location.protocol === 'https:' ? 'wss:' : 'ws:'}//${window.location.host}/ws`;
103
+ // اتصال logger داخلی به emit کردن رویداد log
104
+ this.logger = (type, message, count) => {
105
+ const logEntry: StreamingLog = { date: new Date(), type, message, count };
106
+ this.emit("log", logEntry);
 
 
 
 
107
  };
 
108
  }
109
 
110
+ // تابع log عمومی حذف شد، از logger داخلی استفاده می‌شود
111
+
112
  connect(config: LiveConfig): Promise<boolean> {
113
+ this.logger?.("info", "Attempting WebSocket connection...", undefined);
114
  this.config = config;
115
 
116
  if (this.ws) {
 
128
  this.receiveParsed(response);
129
  } catch (e) {
130
  console.error("Error parsing received binary message:", e);
131
+ this.logger?.("error", "Error parsing received binary message", undefined);
132
  }
133
  } else {
 
134
  try {
135
  const response: LiveIncomingMessage = JSON.parse(evt.data);
136
  this.receiveParsed(response);
137
  } catch(e) {
138
  console.error("Error parsing received text message:", e, evt.data);
139
+ this.logger?.("error", `Error parsing text message: ${evt.data ? evt.data.substring(0, 100) + '...' : 'empty'}`, undefined);
140
  }
141
  }
142
  });
 
145
  const onError = (ev: Event) => {
146
  const message = `Could not connect to "${this.url}"`;
147
  console.error("WebSocket connection error:", message, ev);
148
+ this.logger?.(`error.connect`, message, undefined);
149
  reject(new Error(message));
150
  };
151
  ws.addEventListener("error", onError);
152
 
153
  ws.addEventListener("open", (ev: Event) => {
154
+ this.logger?.(`client.${ev.type}`, `connected to socket`, undefined);
 
 
 
 
 
 
155
  this.emit("open");
156
 
157
  this.ws = ws;
158
 
159
+ if (!this.config) { // این چک باید بماند
160
+ console.error("❌ Config not set when WebSocket opened!");
161
+ this.logger?.("error", "Config not set when WebSocket opened!", undefined);
162
+ reject("Invalid config state during WebSocket open");
163
+ return;
164
+ }
165
+
166
  const setupMessage: SetupMessage = {
167
  setup: this.config,
168
  };
169
  this._sendDirect(setupMessage);
170
+ this.logger?.("client.send.setup", setupMessage, undefined); // ارسال به پنل لاگ
171
 
172
+ // --- پیام‌های تحریک کننده و صدای سکوت حذف شده‌اند ---
 
 
 
 
 
 
 
 
 
 
173
 
174
  ws.removeEventListener("error", onError);
175
 
176
  ws.addEventListener("close", (ev: CloseEvent) => {
 
177
  let reason = ev.reason || "";
178
  if (reason.toLowerCase().includes("error")) {
179
  const prelude = "ERROR]";
 
185
  );
186
  }
187
  }
188
+ this.logger?.(
 
189
  `server.close`,
190
  `disconnected ${reason ? `with reason: ${reason}` : `(code: ${ev.code})`}`,
191
  ev.code
 
199
  }
200
 
201
  disconnect(ws?: WebSocket) {
 
202
  const wsToClose = ws || this.ws;
 
203
  if (wsToClose && wsToClose.readyState !== WebSocket.CLOSED && wsToClose.readyState !== WebSocket.CLOSING) {
204
+ this.logger?.("info", `Closing WebSocket connection (readyState: ${wsToClose.readyState})`, undefined);
205
  wsToClose.close();
206
  if (this.ws === wsToClose) {
207
  this.ws = null;
208
  }
209
  return true;
 
 
 
 
 
 
 
 
210
  }
 
211
  return false;
212
  }
213
 
214
/**
 * Builds an inline-data part holding zero-filled audio of the requested length.
 *
 * The sample count is derived from `AUDIO_SAMPLE_RATE` at two bytes per
 * sample, and the part is tagged with `AUDIO_MIME_TYPE_WITH_RATE`.
 * This helper is no longer called; it is kept for completeness.
 *
 * @param durationMs - Length of the silence in milliseconds. Negative or
 *   non-finite values are treated as 0 instead of raising a RangeError from
 *   the buffer allocation.
 * @returns A part whose `inlineData.data` is the Base64 of the silent buffer.
 */
private createSilentAudioChunk(durationMs: number): Part {
  const bytesPerSample = 2;
  const safeDurationMs = isFinite(durationMs) && durationMs > 0 ? durationMs : 0;
  const sampleCount = Math.floor(this.AUDIO_SAMPLE_RATE * (safeDurationMs / 1000));
  // A fresh ArrayBuffer is zero-initialised, which is what makes it silence.
  const silence = new ArrayBuffer(sampleCount * bytesPerSample);
  return {
    inlineData: {
      mimeType: this.AUDIO_MIME_TYPE_WITH_RATE,
      data: arrayBufferToBase64(silence),
    },
  };
}
228
+
229
  protected receiveParsed(response: LiveIncomingMessage) {
230
+ // --- 👇 لاگ دریافت پیام حذف شد 👇 ---
231
+ // this.logger?.("server.receive", response);
232
 
233
  if (isToolCallMessage(response)) {
234
+ this.logger?.("server.toolCall", response, undefined);
235
  this.emit("toolcall", response.toolCall);
236
  return;
237
  }
238
  if (isToolCallCancellationMessage(response)) {
239
+ this.logger?.("server.toolCallCancellation", response, undefined);
240
  this.emit("toolcallcancellation", response.toolCallCancellation);
241
  return;
242
  }
 
243
  if (isSetupCompleteMessage(response)) {
244
+ this.logger?.("server.setupComplete", response, undefined);
245
  this.emit("setupcomplete");
246
  return;
247
  }
 
248
  if (isServerContentMessage(response)) {
249
  const { serverContent } = response;
250
  if (isInterrupted(serverContent)) {
251
+ this.logger?.("server.interrupted", response, undefined);
252
  this.emit("interrupted");
253
  return;
254
  }
255
  if (isTurnComplete(serverContent)) {
256
+ this.logger?.("server.turnComplete", response, undefined);
257
  this.emit("turncomplete");
258
  }
 
259
  if (isModelTurn(serverContent)) {
260
  let parts: Part[] = serverContent.modelTurn.parts;
261
+ const audioParts = parts.filter(isInlineDataPart)
262
+ .filter(part => part.inlineData.mimeType.startsWith(this.AUDIO_MIME_TYPE_BASE));
 
 
 
 
263
  const otherParts = difference(parts, audioParts);
 
264
  let audioByteLength = 0;
265
+ audioParts.forEach((part) => {
266
+ if (part.inlineData.data) {
267
  try {
268
+ const data = base64ToArrayBuffer(part.inlineData.data);
269
+ this.emit("audio", data); // رویداد صدا هنوز emit می‌شود
270
  audioByteLength += data.byteLength;
271
  } catch (e) {
272
  console.error("Error decoding base64 audio:", e);
273
+ this.logger?.("error", "Error decoding base64 audio", undefined);
274
  }
275
  }
276
  });
277
+ // --- 👇 لاگ بایت صدا حذف شد 👇 ---
278
+ // if (audioByteLength > 0) {
279
+ // this.logger?.(`server.audio`, `buffer`, audioByteLength);
280
+ // }
281
+ if (!otherParts.length) return;
 
 
282
 
283
  parts = otherParts;
284
  const modelTurnContent: ModelTurn = { modelTurn: { parts } };
285
  this.emit("content", modelTurnContent);
286
+ // --- 👇 لاگ محتوای غیرصوتی حذف شد 👇 ---
287
+ // this.logger?.(`server.content`, response);
288
  }
289
  } else {
290
+ // لاگ پیام ناشناخته را نگه می‌داریم
291
  console.log("Received unrecognized message structure:", response);
292
+ this.logger?.("server.unknown", response, undefined);
293
  }
294
  }
295
 
 
 
 
 
296
  sendRealtimeInput(chunks: GenerativeContentBlob[]) {
297
+ // --- 👇 لاگ ارسال صدا حذف شد 👇 ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
  const data: RealtimeInputMessage = {
299
+ realtimeInput: { mediaChunks: chunks },
 
 
300
  };
301
  this._sendDirect(data);
302
+ // this.logger?.(`client.realtimeInput`, `audio/video chunks`, chunks.length);
303
  }
304
 
 
 
 
305
  sendToolResponse(toolResponse: ToolResponseMessage["toolResponse"]) {
306
+ const message: ToolResponseMessage = { toolResponse };
 
 
307
  this._sendDirect(message);
308
+ this.logger?.(`client.toolResponse`, message, undefined); // این لاگ معمولا کم تکرار است
309
  }
310
 
 
 
 
311
  send(parts: Part | Part[], turnComplete: boolean = true) {
312
  parts = Array.isArray(parts) ? parts : [parts];
313
+ const content: Content = { role: "user", parts };
 
 
 
 
314
  const clientContentRequest: ClientContentMessage = {
315
+ clientContent: { turns: [content], turnComplete },
 
 
 
316
  };
 
317
  this._sendDirect(clientContentRequest);
318
+ this.logger?.(`client.send.content`, clientContentRequest, undefined); // این لاگ هم معمولا کم تکرار است
319
  }
320
 
 
 
 
 
321
  _sendDirect(request: object) {
322
  if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
323
  console.error("WebSocket is not connected or not open. Cannot send message:", request);
324
+ this.logger?.("error", "Attempted to send message while WebSocket not open", undefined);
325
  return;
326
  }
327
  try {
 
329
  this.ws.send(str);
330
  } catch (error) {
331
  console.error("Error stringifying or sending message:", error, request);
332
+ this.logger?.("error", `Error sending message: ${error}`, undefined);
333
  }
334
  }
335
  }