JayLacoma committed on
Commit
3d7d263
·
verified ·
1 Parent(s): 928c7d9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +359 -0
app.py ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import requests
4
+ import numpy as np
5
+ import gradio as gr
6
+ from datetime import datetime, timedelta
7
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
8
+ import plotly.graph_objects as go
9
+ from plotly.subplots import make_subplots
10
+ import yfinance as yf
11
+
12
+ # Configuration
13
class Config:
    """Static application settings, accessed as class attributes."""

    # Finnhub API token. An environment variable of the same name takes
    # precedence so a deployment can supply its own secret.
    # NOTE(review): the fallback literal is a credential committed to source
    # control; it should be rotated and removed once the environment variable
    # is configured everywhere this app runs.
    FINNHUB_API_KEY = (
        os.environ.get("FINNHUB_API_KEY")
        or "cuj17q1r01qm7p9n307gcuj17q1r01qm7p9n3080"
    )
    # Default look-back window in days (reduced from 365 to make it faster).
    DEFAULT_DAYS = 30
    # Directory that holds the cached CSV files.
    DATA_DIR = "data"

    @classmethod
    def initialize(cls):
        """Create ``DATA_DIR`` if it does not already exist."""
        os.makedirs(cls.DATA_DIR, exist_ok=True)


Config.initialize()
23
+
24
+ # Simple sentiment analyzer
25
class SentimentAnalyzer:
    """Thin wrapper around VADER that yields one compound score per text."""

    def __init__(self):
        self.analyzer = SentimentIntensityAnalyzer()

    def analyze(self, text):
        """Return the VADER compound score of *text*.

        Non-string values and empty or whitespace-only strings score ``0``.
        """
        has_content = isinstance(text, str) and bool(text.strip())
        if has_content:
            scores = self.analyzer.polarity_scores(text)
            return scores['compound']
        return 0
33
+
34
+ # News fetcher and sentiment analyzer
35
class StockNewsAnalyzer:
    """Fetch company news for one ticker and score headline sentiment.

    News is requested from the Finnhub ``company-news`` endpoint and cached
    as CSV under ``Config.DATA_DIR``; closing prices are downloaded with
    ``yfinance``.
    """

    # Non-alphanumeric characters kept verbatim in cache file names. Anything
    # else (notably path separators) is replaced, because the symbol comes
    # from user input.
    _SAFE_SYMBOL_CHARS = "._-^="

    def __init__(self, symbol):
        self.symbol = symbol
        self.sentiment_analyzer = SentimentAnalyzer()

    def get_file_path(self, file_type):
        """Return the CSV cache path for this symbol and *file_type*."""
        safe_symbol = "".join(
            ch if ch.isalnum() or ch in self._SAFE_SYMBOL_CHARS else "_"
            for ch in str(self.symbol)
        )
        return os.path.join(Config.DATA_DIR, f"{safe_symbol}_{file_type}.csv")

    def get_news(self, days=Config.DEFAULT_DAYS, force_refresh=False):
        """Fetch news articles from the Finnhub API, using a CSV cache.

        Args:
            days: Length of the look-back window in days, ending now.
            force_refresh: When true, skip the cache and refetch.

        Returns:
            A DataFrame of articles with a parsed ``datetime`` column, or an
            empty DataFrame when nothing usable could be obtained.
        """
        # The window length is part of the cache key so that changing `days`
        # cannot silently return data cached for a different window.
        file_path = self.get_file_path(f"news_{int(days)}d")

        if not force_refresh and os.path.exists(file_path):
            try:
                return pd.read_csv(file_path, parse_dates=['datetime'])
            except Exception as e:
                # Unreadable or corrupted cache: fall through and refetch.
                print(f"Ignoring unreadable cache {file_path}: {e}")

        end_date = datetime.now()
        start_date = end_date - timedelta(days=days)

        url = "https://finnhub.io/api/v1/company-news"
        params = {
            "symbol": self.symbol,
            "from": start_date.strftime('%Y-%m-%d'),
            "to": end_date.strftime('%Y-%m-%d'),
            "token": Config.FINNHUB_API_KEY,
        }

        try:
            response = requests.get(url, params=params, timeout=10)
            # Make HTTP failures (bad token, rate limit, ...) visible in the
            # log instead of looking like "no news".
            response.raise_for_status()
            data = response.json()

            if not data or not isinstance(data, list):
                return pd.DataFrame()

            df = pd.DataFrame(data)
            if 'datetime' not in df.columns:
                return pd.DataFrame()

            # Finnhub timestamps are converted from epoch seconds.
            df['datetime'] = pd.to_datetime(df['datetime'], unit='s')
            df.to_csv(file_path, index=False)
            return df
        except Exception as e:
            print(f"Error fetching news: {e}")
            return pd.DataFrame()

    def _get_close_prices(self, start_date, end_date):
        """Return daily closes indexed by date, or an empty frame on failure."""
        try:
            prices = yf.download(self.symbol, start=start_date, end=end_date, progress=False)
            if prices.empty or 'Close' not in prices.columns:
                return pd.DataFrame()

            closes = prices[['Close']].copy()
            closes.columns = ['close']
            closes = closes.reset_index().rename(columns={'Date': 'date'})
            closes['date'] = pd.to_datetime(closes['date'].dt.date)
            return closes.set_index('date')
        except Exception as e:
            # Prices are optional context for the chart; report and carry on.
            print(f"Error fetching stock prices: {e}")
            return pd.DataFrame()

    def analyze_news_sentiment(self, days=Config.DEFAULT_DAYS, force_refresh=False):
        """Score headlines and aggregate sentiment per calendar day.

        Args:
            days: Look-back window passed to :meth:`get_news`.
            force_refresh: Passed to :meth:`get_news`.

        Returns:
            ``(daily_sentiment, stock_data, top_headlines)``, or
            ``(None, None, None)`` when there is no news or the news has no
            ``headline`` column. ``stock_data`` may be an empty DataFrame.
        """
        news_df = self.get_news(days, force_refresh)

        if news_df.empty or 'headline' not in news_df.columns:
            return None, None, None

        news_df['sentiment_score'] = news_df['headline'].apply(self.sentiment_analyzer.analyze)

        # Midnight-normalised timestamp used as the daily grouping key.
        news_df['date'] = pd.to_datetime(news_df['datetime'].dt.date)

        # Start a few days before the first article for chart context.
        stock_data = self._get_close_prices(
            news_df['date'].min() - timedelta(days=5),
            news_df['date'].max() + timedelta(days=1),
        )

        daily_sentiment = news_df.groupby('date').agg(
            avg_sentiment=('sentiment_score', 'mean'),
            article_count=('sentiment_score', 'count'),
            positive_count=('sentiment_score', lambda x: (x > 0.05).sum()),
            negative_count=('sentiment_score', lambda x: (x < -0.05).sum()),
            neutral_count=('sentiment_score', lambda x: ((x >= -0.05) & (x <= 0.05)).sum()),
        ).reset_index()

        # Highest scores first, so head() is most positive and tail() is
        # most negative.
        ranked = news_df.sort_values('sentiment_score', ascending=False)
        top_positive = ranked[ranked['sentiment_score'] > 0].head(5)
        top_negative = ranked[ranked['sentiment_score'] < 0].tail(5)

        return daily_sentiment, stock_data, pd.concat([top_positive, top_negative])
139
+
140
+ # Visualization Functions
141
def create_sentiment_overview(daily_sentiment, stock_data, top_headlines, symbol):
    """Build the two-row sentiment figure for *symbol*.

    Row 1 overlays the closing price (primary y-axis) and the daily average
    sentiment (secondary y-axis). Row 2 stacks the positive / negative /
    neutral article counts per day and draws the daily total as a line.

    Args:
        daily_sentiment: DataFrame with ``date``, ``avg_sentiment``,
            ``article_count`` and the three ``*_count`` columns.
        stock_data: DataFrame indexed by date with a ``close`` column; may be
            empty or ``None``, in which case the price trace is omitted.
        top_headlines: Unused; kept so existing callers keep working.
        symbol: Ticker shown in the figure title.

    Returns:
        The plotly figure, or ``None`` when there is no daily sentiment.
    """
    if daily_sentiment is None or daily_sentiment.empty:
        return None

    fig = make_subplots(rows=2, cols=1, specs=[[{"secondary_y": True}], [{}]],
                        row_heights=[0.7, 0.3], vertical_spacing=0.1)

    if stock_data is not None and not stock_data.empty:
        fig.add_trace(
            go.Scatter(
                x=stock_data.index,
                y=stock_data['close'],
                name='Stock Price',
                line=dict(color='#1f77b4', width=2)
            ),
            row=1, col=1, secondary_y=False
        )

    dates = daily_sentiment['date']

    fig.add_trace(
        go.Scatter(
            x=dates,
            y=daily_sentiment['avg_sentiment'],
            name='Sentiment Score',
            line=dict(color='#ff7f0e', width=2)
        ),
        row=1, col=1, secondary_y=True
    )

    # The total is a line, not a bar: with barmode='stack' a total bar would
    # be stacked with the three category bars and double the apparent count.
    fig.add_trace(
        go.Scatter(
            x=dates,
            y=daily_sentiment['article_count'],
            name='Article Count',
            mode='lines+markers',
            line=dict(color='rgba(135, 206, 235, 1)', width=2)
        ),
        row=2, col=1
    )

    breakdown = (
        ('positive_count', 'Positive', 'rgba(0, 128, 0, 0.7)'),
        ('negative_count', 'Negative', 'rgba(255, 0, 0, 0.7)'),
        ('neutral_count', 'Neutral', 'rgba(128, 128, 128, 0.7)'),
    )
    for column, label, color in breakdown:
        fig.add_trace(
            go.Bar(
                x=dates,
                y=daily_sentiment[column],
                name=label,
                marker_color=color
            ),
            row=2, col=1
        )

    fig.update_layout(
        title=f"{symbol} News Sentiment Analysis",
        template='plotly_white',
        hovermode='x unified',
        barmode='stack',
        legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1),
        height=700,
        margin=dict(l=20, r=20, t=80, b=20)
    )

    fig.update_yaxes(title_text="Stock Price", row=1, col=1, secondary_y=False)
    fig.update_yaxes(title_text="Sentiment Score", row=1, col=1, secondary_y=True)
    fig.update_yaxes(title_text="Article Count", row=2, col=1)

    return fig
233
+
234
def _render_headline_rows(rows, color):
    """Return one markdown bullet per row of *rows*, with the score in *color*."""
    bullets = []
    for _, row in rows.iterrows():
        date = row['datetime'].strftime('%Y-%m-%d')
        # Escape square brackets so a headline cannot end the link text early.
        title = str(row['headline']).replace('[', '\\[').replace(']', '\\]')
        url = row.get('url')
        # Link only when a usable URL exists (column missing or NaN otherwise).
        linked = f"[{title}]({url})" if isinstance(url, str) and url else title
        bullets.append(
            f"- **{date}** | {linked} | "
            f"<span style='color:{color};'>*{row['sentiment_score']:.2f}*</span>\n\n"
        )
    return "".join(bullets)


def format_headlines(headlines_df):
    """Format the strongest headlines as markdown for display.

    Args:
        headlines_df: DataFrame with ``datetime``, ``headline`` and
            ``sentiment_score`` columns, optionally ``url``; may be ``None``.

    Returns:
        Markdown with up to five most positive and five most negative
        headlines, or ``"No headlines available."`` for missing/empty input.
    """
    if headlines_df is None or headlines_df.empty:
        return "No headlines available."

    scores = headlines_df['sentiment_score']
    most_positive = (
        headlines_df[scores > 0].sort_values('sentiment_score', ascending=False).head(5)
    )
    most_negative = headlines_df[scores < 0].sort_values('sentiment_score').head(5)

    return (
        "## Top Positive Headlines\n\n"
        + _render_headline_rows(most_positive, "green")
        + "## Top Negative Headlines\n\n"
        + _render_headline_rows(most_negative, "red")
    )
257
+
258
def create_summary(daily_sentiment, symbol):
    """Render a markdown summary of the aggregated daily sentiment.

    Returns a short "no data" sentence when *daily_sentiment* is ``None`` or
    empty; otherwise a markdown block with the overall score, article total,
    covered date range and the positive / negative / neutral split.
    """
    if daily_sentiment is None or daily_sentiment.empty:
        return f"No sentiment data available for {symbol}."

    # Totals across every day in the frame.
    mean_score = daily_sentiment['avg_sentiment'].mean()
    n_articles = daily_sentiment['article_count'].sum()
    n_positive = daily_sentiment['positive_count'].sum()
    n_negative = daily_sentiment['negative_count'].sum()
    n_neutral = daily_sentiment['neutral_count'].sum()

    # Scores within +/-0.05 of zero count as neutral.
    if mean_score > 0.05:
        trend = "positive"
    elif mean_score < -0.05:
        trend = "negative"
    else:
        trend = "neutral"

    first_day = daily_sentiment['date'].min().strftime('%Y-%m-%d')
    last_day = daily_sentiment['date'].max().strftime('%Y-%m-%d')

    def share(part):
        # Percentage of all articles, one decimal place.
        return f"{part/n_articles*100:.1f}"

    lines = [
        "",
        f"## {symbol} Sentiment Summary",
        "",
        "### Overview",
        f"- **Overall Sentiment**: {trend.title()} (Score: {mean_score:.2f})",
        f"- **Total Articles**: {n_articles}",
        f"- **Date Range**: {first_day} to {last_day}",
        "",
        "### Sentiment Breakdown",
        f"- **Positive Articles**: {n_positive} ({share(n_positive)}%)",
        f"- **Negative Articles**: {n_negative} ({share(n_negative)}%)",
        f"- **Neutral Articles**: {n_neutral} ({share(n_neutral)}%)",
        "",
    ]
    return "\n".join(lines)
293
+
294
+ # Gradio Interface
295
def analyze_stock_sentiment(symbol, days, refresh_data):
    """Run the full analysis for the Gradio interface.

    Args:
        symbol: Ticker as typed by the user; case and surrounding whitespace
            are ignored.
        days: Look-back window in days.
        refresh_data: When true, bypass cached news.

    Returns:
        ``(summary_markdown, figure_or_None, headlines_markdown)``.
    """
    # Normalise before validating so whitespace-only (or missing) input is
    # rejected here instead of being sent to the news API as an empty symbol.
    symbol = symbol.strip().upper() if isinstance(symbol, str) else ""
    if not symbol:
        return "Please enter a valid stock symbol.", None, "No headlines available."

    analyzer = StockNewsAnalyzer(symbol)
    daily_sentiment, stock_data, top_headlines = analyzer.analyze_news_sentiment(days, refresh_data)

    if daily_sentiment is None or daily_sentiment.empty:
        return f"No news data available for {symbol}. Try another symbol or increase the time range.", None, "No headlines available."

    sentiment_plot = create_sentiment_overview(daily_sentiment, stock_data, top_headlines, symbol)
    summary = create_summary(daily_sentiment, symbol)
    headlines = format_headlines(top_headlines)

    return summary, sentiment_plot, headlines
322
+
323
+ # Build Gradio interface
324
def build_interface():
    """Assemble the Gradio Blocks UI and wire the analyze button to it."""
    # NOTE(review): the source this was recovered from had lost its
    # indentation; the output widgets are placed directly under the Blocks
    # root here -- confirm against the intended layout.
    with gr.Blocks(title="Stock Sentiment Analysis", theme=gr.themes.Soft()) as app:
        gr.Markdown("# Stock News Sentiment Analysis")
        gr.Markdown("Analyze the sentiment of news articles for any stock symbol")

        # Input controls, in a single column.
        with gr.Row(), gr.Column(scale=1):
            symbol_box = gr.Textbox(label="Stock Symbol", value="BABA", placeholder="e.g., AAPL, MSFT, GOOGL")
            days_slider = gr.Slider(label="Days of History", minimum=7, maximum=90, value=90, step=1)
            refresh_box = gr.Checkbox(label="Refresh Data", value=False)
            run_button = gr.Button("Analyze Sentiment", variant="primary")

        # Result areas: summary text, chart, then headline list.
        summary_md = gr.Markdown()
        plot_area = gr.Plot()
        headlines_md = gr.Markdown()

        controls = [symbol_box, days_slider, refresh_box]
        displays = [summary_md, plot_area, headlines_md]
        run_button.click(fn=analyze_stock_sentiment, inputs=controls, outputs=displays)

    return app
352
+
353
+ # Main function
354
def main():
    """Build the interface and start the Gradio app."""
    build_interface().launch()


if __name__ == "__main__":
    main()