BrianIsaac's picture
feat: implement market data provider abstraction layer
75e697a
raw
history blame
12.3 kB
"""Yahoo Finance (yfinance) provider wrapper - EDUCATIONAL USE ONLY.
WARNING: This provider is for educational and prototyping purposes only.
Using yfinance in production violates Yahoo's Terms of Service.
Migrate to FMP or another legitimate provider for production use.

This module provides backward compatibility with existing yfinance-based code
while implementing the MarketDataProvider interface.
"""
import os
import logging
import warnings
from typing import Dict, List, Optional, Any
from datetime import datetime
from decimal import Decimal
import pandas as pd
from backend.data_providers.base import (
MarketDataProvider,
QuoteData,
CompanyProfile,
FinancialRatios
)
logger = logging.getLogger(__name__)
class YFinanceProvider(MarketDataProvider):
    """Yahoo Finance provider - EDUCATIONAL USE ONLY.

    WARNING: This provider violates Yahoo's Terms of Service for commercial use.
    Use only for:
    - Educational projects
    - Personal research
    - Prototyping (non-commercial)

    DO NOT USE in production applications. Migrate to FMP or another
    legitimate provider.

    Attributes:
        _warned: Class-level flag; set to True once the ToS warning has been
            emitted so it is shown only once per process.
        yf: The ``yfinance`` module, imported lazily in ``__init__``.

    Examples:
        >>> # Educational use only
        >>> provider = YFinanceProvider()
        >>> quote = provider.get_quote("AAPL")
    """

    _warned = False

    # (FinancialRatios field name, key in the yfinance ``info`` dict).
    _RATIO_INFO_KEYS = (
        ('pe_ratio', 'trailingPE'),
        ('pb_ratio', 'priceToBook'),
        ('ps_ratio', 'priceToSalesTrailing12Months'),
        ('dividend_yield', 'dividendYield'),
        ('payout_ratio', 'payoutRatio'),
        ('roe', 'returnOnEquity'),
        ('roa', 'returnOnAssets'),
        ('debt_to_equity', 'debtToEquity'),
        ('current_ratio', 'currentRatio'),
        ('quick_ratio', 'quickRatio'),
    )

    def __init__(self) -> None:
        """Initialize YFinance provider with ToS warning.

        Raises:
            ImportError: If the ``yfinance`` package is not installed.
        """
        try:
            import yfinance as yf
        except ImportError as exc:
            raise ImportError(
                "yfinance library not installed. "
                "Install with: uv add yfinance"
            ) from exc
        self.yf = yf

        # Issue warning about ToS (once per process).
        if not YFinanceProvider._warned:
            # Build the rule separately: writing ``"=" * 70`` directly after a
            # run of adjacent string literals would multiply the *whole*
            # implicitly-concatenated literal, repeating the message 70 times.
            rule = "=" * 70
            message = (
                "\n" + rule + "\n"
                + "WARNING: YFinance provider is for EDUCATIONAL USE ONLY\n"
                "\n"
                "Using yfinance in production violates Yahoo's Terms of Service:\n"
                "- No commercial use allowed\n"
                "- No automated data scraping\n"
                "- Subject to IP bans and rate limiting\n"
                "- No support or SLA\n"
                "\n"
                "For production use, migrate to:\n"
                "- Financial Modeling Prep (FMP) - Recommended\n"
                "- Alpha Vantage\n"
                "- EODHD\n"
                "\n"
                "Set MARKET_DATA_PROVIDER=fmp to use FMP instead.\n"
                + rule
            )
            warnings.warn(message, UserWarning, stacklevel=2)
            YFinanceProvider._warned = True

        logger.warning("YFinance provider initialized (EDUCATIONAL USE ONLY)")

    @staticmethod
    def _optional_decimal(value: Any) -> Optional[Decimal]:
        """Convert a truthy value to Decimal; map missing/falsy values to None."""
        if not value:
            return None
        return Decimal(str(value))

    @staticmethod
    def _decimal_or_zero(value: Any) -> Decimal:
        """Convert a value to Decimal, treating None as zero."""
        return Decimal(str(0 if value is None else value))

    def get_quote(self, symbol: str) -> QuoteData:
        """Fetch latest quote for a symbol.

        The price is taken from ``currentPrice`` and falls back to
        ``regularMarketPrice`` when the former is missing or None.

        Args:
            symbol: Stock ticker symbol

        Returns:
            QuoteData: Latest quote information

        Raises:
            ValueError: If no price is available for the symbol
            RuntimeError: If the request or the data conversion fails
        """
        try:
            info = self.yf.Ticker(symbol).info

            price = info.get('currentPrice')
            if price is None:
                price = info.get('regularMarketPrice')

            quote = None
            if price is not None:
                quote = QuoteData(
                    symbol=symbol,
                    price=Decimal(str(price)),
                    change=self._decimal_or_zero(info.get('regularMarketChange')),
                    change_percent=self._decimal_or_zero(
                        info.get('regularMarketChangePercent')
                    ),
                    volume=info.get('regularMarketVolume'),
                    market_cap=self._optional_decimal(info.get('marketCap')),
                    # NOTE(review): naive local time, kept as in the original;
                    # confirm whether consumers expect a timezone-aware value.
                    timestamp=datetime.now(),
                    open=self._optional_decimal(info.get('regularMarketOpen')),
                    high=self._optional_decimal(info.get('dayHigh')),
                    low=self._optional_decimal(info.get('dayLow')),
                    previous_close=self._optional_decimal(info.get('previousClose')),
                )
        except Exception as e:
            logger.error(f"Failed to fetch quote for {symbol}: {e}")
            raise RuntimeError(f"Failed to fetch quote for {symbol}: {e}") from e

        # Raised outside the try block so it is not re-wrapped as RuntimeError.
        if quote is None:
            raise ValueError(f"No quote data found for {symbol}")
        return quote

    def get_quotes(self, symbols: List[str]) -> Dict[str, QuoteData]:
        """Fetch quotes for multiple symbols.

        Symbols whose quote cannot be fetched are logged and omitted from the
        result rather than aborting the whole batch.

        Args:
            symbols: List of stock ticker symbols

        Returns:
            Dict mapping symbols to QuoteData (successful lookups only)
        """
        quotes: Dict[str, QuoteData] = {}
        for symbol in symbols:
            try:
                quotes[symbol] = self.get_quote(symbol)
            except Exception as e:
                # Deliberate best-effort: one bad symbol must not fail the batch.
                logger.warning(f"Failed to fetch quote for {symbol}: {e}")
        return quotes

    def get_historical_prices(
        self,
        symbol: str,
        start_date: str,
        end_date: str,
        interval: str = "1d"
    ) -> pd.DataFrame:
        """Fetch historical OHLCV data.

        Args:
            symbol: Stock ticker symbol
            start_date: Start date in YYYY-MM-DD format
            end_date: End date in YYYY-MM-DD format
            interval: Data interval (1d, 1h, 1m, etc.)

        Returns:
            DataFrame indexed by ``date`` with columns: open, high, low,
            close (Decimal values) and volume (int)

        Raises:
            ValueError: If no data is returned for the symbol/date range
                (any other ValueError raised while fetching or converting
                also propagates unchanged)
            RuntimeError: If the request fails for any other reason
        """
        try:
            hist = self.yf.Ticker(symbol).history(
                start=start_date, end=end_date, interval=interval
            )

            if hist.empty:
                raise ValueError(f"No historical data found for {symbol}")

            # Normalize column names and convert prices to Decimal.
            columns = {
                source.lower(): hist[source].apply(lambda x: Decimal(str(x)))
                for source in ('Open', 'High', 'Low', 'Close')
            }
            columns['volume'] = hist['Volume'].astype(int)

            df = pd.DataFrame(columns, index=hist.index)
            df.index.name = 'date'
            return df
        except ValueError:
            raise
        except Exception as e:
            logger.error(f"Failed to fetch historical prices for {symbol}: {e}")
            raise RuntimeError(
                f"Failed to fetch historical prices for {symbol}: {e}"
            ) from e

    def get_company_profile(self, symbol: str) -> CompanyProfile:
        """Fetch company profile information.

        Args:
            symbol: Stock ticker symbol

        Returns:
            CompanyProfile: Company information. ``company_name`` falls back
            from ``longName`` to ``shortName`` to the symbol itself.

        Raises:
            RuntimeError: If API request fails
        """
        try:
            info = self.yf.Ticker(symbol).info
            return CompanyProfile(
                symbol=symbol,
                # ``or`` (not a dict.get default) so a key that is present but
                # None/empty still falls through to the next candidate.
                company_name=info.get('longName') or info.get('shortName') or symbol,
                description=info.get('longBusinessSummary'),
                sector=info.get('sector'),
                industry=info.get('industry'),
                country=info.get('country'),
                exchange=info.get('exchange'),
                currency=info.get('currency'),
                market_cap=self._optional_decimal(info.get('marketCap')),
                employees=info.get('fullTimeEmployees'),
                website=info.get('website'),
                ceo=None,  # Not available in yfinance
                founded=None,
            )
        except Exception as e:
            logger.error(f"Failed to fetch company profile for {symbol}: {e}")
            raise RuntimeError(
                f"Failed to fetch company profile for {symbol}: {e}"
            ) from e

    def get_financial_ratios(self, symbol: str) -> FinancialRatios:
        """Fetch financial ratios and metrics.

        Each ratio is None when the corresponding value is missing or falsy
        in the provider data.

        Args:
            symbol: Stock ticker symbol

        Returns:
            FinancialRatios: Financial metrics

        Raises:
            RuntimeError: If API request fails
        """
        try:
            info = self.yf.Ticker(symbol).info
            ratios = {
                field: self._optional_decimal(info.get(info_key))
                for field, info_key in self._RATIO_INFO_KEYS
            }
            return FinancialRatios(symbol=symbol, **ratios)
        except Exception as e:
            logger.error(f"Failed to fetch financial ratios for {symbol}: {e}")
            raise RuntimeError(
                f"Failed to fetch financial ratios for {symbol}: {e}"
            ) from e

    def get_technical_indicators(
        self,
        symbol: str,
        start_date: str,
        end_date: str,
        indicators: List[str]
    ) -> Dict[str, pd.DataFrame]:
        """Calculate technical indicators from historical data.

        Note: yfinance doesn't provide pre-calculated technical indicators.
        This method calculates them from daily historical close prices.

        Supported indicator names (case-insensitive):
        - ``SMA_<n>``: simple moving average over ``n`` periods (e.g. SMA_20)
        - ``EMA_<n>``: exponential moving average with span ``n`` (e.g. EMA_20)

        Any other name is logged as not implemented and skipped.

        Args:
            symbol: Stock ticker symbol
            start_date: Start date in YYYY-MM-DD format
            end_date: End date in YYYY-MM-DD format
            indicators: List of indicator names

        Returns:
            Dict mapping upper-cased indicator names to DataFrames with a
            single ``value`` column

        Raises:
            ValueError: If no historical data is available for the symbol
            RuntimeError: If fetching historical data fails
        """
        hist = self.get_historical_prices(symbol, start_date, end_date)

        # Prices are stored as Decimal objects; convert once to float so the
        # window calculations run on a numeric dtype.
        close = hist['close'].astype(float)

        results: Dict[str, pd.DataFrame] = {}
        for indicator in indicators:
            key = indicator.upper()
            kind, _, window_text = key.partition('_')
            has_window = window_text.isascii() and window_text.isdecimal()
            window = int(window_text) if has_window else 0

            if kind == 'SMA' and window > 0:
                values = close.rolling(window=window).mean()
            elif kind == 'EMA' and window > 0:
                values = close.ewm(span=window).mean()
            else:
                logger.warning(f"Indicator {indicator} not yet implemented in YFinance provider")
                continue

            results[key] = pd.DataFrame({'value': values})

        return results

    @property
    def name(self) -> str:
        """Provider name."""
        return "YFinance (Educational Use Only)"

    @property
    def rate_limit_info(self) -> Dict[str, Any]:
        """Get rate limit information.

        Note: yfinance doesn't provide official rate limit info, so every
        field is a static placeholder.
        """
        return {
            "requests_per_minute": "Unknown (unofficial API)",
            "requests_remaining": "Unknown",
            "requests_used": "Not tracked",
            "window_reset": None,
            "seconds_until_reset": None,
            "warning": "yfinance has unpredictable rate limits. Subject to IP bans."
        }

    def close(self) -> None:
        """Close provider (no-op for yfinance beyond logging)."""
        logger.info("YFinance provider closed")

    def __enter__(self) -> "YFinanceProvider":
        """Context manager entry; returns the provider itself."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Context manager exit; closes the provider, never suppresses errors."""
        self.close()