import math
import time

import pandas as pd
import requests

from .base import DataFetcher
from storage.file_io import DataStorage


class UsFetcher(DataFetcher):
    """Fetch US company fundamentals from the Alpha Vantage query endpoint.

    Every successful response is also handed to ``DataStorage.save_data`` as a
    "raw" snapshot before being reshaped for analysis.

    NOTE(review): the methods read ``self.api_key``; it is presumably set by
    ``DataFetcher.__init__`` -- confirm against the base class.
    """

    BASE_URL = "https://www.alphavantage.co/query"

    # Seconds to pause before each HTTP request. Presumably this keeps the
    # client under the provider's rate limit -- confirm against the API plan.
    REQUEST_DELAY_SECONDS = 15

    # Upper bound (seconds) for a single HTTP request so that a stalled
    # connection cannot block the caller forever.
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self, api_key: str):
        """Initialise the base fetcher and create the raw-data storage backend."""
        super().__init__(api_key)
        self.storage = DataStorage()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _save_raw_data(self, data, symbol: str, name: str):
        """Persist an untouched API payload under the name ``raw_<name>``.

        Args:
            data: A list of records, a single record (dict), or ``None``.
                Any other type is ignored.
            symbol: Ticker the payload belongs to.
            name: Suffix appended to ``raw_`` to form the storage name.
        """
        if data is None:
            return

        df = pd.DataFrame()
        if isinstance(data, list):
            df = pd.DataFrame(data)
        elif isinstance(data, dict):
            # For single-record JSON objects, convert to a one-row DataFrame.
            df = pd.DataFrame([data])

        if not df.empty:
            self.storage.save_data(df, 'US', symbol, f"raw_{name}")

    def _request(self, function: str, symbol: str):
        """Pause, issue one GET against ``BASE_URL`` and return the decoded JSON.

        Any exception raised by ``requests`` (connection error, timeout,
        invalid JSON) propagates to the caller, which decides how to report it.
        """
        params = {
            "function": function,
            "symbol": symbol,
            "apikey": self.api_key,
        }
        time.sleep(self.REQUEST_DELAY_SECONDS)
        response = requests.get(
            self.BASE_URL,
            params=params,
            timeout=self.REQUEST_TIMEOUT_SECONDS,
        )
        return response.json()

    @staticmethod
    def _to_float(value) -> float:
        """Convert an API field to ``float``, falling back to ``0.0``.

        ``None``, empty strings, values ``float()`` cannot parse, and
        non-finite results (NaN / infinity) all yield ``0.0`` so that a single
        malformed field cannot abort the whole metrics lookup.
        """
        if value is None:
            return 0.0
        try:
            number = float(value)
        except (TypeError, ValueError):
            return 0.0
        return number if math.isfinite(number) else 0.0

    @staticmethod
    def _rename_and_coerce(df: pd.DataFrame, cols_map: dict, numeric_cols) -> pd.DataFrame:
        """Rename columns via ``cols_map`` and coerce ``numeric_cols`` to numbers.

        Columns listed in ``numeric_cols`` that are absent from the frame are
        skipped; unparseable cells become NaN (``errors='coerce'``). All other
        columns are left as they are.
        """
        df = df.rename(columns=cols_map)
        for col in numeric_cols:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors='coerce')
        return df

    def _fetch_data(self, function: str, symbol: str) -> pd.DataFrame:
        """Fetch the ``annualReports`` list of a statement endpoint.

        Args:
            function: Value for the API's ``function`` query parameter.
            symbol: Ticker to query.

        Returns:
            The annual reports as a DataFrame, sorted by ``fiscalDateEnding``
            descending when that column exists. An empty DataFrame is returned
            (after printing a message) when the request fails or the payload
            has no non-empty ``annualReports`` entry.
        """
        try:
            data = self._request(function, symbol)
        except Exception as e:
            print(f"Error requesting {function}: {e}")
            return pd.DataFrame()

        # Payloads that are not JSON objects (null, list, ...) are treated
        # like a missing "annualReports" key instead of raising.
        reports = data.get("annualReports") if isinstance(data, dict) else None
        if not reports:
            print(f"Error fetching {function} for {symbol}: {data}")
            return pd.DataFrame()

        self._save_raw_data(reports, symbol, f"{function.lower()}_annual")

        df_annual = pd.DataFrame(reports)
        if "fiscalDateEnding" in df_annual.columns:
            df_annual = df_annual.sort_values("fiscalDateEnding", ascending=False)
        return df_annual

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def get_market_metrics(self, symbol: str) -> dict:
        """Return headline market metrics derived from the OVERVIEW endpoint.

        The price is not read from the API; it is computed as
        ``MarketCapitalization / SharesOutstanding`` (0 when the share count
        is missing or zero). Missing or unparseable numeric fields are
        reported as 0. If the request fails, a message is printed and every
        numeric metric is 0 while ``name`` / ``fiscal_year_end`` are ``None``.
        """
        # 1. Get Overview for PE, PB, MarketCap, Employees
        overview_data = {}
        try:
            payload = self._request("OVERVIEW", symbol)
            if isinstance(payload, dict):
                # Clean up 'None' strings from API response before processing
                payload = {
                    key: (None if value == 'None' else value)
                    for key, value in payload.items()
                }
                overview_data = payload
            else:
                print(f"Unexpected OVERVIEW payload for {symbol}: {payload}")
            self._save_raw_data(payload, symbol, "market_metrics_overview")
        except Exception as e:
            print(f"Error fetching OVERVIEW for {symbol}: {e}")

        market_cap = self._to_float(overview_data.get("MarketCapitalization"))
        shares_outstanding = self._to_float(overview_data.get("SharesOutstanding"))

        price = 0
        if shares_outstanding > 0:
            price = market_cap / shares_outstanding

        return {
            "price": price,
            "name": overview_data.get("Name"),
            "fiscal_year_end": overview_data.get("FiscalYearEnd"),
            "dividend_yield": self._to_float(overview_data.get("DividendYield")),
            "market_cap": market_cap,
            "pe": self._to_float(overview_data.get("PERatio")),
            "pb": self._to_float(overview_data.get("PriceToBookRatio")),
            "employee_count": int(self._to_float(overview_data.get("FullTimeEmployees"))),
            "total_share_holders": 0,  # Not typically provided in basic AV Overview
        }

    def get_income_statement(self, symbol: str) -> pd.DataFrame:
        """Return annual income statements with normalised column names.

        Known API columns are renamed (e.g. ``totalRevenue`` -> ``revenue``)
        and the numeric ones are converted with ``pd.to_numeric``; columns not
        in the map are kept under their original names.
        """
        df = self._fetch_data("INCOME_STATEMENT", symbol)

        cols_map = {
            "fiscalDateEnding": "date",
            "totalRevenue": "revenue",
            "netIncome": "net_income",
            "grossProfit": "gross_profit",
            "costOfRevenue": "cogs",
            "researchAndDevelopment": "rd_exp",
            "sellingGeneralAndAdministrative": "sga_exp",
            "interestExpense": "fin_exp",
            "incomeBeforeTax": "total_profit",
            "incomeTaxExpense": "income_tax",
            "ebit": "ebit",
        }

        # Convert numeric columns for analysis, keep others as is
        numeric_cols = [
            "revenue", "net_income", "gross_profit", "cogs", "rd_exp",
            "sga_exp", "fin_exp", "total_profit", "income_tax", "ebit",
            "depreciation", "depreciationAndAmortization",
        ]
        return self._rename_and_coerce(df, cols_map, numeric_cols)

    def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
        """Return annual balance sheets with normalised column names.

        Known API columns are renamed (e.g. ``totalAssets`` ->
        ``total_assets``) and converted with ``pd.to_numeric``; columns not in
        the map are kept under their original names.
        """
        df = self._fetch_data("BALANCE_SHEET", symbol)

        cols_map = {
            "fiscalDateEnding": "date",
            "totalShareholderEquity": "total_equity",
            "totalLiabilities": "total_liabilities",
            "totalCurrentAssets": "current_assets",
            "totalCurrentLiabilities": "current_liabilities",
            "cashAndCashEquivalentsAtCarryingValue": "cash",
            "currentNetReceivables": "receivables",
            "inventory": "inventory",
            "propertyPlantEquipment": "fixed_assets",
            "totalAssets": "total_assets",
            "goodwill": "goodwill",
            "longTermInvestments": "lt_invest",
            "shortTermDebt": "short_term_debt",
            "currentLongTermDebt": "short_term_debt_part",
            "longTermDebt": "long_term_debt",
            "currentAccountsPayable": "accounts_payable",
            "otherCurrentAssets": "prepayment",
            "otherNonCurrentAssets": "other_assets",
            "deferredRevenue": "adv_receipts",
        }

        numeric_cols = [
            "total_equity", "total_liabilities", "current_assets",
            "current_liabilities", "cash", "receivables", "inventory",
            "fixed_assets", "total_assets", "goodwill", "lt_invest",
            "short_term_debt", "short_term_debt_part", "long_term_debt",
            "accounts_payable", "prepayment", "other_assets", "adv_receipts",
        ]
        return self._rename_and_coerce(df, cols_map, numeric_cols)

    def get_cash_flow(self, symbol: str) -> pd.DataFrame:
        """Return annual cash-flow statements with normalised column names.

        Known API columns are renamed (e.g. ``operatingCashflow`` -> ``ocf``)
        and converted with ``pd.to_numeric``; columns not in the map are kept
        under their original names.
        """
        df = self._fetch_data("CASH_FLOW", symbol)

        cols_map = {
            "fiscalDateEnding": "date",
            "operatingCashflow": "ocf",
            "capitalExpenditures": "capex",
            "dividendPayout": "dividends",
            "depreciationDepletionAndAmortization": "depreciation",
        }

        numeric_cols = ["ocf", "capex", "dividends", "depreciation"]
        return self._rename_and_coerce(df, cols_map, numeric_cols)