FA3-Datafetch/src/fetchers/us_fetcher.py

174 lines
6.8 KiB
Python

import requests
import pandas as pd
import time
from .base import DataFetcher
from storage.file_io import DataStorage
class UsFetcher(DataFetcher):
    """Fetch company fundamentals from the Alpha Vantage query endpoint.

    Every request goes to ``BASE_URL`` with ``function``, ``symbol`` and
    ``apikey`` query parameters.  Raw payloads are handed to ``DataStorage``
    before being reshaped; statement methods return DataFrames whose columns
    are renamed to the short snake_case names defined in each method.
    """

    BASE_URL = "https://www.alphavantage.co/query"

    # Seconds to sleep before every HTTP request.
    # NOTE(review): presumably chosen to stay under the API's rate limit --
    # confirm before lowering.  Subclasses/tests may override it.
    REQUEST_DELAY_SECONDS = 15

    # Upper bound (seconds) for a single HTTP call so that a stalled
    # connection cannot block the caller indefinitely.
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self, api_key: str):
        """Store the API key via the base class and create the storage backend."""
        super().__init__(api_key)
        self.storage = DataStorage()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _to_float(value, default: float = 0.0) -> float:
        """Convert *value* to a finite float, falling back to *default*.

        ``None``, empty strings, non-numeric strings and non-finite results
        (NaN / infinity) all yield *default* instead of raising.
        """
        try:
            number = float(value)
        except (TypeError, ValueError):
            return default
        # NaN is the only float that is not equal to itself.
        if number != number or number in (float("inf"), float("-inf")):
            return default
        return number

    @staticmethod
    def _normalize_statement(df: pd.DataFrame, cols_map: dict, numeric_cols: list) -> pd.DataFrame:
        """Rename columns per *cols_map* and coerce *numeric_cols* to numbers.

        Columns listed in *numeric_cols* that are absent from *df* are
        skipped.  Values that cannot be parsed become NaN
        (``errors='coerce'``); all other columns are left as they are.
        """
        df = df.rename(columns=cols_map)
        for col in numeric_cols:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors='coerce')
        return df

    def _request_json(self, function: str, symbol: str):
        """Sleep for the configured delay, then GET ``BASE_URL`` and decode JSON.

        Raises whatever ``requests.get`` or ``Response.json`` raise; callers
        are expected to handle those errors.
        """
        params = {
            "function": function,
            "symbol": symbol,
            "apikey": self.api_key,
        }
        time.sleep(self.REQUEST_DELAY_SECONDS)
        response = requests.get(
            self.BASE_URL,
            params=params,
            timeout=self.REQUEST_TIMEOUT_SECONDS,
        )
        return response.json()

    def _save_raw_data(self, data, symbol: str, name: str) -> None:
        """Persist a raw API payload under the name ``raw_<name>``.

        Lists become one row per element and a dict becomes a single-row
        frame.  ``None``, other types and payloads that produce an empty
        frame are silently skipped.
        """
        if data is None:
            return
        df = pd.DataFrame()
        if isinstance(data, list):
            df = pd.DataFrame(data)
        elif isinstance(data, dict):
            # For single-record JSON objects, convert to a DataFrame
            df = pd.DataFrame([data])
        if not df.empty:
            self.storage.save_data(df, 'US', symbol, f"raw_{name}")

    def _fetch_data(self, function: str, symbol: str) -> pd.DataFrame:
        """Return the ``annualReports`` of *function* for *symbol*.

        The raw report list is saved first.  The result is sorted by
        ``fiscalDateEnding`` in descending order when that column exists.
        An empty DataFrame is returned (and a message printed) when the
        request fails or the payload has no usable ``annualReports`` entry.
        """
        try:
            data = self._request_json(function, symbol)
        except Exception as e:
            print(f"Error requesting {function}: {e}")
            return pd.DataFrame()

        # A JSON body is not guaranteed to be an object; only dicts can carry
        # the report list, anything else is treated as "no reports".
        reports = data.get("annualReports") if isinstance(data, dict) else None

        # _save_raw_data ignores None, so nothing is written when the key is
        # missing.
        self._save_raw_data(reports, symbol, f"{function.lower()}_annual")

        if not reports:
            print(f"Error fetching {function} for {symbol}: {data}")
            return pd.DataFrame()

        df_annual = pd.DataFrame(reports)
        if "fiscalDateEnding" in df_annual.columns:
            df_annual = df_annual.sort_values("fiscalDateEnding", ascending=False)
        return df_annual

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def get_market_metrics(self, symbol: str) -> dict:
        """Return headline market metrics for *symbol* from the OVERVIEW function.

        The returned dict has the keys ``price``, ``name``,
        ``fiscal_year_end``, ``dividend_yield``, ``market_cap``, ``pe``,
        ``pb``, ``employee_count`` and ``total_share_holders``.  ``price`` is
        derived as market capitalisation divided by shares outstanding.
        Missing or non-numeric values default to 0; if the request fails the
        error is printed and every metric takes its default.
        """
        # 1. Get Overview for PE, PB, MarketCap, Employees
        overview_data = {}
        try:
            payload = self._request_json("OVERVIEW", symbol)
            if isinstance(payload, dict):
                # Clean up 'None' strings from API response before processing
                payload = {
                    key: (None if value == 'None' else value)
                    for key, value in payload.items()
                }
                overview_data = payload
            self._save_raw_data(payload, symbol, "market_metrics_overview")
        except Exception as e:
            print(f"Error fetching OVERVIEW for {symbol}: {e}")

        market_cap = self._to_float(overview_data.get("MarketCapitalization"))
        shares_outstanding = self._to_float(overview_data.get("SharesOutstanding"))

        # Guard against division by zero when the share count is unknown.
        price = market_cap / shares_outstanding if shares_outstanding > 0 else 0

        return {
            "price": price,
            "name": overview_data.get("Name"),
            "fiscal_year_end": overview_data.get("FiscalYearEnd"),
            "dividend_yield": self._to_float(overview_data.get("DividendYield")),
            "market_cap": market_cap,
            "pe": self._to_float(overview_data.get("PERatio")),
            "pb": self._to_float(overview_data.get("PriceToBookRatio")),
            "employee_count": int(self._to_float(overview_data.get("FullTimeEmployees"))),
            "total_share_holders": 0,  # Not typically provided in basic AV Overview
        }

    def get_income_statement(self, symbol: str) -> pd.DataFrame:
        """Return annual INCOME_STATEMENT reports with renamed, numeric columns."""
        df = self._fetch_data("INCOME_STATEMENT", symbol)
        cols_map = {
            "fiscalDateEnding": "date",
            "totalRevenue": "revenue",
            "netIncome": "net_income",
            "grossProfit": "gross_profit",
            "costOfRevenue": "cogs",
            "researchAndDevelopment": "rd_exp",
            "sellingGeneralAndAdministrative": "sga_exp",
            "interestExpense": "fin_exp",
            "incomeBeforeTax": "total_profit",
            "incomeTaxExpense": "income_tax",
            "ebit": "ebit",
        }
        # Convert numeric columns for analysis, keep others as is
        numeric_cols = [
            "revenue", "net_income", "gross_profit", "cogs", "rd_exp", "sga_exp",
            "fin_exp", "total_profit", "income_tax", "ebit",
            "depreciation", "depreciationAndAmortization",
        ]
        return self._normalize_statement(df, cols_map, numeric_cols)

    def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
        """Return annual BALANCE_SHEET reports with renamed, numeric columns."""
        df = self._fetch_data("BALANCE_SHEET", symbol)
        cols_map = {
            "fiscalDateEnding": "date",
            "totalShareholderEquity": "total_equity",
            "totalLiabilities": "total_liabilities",
            "totalCurrentAssets": "current_assets",
            "totalCurrentLiabilities": "current_liabilities",
            "cashAndCashEquivalentsAtCarryingValue": "cash",
            "currentNetReceivables": "receivables",
            "inventory": "inventory",
            "propertyPlantEquipment": "fixed_assets",
            "totalAssets": "total_assets",
            "goodwill": "goodwill",
            "longTermInvestments": "lt_invest",
            "shortTermDebt": "short_term_debt",
            "currentLongTermDebt": "short_term_debt_part",
            "longTermDebt": "long_term_debt",
            "currentAccountsPayable": "accounts_payable",
            "otherCurrentAssets": "prepayment",
            "otherNonCurrentAssets": "other_assets",
            "deferredRevenue": "adv_receipts",
        }
        numeric_cols = [
            "total_equity", "total_liabilities", "current_assets", "current_liabilities",
            "cash", "receivables", "inventory", "fixed_assets", "total_assets",
            "goodwill", "lt_invest", "short_term_debt", "short_term_debt_part",
            "long_term_debt", "accounts_payable", "prepayment", "other_assets", "adv_receipts",
        ]
        return self._normalize_statement(df, cols_map, numeric_cols)

    def get_cash_flow(self, symbol: str) -> pd.DataFrame:
        """Return annual CASH_FLOW reports with renamed, numeric columns."""
        df = self._fetch_data("CASH_FLOW", symbol)
        cols_map = {
            "fiscalDateEnding": "date",
            "operatingCashflow": "ocf",
            "capitalExpenditures": "capex",
            "dividendPayout": "dividends",
            "depreciationDepletionAndAmortization": "depreciation",
        }
        numeric_cols = ["ocf", "capex", "dividends", "depreciation"]
        return self._normalize_statement(df, cols_map, numeric_cols)