174 lines
6.8 KiB
Python
174 lines
6.8 KiB
Python
import requests
|
|
import pandas as pd
|
|
import time
|
|
from .base import DataFetcher
|
|
from storage.file_io import DataStorage
|
|
|
|
class UsFetcher(DataFetcher):
    """Fetch US company fundamentals from the Alpha Vantage query endpoint.

    Every public method issues one HTTP GET against ``BASE_URL`` and returns
    either a plain ``dict`` (market metrics) or a ``pandas.DataFrame`` of
    annual reports with columns renamed to the project's short names.
    Request failures are reported with ``print`` and yield empty results
    instead of raising.

    NOTE(review): ``self.api_key`` is read by this class but never assigned
    here; it is presumably set by ``DataFetcher.__init__`` -- confirm.
    """

    BASE_URL = "https://www.alphavantage.co/query"

    # Pause applied before every request. Presumably this keeps the client
    # under the provider's rate limit -- confirm against the current API plan.
    REQUEST_DELAY_SECONDS = 15

    # Upper bound for connecting/reading a single response. Without a
    # timeout ``requests`` waits forever on a stalled connection.
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self, api_key: str):
        """Initialise the base fetcher with *api_key* and create the raw-data store."""
        super().__init__(api_key)
        self.storage = DataStorage()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _request_json(self, function: str, symbol: str):
        """Sleep for the configured delay, then GET one API function as JSON.

        Args:
            function: Value of the ``function`` query parameter.
            symbol: Value of the ``symbol`` query parameter.

        Returns:
            The decoded JSON payload (normally a ``dict``).

        Raises:
            requests.RequestException: On connection errors, timeouts or a
                non-2xx HTTP status.
            ValueError: If the response body is not valid JSON.
        """
        params = {
            "function": function,
            "symbol": symbol,
            "apikey": self.api_key,
        }
        time.sleep(self.REQUEST_DELAY_SECONDS)
        response = requests.get(
            self.BASE_URL,
            params=params,
            timeout=self.REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _to_float(value, default: float = 0.0) -> float:
        """Convert an API field to ``float``, returning *default* when unusable.

        ``None``, empty strings, non-numeric placeholders and non-finite
        results (NaN/inf) all map to *default*, so callers can pass the
        result straight to ``int()`` or use it in arithmetic.
        """
        if value is None or value == "":
            return default
        try:
            number = float(value)
        except (TypeError, ValueError):
            return default
        # ``number != number`` is only true for NaN.
        if number != number or abs(number) == float("inf"):
            return default
        return number

    @staticmethod
    def _rename_and_coerce(df: pd.DataFrame, cols_map: dict, numeric_cols) -> pd.DataFrame:
        """Rename columns via *cols_map* and make the listed columns numeric.

        Columns named in *numeric_cols* that are absent from the frame are
        skipped; values that cannot be parsed become NaN
        (``errors='coerce'``). All other columns are left untouched.
        """
        df = df.rename(columns=cols_map)
        for col in numeric_cols:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors='coerce')
        return df

    def _save_raw_data(self, data, symbol: str, name: str):
        """Persist an unprocessed API payload through ``self.storage``.

        A list is stored as one row per element, a dict as a single row.
        ``None``, other types and payloads that produce an empty frame are
        ignored.
        """
        if isinstance(data, list):
            df = pd.DataFrame(data)
        elif isinstance(data, dict):
            # For single-record JSON objects, convert to a DataFrame
            df = pd.DataFrame([data])
        else:
            return

        if not df.empty:
            self.storage.save_data(df, 'US', symbol, f"raw_{name}")

    def _fetch_data(self, function: str, symbol: str) -> pd.DataFrame:
        """Return the ``annualReports`` of *function* for *symbol*.

        The raw reports are saved via ``_save_raw_data`` and the result is
        sorted by ``fiscalDateEnding`` (newest first) when that column
        exists. An empty frame is returned, after printing a message, when
        the request fails or the payload carries no annual reports (for
        example an error or rate-limit notice instead of data).
        """
        try:
            data = self._request_json(function, symbol)
        except Exception as e:
            # Best-effort boundary: callers receive an empty frame, never an exception.
            print(f"Error requesting {function}: {e}")
            return pd.DataFrame()

        # Guard against non-dict payloads (list / null) before using ``.get``.
        reports = data.get("annualReports") if isinstance(data, dict) else None
        if not reports:
            print(f"Error fetching {function} for {symbol}: {data}")
            return pd.DataFrame()

        self._save_raw_data(reports, symbol, f"{function.lower()}_annual")

        df_annual = pd.DataFrame(reports)
        if "fiscalDateEnding" in df_annual.columns:
            df_annual = df_annual.sort_values("fiscalDateEnding", ascending=False)
        return df_annual

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def get_market_metrics(self, symbol: str) -> dict:
        """Return headline valuation metrics for *symbol* from ``OVERVIEW``.

        Returns:
            A dict with keys ``price``, ``name``, ``fiscal_year_end``,
            ``dividend_yield``, ``market_cap``, ``pe``, ``pb``,
            ``employee_count`` and ``total_share_holders``. Numeric fields
            fall back to 0 when missing or unparseable. ``price`` is not a
            quote: it is derived as market capitalisation divided by shares
            outstanding.
        """
        # 1. Get Overview for PE, PB, MarketCap, Employees
        overview_data = {}
        try:
            payload = self._request_json("OVERVIEW", symbol)
            if isinstance(payload, dict):
                # Clean up 'None' strings from API response before processing
                overview_data = {
                    key: (None if value == 'None' else value)
                    for key, value in payload.items()
                }
                self._save_raw_data(overview_data, symbol, "market_metrics_overview")
            else:
                # A list/null payload has no fields to read; keep the empty dict.
                print(f"Error fetching OVERVIEW for {symbol}: {payload}")
        except Exception as e:
            # Best-effort boundary: request and storage failures both fall
            # back to zeroed metrics rather than raising.
            print(f"Error fetching OVERVIEW for {symbol}: {e}")

        market_cap = self._to_float(overview_data.get("MarketCapitalization"))
        shares_outstanding = self._to_float(overview_data.get("SharesOutstanding"))

        # Avoid dividing by zero when the share count is missing.
        price = market_cap / shares_outstanding if shares_outstanding > 0 else 0

        return {
            "price": price,
            "name": overview_data.get("Name"),
            "fiscal_year_end": overview_data.get("FiscalYearEnd"),
            "dividend_yield": self._to_float(overview_data.get("DividendYield")),
            "market_cap": market_cap,
            "pe": self._to_float(overview_data.get("PERatio")),
            "pb": self._to_float(overview_data.get("PriceToBookRatio")),
            "employee_count": int(self._to_float(overview_data.get("FullTimeEmployees"))),
            "total_share_holders": 0,  # Not typically provided in basic AV Overview
        }

    def get_income_statement(self, symbol: str) -> pd.DataFrame:
        """Return annual income statements for *symbol* with project column names."""
        df = self._fetch_data("INCOME_STATEMENT", symbol)
        cols_map = {
            "fiscalDateEnding": "date",
            "totalRevenue": "revenue",
            "netIncome": "net_income",
            "grossProfit": "gross_profit",
            "costOfRevenue": "cogs",
            "researchAndDevelopment": "rd_exp",
            "sellingGeneralAndAdministrative": "sga_exp",
            "interestExpense": "fin_exp",
            "incomeBeforeTax": "total_profit",
            "incomeTaxExpense": "income_tax",
            "ebit": "ebit",
        }
        # Convert numeric columns for analysis, keep others as is.
        # The last two names are not produced by cols_map; they are coerced
        # only if the API response already contains them under that name.
        numeric_cols = [
            "revenue", "net_income", "gross_profit", "cogs", "rd_exp", "sga_exp",
            "fin_exp", "total_profit", "income_tax", "ebit",
            "depreciation", "depreciationAndAmortization",
        ]
        return self._rename_and_coerce(df, cols_map, numeric_cols)

    def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
        """Return annual balance sheets for *symbol* with project column names."""
        df = self._fetch_data("BALANCE_SHEET", symbol)
        cols_map = {
            "fiscalDateEnding": "date",
            "totalShareholderEquity": "total_equity",
            "totalLiabilities": "total_liabilities",
            "totalCurrentAssets": "current_assets",
            "totalCurrentLiabilities": "current_liabilities",
            "cashAndCashEquivalentsAtCarryingValue": "cash",
            "currentNetReceivables": "receivables",
            "inventory": "inventory",
            "propertyPlantEquipment": "fixed_assets",
            "totalAssets": "total_assets",
            "goodwill": "goodwill",
            "longTermInvestments": "lt_invest",
            "shortTermDebt": "short_term_debt",
            "currentLongTermDebt": "short_term_debt_part",
            "longTermDebt": "long_term_debt",
            "currentAccountsPayable": "accounts_payable",
            "otherCurrentAssets": "prepayment",
            "otherNonCurrentAssets": "other_assets",
            "deferredRevenue": "adv_receipts",
        }
        numeric_cols = [
            "total_equity", "total_liabilities", "current_assets", "current_liabilities",
            "cash", "receivables", "inventory", "fixed_assets", "total_assets",
            "goodwill", "lt_invest", "short_term_debt", "short_term_debt_part",
            "long_term_debt", "accounts_payable", "prepayment", "other_assets", "adv_receipts",
        ]
        return self._rename_and_coerce(df, cols_map, numeric_cols)

    def get_cash_flow(self, symbol: str) -> pd.DataFrame:
        """Return annual cash-flow statements for *symbol* with project column names."""
        df = self._fetch_data("CASH_FLOW", symbol)
        cols_map = {
            "fiscalDateEnding": "date",
            "operatingCashflow": "ocf",
            "capitalExpenditures": "capex",
            "dividendPayout": "dividends",
            "depreciationDepletionAndAmortization": "depreciation",
        }
        numeric_cols = ["ocf", "capex", "dividends", "depreciation"]
        return self._rename_and_coerce(df, cols_map, numeric_cols)
|