BrianIsaac committed on
Commit
9b12d20
·
1 Parent(s): ac37390

feat: add user-provided cost basis input for tax analysis

Browse files

- Add interactive dataframe UI for entering purchase prices and dates
- Support flexible date formats via python-dateutil parser
- Track and display whether user data or simulated data is used
- Add 'Load Holdings' button to pre-populate dataframe from portfolio
- Handle pandas DataFrames containing NaN values correctly
- Add debug logging for dataframe parsing
- Update disclaimer message dynamically based on data source
- Fall back to simulated data (80% of current price) when purchase details are not provided

Resolves issue where tax calculations always used simulated data
regardless of user settings.

Files changed (5) hide show
  1. README.md +1 -1
  2. app.py +119 -4
  3. backend/tax/interface.py +75 -10
  4. pyproject.toml +1 -0
  5. uv.lock +2 -0
README.md CHANGED
@@ -45,7 +45,7 @@ This architecture demonstrates that agentic systems can be both transparent and
45
 
46
  **Demo Video**: [TODO: Add link to demo video (1-5 minutes)]
47
 
48
- **Team**: [TODO: Add HuggingFace username]
49
 
50
  ---
51
 
 
45
 
46
  **Demo Video**: [TODO: Add link to demo video (1-5 minutes)]
47
 
48
+ **Solo, Username**: BrianIsaac
49
 
50
  ---
51
 
app.py CHANGED
@@ -2463,7 +2463,31 @@ def create_interface() -> gr.Blocks:
2463
  label="Cost Basis Method",
2464
  info="Method for calculating gains/losses"
2465
  )
2466
- tax_calculate_btn = gr.Button("Calculate Tax Impact", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2467
 
2468
  # Bottom row: Analysis output
2469
  tax_analysis_output = gr.Markdown("")
@@ -3448,8 +3472,45 @@ def create_interface() -> gr.Blocks:
3448
  history_page: gr.update(visible=True)
3449
  }
3450
 
3451
- def calculate_tax_impact(filing_status: str, annual_income: float, cost_basis_method: str):
3452
- """Calculate tax impact for current portfolio holdings."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3453
  global LAST_ANALYSIS_STATE
3454
 
3455
  if not LAST_ANALYSIS_STATE or "holdings" not in LAST_ANALYSIS_STATE:
@@ -3462,11 +3523,58 @@ Use the "Analyse Portfolio" button to analyse your portfolio, then return here t
3462
 
3463
  try:
3464
  holdings = LAST_ANALYSIS_STATE.get("holdings", [])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3465
  report = create_tax_analysis(
3466
  holdings=holdings,
3467
  filing_status=filing_status,
3468
  annual_income=annual_income,
3469
  cost_basis_method=cost_basis_method,
 
3470
  )
3471
  return format_tax_analysis_output(report)
3472
  except Exception as e:
@@ -3833,10 +3941,17 @@ Please try again with different parameters.
3833
  outputs=[history_table_results, history_page_info_results]
3834
  )
3835
 
 
 
 
 
 
 
 
3836
  # Tax calculator button (Enhancement #5)
3837
  tax_calculate_btn.click(
3838
  calculate_tax_impact,
3839
- inputs=[tax_filing_status, tax_annual_income, tax_cost_basis_method],
3840
  outputs=[tax_analysis_output]
3841
  )
3842
 
 
2463
  label="Cost Basis Method",
2464
  info="Method for calculating gains/losses"
2465
  )
2466
+
2467
+ # Cost basis input section
2468
+ gr.Markdown("### Purchase Information")
2469
+ gr.Markdown(
2470
+ "Enter your purchase details for accurate tax calculations. "
2471
+ "Click 'Load Holdings' to pre-populate with current holdings, then fill in purchase prices and dates."
2472
+ )
2473
+ with gr.Row():
2474
+ tax_load_holdings_btn = gr.Button("Load Holdings", size="sm")
2475
+ gr.Markdown(
2476
+ "*Accepted date formats: 9/3/17, Sep 3, 2017, 2017-09-03, etc.*",
2477
+ elem_classes="text-muted"
2478
+ )
2479
+
2480
+ tax_cost_basis_input = gr.Dataframe(
2481
+ headers=["Ticker", "Shares", "Purchase Price", "Purchase Date"],
2482
+ datatype=["str", "number", "number", "str"],
2483
+ col_count=(4, "fixed"),
2484
+ row_count=(5, "dynamic"),
2485
+ interactive=True,
2486
+ label="Cost Basis Information",
2487
+ value=[],
2488
+ )
2489
+
2490
+ tax_calculate_btn = gr.Button("Calculate Tax Impact", variant="primary")
2491
 
2492
  # Bottom row: Analysis output
2493
  tax_analysis_output = gr.Markdown("")
 
3472
  history_page: gr.update(visible=True)
3473
  }
3474
 
3475
+ def load_tax_holdings():
3476
+ """Load current portfolio holdings into the cost basis dataframe.
3477
+
3478
+ Returns:
3479
+ List of lists with [ticker, shares, "", ""] for each holding.
3480
+ """
3481
+ global LAST_ANALYSIS_STATE
3482
+
3483
+ if not LAST_ANALYSIS_STATE or "holdings" not in LAST_ANALYSIS_STATE:
3484
+ return []
3485
+
3486
+ holdings = LAST_ANALYSIS_STATE.get("holdings", [])
3487
+ dataframe_data = []
3488
+
3489
+ for holding in holdings:
3490
+ ticker = holding.get("ticker", "")
3491
+ quantity = holding.get("quantity", 0)
3492
+ if ticker and quantity > 0:
3493
+ dataframe_data.append([ticker, quantity, None, ""])
3494
+
3495
+ return dataframe_data
3496
+
3497
+ def calculate_tax_impact(
3498
+ filing_status: str,
3499
+ annual_income: float,
3500
+ cost_basis_method: str,
3501
+ cost_basis_data: list
3502
+ ):
3503
+ """Calculate tax impact for current portfolio holdings.
3504
+
3505
+ Args:
3506
+ filing_status: Tax filing status
3507
+ annual_income: Annual taxable income
3508
+ cost_basis_method: Cost basis calculation method
3509
+ cost_basis_data: Dataframe rows with ticker, shares, purchase_price, purchase_date
3510
+
3511
+ Returns:
3512
+ Formatted markdown tax analysis report
3513
+ """
3514
  global LAST_ANALYSIS_STATE
3515
 
3516
  if not LAST_ANALYSIS_STATE or "holdings" not in LAST_ANALYSIS_STATE:
 
3523
 
3524
  try:
3525
  holdings = LAST_ANALYSIS_STATE.get("holdings", [])
3526
+
3527
+ # Parse cost basis dataframe into structured format
3528
+ user_cost_basis = []
3529
+
3530
+ # Handle pandas DataFrame from Gradio
3531
+ import pandas as pd
3532
+ if isinstance(cost_basis_data, pd.DataFrame):
3533
+ logger.info(f"Received DataFrame with {len(cost_basis_data)} rows")
3534
+ if not cost_basis_data.empty:
3535
+ # Convert DataFrame to list of lists
3536
+ cost_basis_rows = cost_basis_data.values.tolist()
3537
+ logger.info(f"DataFrame data: {cost_basis_rows}")
3538
+ else:
3539
+ cost_basis_rows = []
3540
+ logger.info("DataFrame is empty")
3541
+ elif cost_basis_data is not None:
3542
+ # Already a list
3543
+ cost_basis_rows = cost_basis_data
3544
+ logger.info(f"Received list with {len(cost_basis_rows)} rows")
3545
+ else:
3546
+ cost_basis_rows = []
3547
+ logger.info("No cost basis data provided")
3548
+
3549
+ for row in cost_basis_rows:
3550
+ if len(row) >= 4:
3551
+ # Handle pandas nan values
3552
+ ticker_val = row[0]
3553
+ if pd.isna(ticker_val) or not str(ticker_val).strip():
3554
+ continue
3555
+
3556
+ ticker = str(ticker_val).strip().upper()
3557
+ shares = None if pd.isna(row[1]) else row[1]
3558
+ purchase_price = None if pd.isna(row[2]) else row[2]
3559
+ purchase_date = "" if pd.isna(row[3]) else str(row[3]).strip()
3560
+
3561
+ # Only include rows with meaningful data
3562
+ if ticker and purchase_price is not None and purchase_date:
3563
+ user_cost_basis.append({
3564
+ "ticker": ticker,
3565
+ "shares": shares,
3566
+ "purchase_price": purchase_price,
3567
+ "purchase_date": purchase_date,
3568
+ })
3569
+
3570
+ logger.info(f"Parsed {len(user_cost_basis)} valid cost basis entries: {user_cost_basis}")
3571
+
3572
  report = create_tax_analysis(
3573
  holdings=holdings,
3574
  filing_status=filing_status,
3575
  annual_income=annual_income,
3576
  cost_basis_method=cost_basis_method,
3577
+ user_cost_basis=user_cost_basis if user_cost_basis else None,
3578
  )
3579
  return format_tax_analysis_output(report)
3580
  except Exception as e:
 
3941
  outputs=[history_table_results, history_page_info_results]
3942
  )
3943
 
3944
+ # Tax load holdings button
3945
+ tax_load_holdings_btn.click(
3946
+ load_tax_holdings,
3947
+ inputs=[],
3948
+ outputs=[tax_cost_basis_input]
3949
+ )
3950
+
3951
  # Tax calculator button (Enhancement #5)
3952
  tax_calculate_btn.click(
3953
  calculate_tax_impact,
3954
+ inputs=[tax_filing_status, tax_annual_income, tax_cost_basis_method, tax_cost_basis_input],
3955
  outputs=[tax_analysis_output]
3956
  )
3957
 
backend/tax/interface.py CHANGED
@@ -8,6 +8,7 @@ from datetime import date
8
  from decimal import Decimal
9
  from typing import List, Dict, Any, Tuple, Optional
10
  import logging
 
11
 
12
  from backend.tax.calculator import TaxCalculator
13
  from backend.tax.optimizer import TaxOptimizer
@@ -24,6 +25,7 @@ def create_tax_analysis(
24
  filing_status: str = "single",
25
  annual_income: float = 75000.0,
26
  cost_basis_method: str = "fifo",
 
27
  ) -> Dict[str, Any]:
28
  """Create comprehensive tax analysis for portfolio holdings.
29
 
@@ -32,6 +34,7 @@ def create_tax_analysis(
32
  filing_status: Tax filing status (single, married_joint, married_separate, head_of_household)
33
  annual_income: Annual taxable income
34
  cost_basis_method: Cost basis method (fifo, lifo, hifo, average)
 
35
 
36
  Returns:
37
  Dictionary containing tax analysis results
@@ -62,8 +65,18 @@ def create_tax_analysis(
62
  tax_year=2024,
63
  )
64
 
65
- # For demonstration, create sample tax lots from holdings
66
- # In real implementation, this would come from transaction history
 
 
 
 
 
 
 
 
 
 
67
  current_prices = {}
68
  for holding in holdings:
69
  ticker = holding.get("ticker", "")
@@ -76,14 +89,53 @@ def create_tax_analysis(
76
  )
77
  current_prices[ticker] = Decimal(str(current_price))
78
 
79
- # Create sample tax lot (assuming purchase at 20% below current price for demo)
80
- # In real implementation, use actual transaction data
81
  quantity = Decimal(str(holding.get("quantity", 0)))
82
- if quantity > 0:
83
- # Simulate purchase at lower price to show potential gains
84
- sample_cost_basis = current_price * Decimal("0.8")
85
- acquisition_date = date.today().replace(year=date.today().year - 1)
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  tax_calc.add_tax_lot(
88
  ticker=ticker,
89
  acquisition_date=acquisition_date,
@@ -128,6 +180,7 @@ def create_tax_analysis(
128
  "filing_status": filing_status,
129
  "annual_income": annual_income,
130
  "cost_basis_method": cost_basis_method,
 
131
  "long_term_rate": float(lt_rate * 100),
132
  "short_term_rate": float(st_rate * 100),
133
  "total_unrealised_gains": float(total_unrealised_gains),
@@ -306,9 +359,21 @@ Commonly used for mutual funds and some ETFs.""",
306
  3. Review wash sale implications before selling and repurchasing
307
  4. Consider tax-efficient rebalancing strategies
308
  5. Consult your tax advisor before implementing any strategy
 
 
 
 
 
 
 
 
 
 
 
 
309
 
310
- *This analysis assumes current holdings were purchased at prices 20% below current market prices for demonstration purposes.
311
- For accurate analysis, provide actual transaction history including purchase dates and costs.*
312
  """
313
 
314
  return md
 
8
  from decimal import Decimal
9
  from typing import List, Dict, Any, Tuple, Optional
10
  import logging
11
+ from dateutil import parser as date_parser
12
 
13
  from backend.tax.calculator import TaxCalculator
14
  from backend.tax.optimizer import TaxOptimizer
 
25
  filing_status: str = "single",
26
  annual_income: float = 75000.0,
27
  cost_basis_method: str = "fifo",
28
+ user_cost_basis: Optional[List[Dict[str, Any]]] = None,
29
  ) -> Dict[str, Any]:
30
  """Create comprehensive tax analysis for portfolio holdings.
31
 
 
34
  filing_status: Tax filing status (single, married_joint, married_separate, head_of_household)
35
  annual_income: Annual taxable income
36
  cost_basis_method: Cost basis method (fifo, lifo, hifo, average)
37
+ user_cost_basis: Optional user-provided cost basis data with ticker, shares, purchase_price, purchase_date
38
 
39
  Returns:
40
  Dictionary containing tax analysis results
 
65
  tax_year=2024,
66
  )
67
 
68
+ # Convert user cost basis to dictionary for easy lookup
69
+ user_basis_map = {}
70
+ if user_cost_basis:
71
+ for entry in user_cost_basis:
72
+ ticker = entry.get("ticker", "").strip().upper()
73
+ if ticker:
74
+ user_basis_map[ticker] = entry
75
+
76
+ # Track whether we used any user-provided data
77
+ used_user_data = False
78
+
79
+ # Create tax lots from holdings using user data or simulated data
80
  current_prices = {}
81
  for holding in holdings:
82
  ticker = holding.get("ticker", "")
 
89
  )
90
  current_prices[ticker] = Decimal(str(current_price))
91
 
 
 
92
  quantity = Decimal(str(holding.get("quantity", 0)))
93
+ if quantity <= 0:
94
+ continue
 
 
95
 
96
+ # Use user-provided cost basis if available
97
+ if ticker in user_basis_map:
98
+ user_entry = user_basis_map[ticker]
99
+ try:
100
+ # Parse purchase price
101
+ purchase_price = Decimal(str(user_entry.get("purchase_price", 0)))
102
+ if purchase_price <= 0:
103
+ raise ValueError("Invalid purchase price")
104
+
105
+ # Parse purchase date
106
+ date_str = user_entry.get("purchase_date", "")
107
+ if isinstance(date_str, str) and date_str.strip():
108
+ acquisition_date = date_parser.parse(date_str).date()
109
+ else:
110
+ raise ValueError("Invalid purchase date")
111
+
112
+ # Use user-specified shares if available
113
+ user_shares = user_entry.get("shares")
114
+ if user_shares is not None:
115
+ quantity = Decimal(str(user_shares))
116
+
117
+ tax_calc.add_tax_lot(
118
+ ticker=ticker,
119
+ acquisition_date=acquisition_date,
120
+ quantity=quantity,
121
+ cost_basis_per_share=purchase_price,
122
+ )
123
+ used_user_data = True
124
+ except (ValueError, AttributeError) as e:
125
+ logger.warning(f"Invalid user cost basis for {ticker}: {e}. Using simulated data.")
126
+ # Fall back to simulated data
127
+ sample_cost_basis = current_prices[ticker] * Decimal("0.8")
128
+ acquisition_date = date.today().replace(year=date.today().year - 1)
129
+ tax_calc.add_tax_lot(
130
+ ticker=ticker,
131
+ acquisition_date=acquisition_date,
132
+ quantity=quantity,
133
+ cost_basis_per_share=sample_cost_basis,
134
+ )
135
+ else:
136
+ # Use simulated data (20% below current price for demonstration)
137
+ sample_cost_basis = current_prices[ticker] * Decimal("0.8")
138
+ acquisition_date = date.today().replace(year=date.today().year - 1)
139
  tax_calc.add_tax_lot(
140
  ticker=ticker,
141
  acquisition_date=acquisition_date,
 
180
  "filing_status": filing_status,
181
  "annual_income": annual_income,
182
  "cost_basis_method": cost_basis_method,
183
+ "used_user_data": used_user_data,
184
  "long_term_rate": float(lt_rate * 100),
185
  "short_term_rate": float(st_rate * 100),
186
  "total_unrealised_gains": float(total_unrealised_gains),
 
359
  3. Review wash sale implications before selling and repurchasing
360
  4. Consider tax-efficient rebalancing strategies
361
  5. Consult your tax advisor before implementing any strategy
362
+ """
363
+
364
+ # Add disclaimer based on whether user data was used
365
+ if report.get('used_user_data', False):
366
+ md += """
367
+ ---
368
+
369
+ **Data Source**: This analysis uses your provided purchase information for accurate tax calculations.
370
+ """
371
+ else:
372
+ md += """
373
+ ---
374
 
375
+ **Data Source**: This analysis assumes current holdings were purchased at prices 20% below current market prices for demonstration purposes.
376
+ For accurate analysis, use the 'Load Holdings' button and enter your actual purchase prices and dates.
377
  """
378
 
379
  return md
pyproject.toml CHANGED
@@ -60,6 +60,7 @@ dependencies = [
60
  "python-dotenv>=1.0.0",
61
  "httpx>=0.25.0",
62
  "tenacity>=8.2.0",
 
63
  # Sentiment Analysis
64
  "vaderSentiment>=3.3.2",
65
  # Monitoring & Observability
 
60
  "python-dotenv>=1.0.0",
61
  "httpx>=0.25.0",
62
  "tenacity>=8.2.0",
63
+ "python-dateutil>=2.8.0",
64
  # Sentiment Analysis
65
  "vaderSentiment>=3.3.2",
66
  # Monitoring & Observability
uv.lock CHANGED
@@ -3506,6 +3506,7 @@ dependencies = [
3506
  { name = "pydantic-ai" },
3507
  { name = "pypdf" },
3508
  { name = "pyportfolioopt" },
 
3509
  { name = "python-dotenv" },
3510
  { name = "pywavelets" },
3511
  { name = "quantstats" },
@@ -3563,6 +3564,7 @@ requires-dist = [
3563
  { name = "pydantic-ai", specifier = "==1.18.0" },
3564
  { name = "pypdf", specifier = ">=3.17.0" },
3565
  { name = "pyportfolioopt", specifier = ">=1.5.6" },
 
3566
  { name = "python-dotenv", specifier = ">=1.0.0" },
3567
  { name = "pywavelets", specifier = ">=1.4.1" },
3568
  { name = "quantstats", specifier = ">=0.0.77" },
 
3506
  { name = "pydantic-ai" },
3507
  { name = "pypdf" },
3508
  { name = "pyportfolioopt" },
3509
+ { name = "python-dateutil" },
3510
  { name = "python-dotenv" },
3511
  { name = "pywavelets" },
3512
  { name = "quantstats" },
 
3564
  { name = "pydantic-ai", specifier = "==1.18.0" },
3565
  { name = "pypdf", specifier = ">=3.17.0" },
3566
  { name = "pyportfolioopt", specifier = ">=1.5.6" },
3567
+ { name = "python-dateutil", specifier = ">=2.8.0" },
3568
  { name = "python-dotenv", specifier = ">=1.0.0" },
3569
  { name = "pywavelets", specifier = ">=1.4.1" },
3570
  { name = "quantstats", specifier = ">=0.0.77" },