diff --git a/project-templates/python/alternative-data-universe-braincompanyfilinglanguagemetricsuniverseall/research.ipynb b/project-templates/python/alternative-data-universe-braincompanyfilinglanguagemetricsuniverseall/research.ipynb new file mode 100644 index 0000000000..e49361ebee --- /dev/null +++ b/project-templates/python/alternative-data-universe-braincompanyfilinglanguagemetricsuniverseall/research.ipynb @@ -0,0 +1,173 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a1b2c3d4", + "metadata": {}, + "source": [ + "![QuantConnect Logo](https://cdn.quantconnect.com/web/i/icon.png)\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "e5f6a7b8", + "metadata": {}, + "source": [ + "## Brain Company Filing Language Metrics Research\n", + "\n", + "This notebook studies whether SEC filing sentiment helps explain future returns." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c9d0e1f2", + "metadata": {}, + "outputs": [], + "source": [ + "qb = QuantBook()\n", + "# Anchor the research clock to the start of 2026 for a reproducible history window.\n", + "qb.set_start_date(2026, 1, 1)\n", + "# Daily bars will have an end_time that matches the following midnight.\n", + "qb.settings.daily_precise_end_time = False" + ] + }, + { + "cell_type": "markdown", + "id": "a3b4c5d6", + "metadata": {}, + "source": [ + "### Build a Filing Sentiment Universe\n", + "\n", + "Select US Equities with positive sentiment in both the report and MD&A sections of their latest SEC filing, then inspect the returned universe history." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e7f8a9b0", + "metadata": {}, + "outputs": [], + "source": [ + "def select_assets(data: List[BrainCompanyFilingLanguageMetricsUniverseAll]) -> List[Symbol]:\n", + " # Keep names with positive sentiment in both the report and MD&A sections.\n", + " return [d.symbol for d in data\n", + " if d.report_sentiment and d.report_sentiment.sentiment and d.report_sentiment.sentiment > 0\n", + " and d.management_discussion_analyasis_of_financial_condition_and_results_of_operations\n", + " and d.management_discussion_analyasis_of_financial_condition_and_results_of_operations.sentiment\n", + " and d.management_discussion_analyasis_of_financial_condition_and_results_of_operations.sentiment > 0]\n", + "\n", + "# Add the Brain Company Filing Language Metrics universe.\n", + "universe = qb.add_universe(BrainCompanyFilingLanguageMetricsUniverseAll, select_assets)\n", + "# Request universe history of the last 90 days.\n", + "universe_history = qb.universe_history(universe, 
qb.time - timedelta(90), qb.time - timedelta(1), flatten=True).rename_axis(['time', 'symbol']).drop(columns=['time'])\n", + "# Print the returned shape and columns.\n", + "print(f\"Shape: {universe_history.shape}\")\n", + "print(f\"Columns: {list(universe_history.columns)}\")\n", + "universe_history.head()" + ] + }, + { + "cell_type": "markdown", + "id": "c1d2e3f4", + "metadata": {}, + "source": [ + "### Universe Diagnostics\n", + "\n", + "Check how many assets pass the filter each day and summarize the factors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a5b6c7d8", + "metadata": {}, + "outputs": [], + "source": [ + "# Extract the numeric sentiment score from the nested report-sentiment column.\n", + "universe_history['reportsentiment'] = universe_history['reportsentiment'].map(lambda x: x.sentiment if x is not None else None)\n", + "# Count selected assets by day.\n", + "universe_size = universe_history.reset_index().groupby(['time', 'symbol']).size().groupby('time').size()\n", + "print(f\"Universe days: {len(universe_size)}\")\n", + "# Store the selected symbol list.\n", + "unique_assets = list(universe_history.index.levels[1].unique())\n", + "print(f\"Mean universe size per day: {universe_size.mean():.1f}\")\n", + "print('')\n", + "print(universe_history.reportsentiment.describe().map('{:0.3f}'.format))\n", + "universe_size.plot(title='Daily Universe Size', ylabel='Universe Size');" + ] + }, + { + "cell_type": "markdown", + "id": "e9f0a1b2", + "metadata": {}, + "source": [ + "### Daily Universe Prices\n", + "\n", + "Fetch daily price history for every symbol that appears in the universe." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3d4e5f6", + "metadata": {}, + "outputs": [], + "source": [ + "# Extract the unique assets that appear in the universe history.\n", + "symbols = list(universe_history.index.get_level_values(1).unique())\n", + "# Fetch daily historical price metrics using the earliest timestamp available in the index.\n", + "history = qb.history(symbols, universe_history.index[0][0] - timedelta(1), qb.time, Resolution.DAILY)\n", + "history" + ] + }, + { + "cell_type": "markdown", + "id": "a7b8c9d0", + "metadata": {}, + "source": [ + "### Align Filing Sentiment And Returns\n", + "\n", + "Build a joined table of report sentiment and future returns." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d56c7cd", + "metadata": {}, + "outputs": [], + "source": [ + "# Align the factor with the return from the next open to the following open.\n", + "dataset = (\n", + " universe_history.reset_index().groupby(['time', 'symbol']).agg(reportsentiment=('reportsentiment', 'mean'))\n", + " .join(history.open.unstack('symbol').sort_index().pct_change(2, fill_method=None).shift(-2).stack().rename('futurereturn'), how='inner')\n", + ")\n", + "\n", + "dataset.head()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Foundation-Py-Default", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}