Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Universe data frames improvements #8433

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Algorithm.CSharp/QuantConnect.Algorithm.CSharp.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
<DebugType>portable</DebugType>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.40" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.41" />
<PackageReference Include="Accord" Version="3.6.0" />
<PackageReference Include="Accord.Fuzzy" Version="3.6.0" />
<PackageReference Include="Accord.MachineLearning" Version="3.6.0" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
<PackageLicenseFile>LICENSE</PackageLicenseFile>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.40" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.41" />
<PackageReference Include="Accord" Version="3.6.0" />
<PackageReference Include="Accord.Math" Version="3.6.0" />
<PackageReference Include="Accord.Statistics" Version="3.6.0" />
Expand Down
2 changes: 1 addition & 1 deletion Algorithm.Python/QuantConnect.Algorithm.Python.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
<Compile Include="..\Common\Properties\SharedAssemblyInfo.cs" Link="Properties\SharedAssemblyInfo.cs" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.40" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.41" />
</ItemGroup>
<ItemGroup>
<Content Include="OptionUniverseFilterGreeksShortcutsRegressionAlgorithm.py" />
Expand Down
2 changes: 1 addition & 1 deletion Algorithm/QuantConnect.Algorithm.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
<PackageLicenseFile>LICENSE</PackageLicenseFile>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.40" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.41" />
<PackageReference Include="MathNet.Numerics" Version="5.0.0" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.2" />
<PackageReference Include="NodaTime" Version="3.0.5" />
Expand Down
2 changes: 1 addition & 1 deletion AlgorithmFactory/QuantConnect.AlgorithmFactory.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
<PackageLicenseFile>LICENSE</PackageLicenseFile>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.40" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.41" />
<PackageReference Include="NodaTime" Version="3.0.5" />
</ItemGroup>
<ItemGroup>
Expand Down
12 changes: 11 additions & 1 deletion Common/Python/PandasData.cs
Original file line number Diff line number Diff line change
Expand Up @@ -710,7 +710,17 @@ public void Add(DateTime time, object input, bool overrideValues)
}
else if (value != null)
{
ShouldFilter = false;
if (value is ICollection enumerable)
{
if (enumerable.Count != 0)
{
ShouldFilter = false;
}
}
else
{
ShouldFilter = false;
}
}
}

Expand Down
42 changes: 1 addition & 41 deletions Common/Python/PythonSlice.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,29 +27,6 @@ namespace QuantConnect.Python
public class PythonSlice : Slice
{
private readonly Slice _slice;
private static readonly PyObject _converter;

// Static constructor: compiles the Python "converter" module from the inline source below
// and stores it in _converter.
static PythonSlice()
{
// The module is created while holding the Python GIL.
using (Py.GIL())
{
// Python Data class: converts custom data (PythonData) into a python object.
// __init__ keeps a reference to the wrapped object, copies every attribute whose name does not
// start with "__" onto the wrapper, and then exposes each storage dictionary entry under a
// title-cased name with '-', '.' and spaces removed.
// NOTE(review): callable(attr) is evaluated on the attribute *name* (a str), so it does not look
// like it filters out methods - confirm the intent.
// NOTE(review): both arms of the "value = ..." conditional yield kvp.Value - confirm whether a
// conversion was intended for non-float values.
_converter = PyModule.FromString("converter",
"class Data(object):\n" +
" def __init__(self, data):\n" +
" self.data = data\n" +
" members = [attr for attr in dir(data) if not callable(attr) and not attr.startswith(\"__\")]\n" +
" for member in members:\n" +
" setattr(self, member, getattr(data, member))\n" +
" for kvp in data.GetStorageDictionary():\n" +
" name = kvp.Key.replace('-',' ').replace('.',' ').title().replace(' ', '')\n" +
" value = kvp.Value if isinstance(kvp.Value, float) else kvp.Value\n" +
" setattr(self, name, value)\n" +

// __str__ delegates to the wrapped object's ToString()
" def __str__(self):\n" +
" return self.data.ToString()");
}
}

/// <summary>
/// Initializes a new instance of the <see cref="PythonSlice"/> class
Expand Down Expand Up @@ -122,24 +99,7 @@ public override dynamic this[Symbol symbol]
{
get
{
var data = _slice[symbol];

var dynamicData = data as DynamicData;
if (dynamicData != null)
{
try
{
using (Py.GIL())
{
return _converter.InvokeMethod("Data", new[] { dynamicData.ToPython() });
}
}
catch
{
// NOP
}
}
return data;
return _slice[symbol];
}
}

Expand Down
2 changes: 1 addition & 1 deletion Common/QuantConnect.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
<Message Text="SelectedOptimization $(SelectedOptimization)" Importance="high" />
</Target>
<ItemGroup>
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.40" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.41" />
<PackageReference Include="CloneExtensions" Version="1.3.0" />
<PackageReference Include="fasterflect" Version="3.0.0" />
<PackageReference Include="MathNet.Numerics" Version="5.0.0" />
Expand Down
6 changes: 5 additions & 1 deletion Engine/DataFeeds/BaseDataCollectionAggregatorReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,15 @@ public class BaseDataCollectionAggregatorReader : TextSubscriptionDataSourceRead
/// <param name="config">The subscription's configuration</param>
/// <param name="date">The date this factory was produced to read data for</param>
/// <param name="isLiveMode">True if we're in live mode, false for backtesting</param>
/// <param name="objectStore">The object storage for data persistence</param>
public BaseDataCollectionAggregatorReader(IDataCacheProvider dataCacheProvider, SubscriptionDataConfig config, DateTime date,
    bool isLiveMode, IObjectStore objectStore)
    : base(dataCacheProvider, config, date, isLiveMode, objectStore)
{
    // If the configured type is not a BaseDataCollection, default to BaseDataCollection.
    // e.g. custom Python dynamic folding collections need to be aggregated into a BaseDataCollection,
    // but they implement PythonData, so casting an instance of PythonData to BaseDataCollection will fail.
    // The collection type is assigned exactly once, here, so there is no window in which it holds an unchecked type.
    _collectionType = config.Type.IsAssignableTo(typeof(BaseDataCollection)) ? config.Type : typeof(BaseDataCollection);
}

/// <summary>
Expand Down
2 changes: 1 addition & 1 deletion Engine/QuantConnect.Lean.Engine.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
<Message Text="SelectedOptimization $(SelectedOptimization)" Importance="high" />
</Target>
<ItemGroup>
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.40" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.41" />
<PackageReference Include="fasterflect" Version="3.0.0" />
<PackageReference Include="MathNet.Numerics" Version="5.0.0" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.2" />
Expand Down
2 changes: 1 addition & 1 deletion Indicators/QuantConnect.Indicators.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
<Message Text="SelectedOptimization $(SelectedOptimization)" Importance="high" />
</Target>
<ItemGroup>
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.40" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.41" />
<PackageReference Include="MathNet.Numerics" Version="5.0.0" />
</ItemGroup>
<ItemGroup>
Expand Down
2 changes: 1 addition & 1 deletion Report/QuantConnect.Report.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
<PackageLicenseFile>LICENSE</PackageLicenseFile>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.40" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.41" />
<PackageReference Include="Deedle" Version="2.1.0" />
<PackageReference Include="MathNet.Numerics" Version="5.0.0" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.2" />
Expand Down
2 changes: 1 addition & 1 deletion Research/QuantConnect.Research.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
<ItemGroup>
<PackageReference Include="Plotly.NET" Version="3.0.1" />
<PackageReference Include="Plotly.NET.Interactive" Version="3.0.2" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.40" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.41" />
<PackageReference Include="NodaTime" Version="3.0.5" />
</ItemGroup>
<ItemGroup>
Expand Down
159 changes: 159 additions & 0 deletions Tests/Algorithm/AlgorithmHistoryTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
using QuantConnect.Data.Fundamental;
using QuantConnect.Data.UniverseSelection;
using QuantConnect.Tests.Common.Data.Fundamental;
using QuantConnect.Logging;

namespace QuantConnect.Tests.Algorithm
{
Expand Down Expand Up @@ -3296,6 +3297,164 @@ assert isinstance(constituent, Fundamental), f'Unflattened DF: expected a list o
}
}

/// <summary>
/// Requests the history of a C# custom universe (<see cref="CustomUniverseData"/>) from Python, both
/// unflattened and flattened, and asserts that the two data frames agree on dates, symbols and weights.
/// Also asserts that the flattened data frame has no "data" column.
/// </summary>
[Test]
public void CSharpCustomUniverseHistoryDataFramesHaveExpectedFormat()
{
    var algorithm = GetAlgorithm(new DateTime(2015, 01, 15));
    var universe = algorithm.AddUniverse<CustomUniverseData>("CustomUniverse", Resolution.Daily, (x) => x.Select(y => y.Symbol));

    using (Py.GIL())
    {
        PythonInitializer.Initialize();
        algorithm.SetPandasConverter();

        // The module is registered under this test's own name so that it does not share a module entry
        // with PythonCustomUniverseHistoryDataFramesHaveExpectedFormat, which defines a function with the
        // same name but a different signature.
        using var testModule = PyModule.FromString(nameof(CSharpCustomUniverseHistoryDataFramesHaveExpectedFormat),
            @"
from AlgorithmImports import *

def get_universe_history(algorithm, universe, flatten):
    return algorithm.history(universe, 3, flatten=flatten)
");

        dynamic getUniverseHistory = testModule.GetAttr("get_universe_history");
        var df = getUniverseHistory(algorithm, universe, false);
        var flattenedDf = getUniverseHistory(algorithm, universe, true);

        Func<CustomUniverseData, decimal> getWeight = (data) => data.Weight;
        AssertCustomUniverseDataFrames(df, flattenedDf, getWeight);

        // Materialize the column names once: each __str__ result is disposed as it is read,
        // so the query must not be enumerated lazily or more than once.
        var columns = ((List<PyObject>)flattenedDf.columns.to_list().As<List<PyObject>>())
            .Select(column => column.InvokeMethod("__str__").GetAndDispose<string>())
            .ToList();
        CollectionAssert.DoesNotContain(columns, "data");
    }
}

/// <summary>
/// Defines a custom universe data type in Python (a PythonData subclass read as a folding collection
/// from TestData/portfolio_targets.csv), requests its history both unflattened and flattened, and
/// asserts that the two data frames agree on dates, symbols and weights.
/// </summary>
[Test]
public void PythonCustomUniverseHistoryDataFramesHaveExpectedFormat()
{
    var algorithm = GetAlgorithm(new DateTime(2015, 01, 15));

    using (Py.GIL())
    {
        PythonInitializer.Initialize();
        algorithm.SetPandasConverter();

        // The Python source starts at column 0 of the verbatim string: its indentation is significant.
        using var testModule = PyModule.FromString("PythonCustomUniverseHistoryDataFramesHaveExpectedFormat",
            @"
from AlgorithmImports import *

class CustomUniverseData(PythonData):

    def get_source(self, config: SubscriptionDataConfig, date: datetime, is_live_mode: bool) -> SubscriptionDataSource:
        return SubscriptionDataSource('TestData/portfolio_targets.csv',
                                      SubscriptionTransportMedium.LOCAL_FILE,
                                      FileFormat.FOLDING_COLLECTION)

    def reader(self, config: SubscriptionDataConfig, line: str, date: datetime, is_live_mode: bool) -> BaseData:
        # Skip blank lines and the header row.
        if not line or not line[0].isnumeric():
            return None
        items = line.split(',')
        data = CustomUniverseData()
        data.end_time = datetime.strptime(items[0], '%Y-%m-%d')
        data.time = data.end_time - timedelta(1)
        data.symbol = Symbol.create(items[1], SecurityType.EQUITY, Market.USA)
        data['weight'] = float(items[2])
        return data

def get_universe_history(algorithm, flatten):
    universe = algorithm.add_universe(CustomUniverseData, 'CustomUniverse', Resolution.DAILY, lambda alt_coarse: [x.symbol for x in alt_coarse])
    return algorithm.history(universe, 3, flatten=flatten)

");

        dynamic getUniverseHistory = testModule.GetAttr("get_universe_history");
        var df = getUniverseHistory(algorithm, false);
        var flattenedDf = getUniverseHistory(algorithm, true);

        // Invariant culture keeps the conversion independent of the machine's regional settings.
        Func<PythonData, decimal> getWeight = (data) => Convert.ToDecimal(data.GetProperty("weight"), CultureInfo.InvariantCulture);
        AssertCustomUniverseDataFrames(df, flattenedDf, getWeight);
    }
}

/// <summary>
/// Custom universe data used by the history data frame tests. Each instance is one row of
/// TestData/portfolio_targets.csv: a date, a ticker and a weight.
/// </summary>
public class CustomUniverseData : BaseDataCollection
{
    /// <summary>
    /// Weight read from the third CSV column
    /// </summary>
    public decimal Weight { get; private set; }

    /// <summary>
    /// Points the reader at the local test CSV file, to be read as a folding collection
    /// </summary>
    public override SubscriptionDataSource GetSource(SubscriptionDataConfig config, DateTime date, bool isLiveMode)
    {
        return new SubscriptionDataSource("TestData/portfolio_targets.csv",
            SubscriptionTransportMedium.LocalFile,
            FileFormat.FoldingCollection);
    }

    /// <summary>
    /// Parses one CSV line formatted as "yyyy-MM-dd,ticker,weight"
    /// </summary>
    /// <returns>The parsed data point, or null for lines that do not match the expected format (e.g. the header row)</returns>
    public override BaseData Reader(SubscriptionDataConfig config, string line, DateTime date, bool isLiveMode)
    {
        if (string.IsNullOrWhiteSpace(line))
        {
            return null;
        }

        var csv = line.Split(',');

        // The header row and malformed rows are expected input: reject them with explicit checks
        // instead of relying on a catch-all exception handler.
        if (csv.Length < 3
            || string.IsNullOrWhiteSpace(csv[1])
            || !DateTime.TryParseExact(csv[0], "yyyy-MM-dd", CultureInfo.InvariantCulture, DateTimeStyles.None, out var endTime)
            || !decimal.TryParse(csv[2], NumberStyles.Number, CultureInfo.InvariantCulture, out var weight))
        {
            return null;
        }

        return new CustomUniverseData
        {
            Symbol = Symbol.Create(csv[1], SecurityType.Equity, Market.USA),
            // the data point covers the day leading up to the date in the file
            Time = endTime - TimeSpan.FromDays(1),
            EndTime = endTime,
            Weight = weight
        };
    }
}

/// <summary>
/// Checks that the unflattened and the flattened universe history data frames hold the same information:
/// both must cover the three expected dates and, for each date, list the same symbols with the same weights.
/// </summary>
/// <param name="df">The unflattened data frame; dates are read from index level 1</param>
/// <param name="flattenedDf">The flattened data frame; dates are read from index level 0</param>
/// <param name="getWeight">Extracts the weight from a constituent of the unflattened data frame</param>
private static void AssertCustomUniverseDataFrames<T>(dynamic df, dynamic flattenedDf, Func<T, decimal> getWeight)
    where T : BaseData
{
    var datesToCheck = new List<DateTime>
    {
        new DateTime(2015, 01, 13),
        new DateTime(2015, 01, 14),
        new DateTime(2015, 01, 15),
    };

    var flattenedDates = ((List<DateTime>)flattenedDf.index.get_level_values(0).to_list().As<List<DateTime>>()).Distinct().ToList();
    CollectionAssert.AreEqual(datesToCheck, flattenedDates);

    var unflattenedDates = ((List<DateTime>)df.index.get_level_values(1).to_list().As<List<DateTime>>()).Distinct().ToList();
    CollectionAssert.AreEqual(datesToCheck, unflattenedDates);

    // with the symbol level removed, the unflattened frame can be indexed by date alone
    df = df.droplevel(0);
    foreach (var date in datesToCheck)
    {
        using var pyDate = date.ToPython();
        var constituents = (List<T>)df.loc[pyDate].As<List<T>>();
        var flattenedRows = flattenedDf.loc[pyDate];

        CollectionAssert.IsNotEmpty(constituents);
        Assert.AreEqual(flattenedRows.shape[0].As<int>(), constituents.Count);

        // same symbols, in the same order
        var symbols = constituents.Select(x => x.Symbol).ToList();
        var flattenedSymbols = ((List<Symbol>)flattenedRows.index.to_list().As<List<Symbol>>()).ToList();
        CollectionAssert.AreEqual(flattenedSymbols, symbols);

        // same weight for each symbol
        var weights = constituents.Select(getWeight).ToList();
        var flattenedWeights = symbols
            .Select(symbol => flattenedRows.loc[symbol.ToPython()]["weight"].As<decimal>())
            .Cast<decimal>()
            .ToList();
        CollectionAssert.AreEqual(flattenedWeights, weights);
    }

    Log.Debug((string)df.to_string());
    Log.Debug((string)flattenedDf.to_string());
}

private static void AssertDesNotThrowPythonException(Action action)
{
try
Expand Down
5 changes: 4 additions & 1 deletion Tests/QuantConnect.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(SolutionDir)\.nuget\NuGet.targets" Condition="Exists('$(SolutionDir)\.nuget\NuGet.targets')" />
<ItemGroup>
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.40" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.41" />
<PackageReference Include="Accord" Version="3.6.0" />
<PackageReference Include="Accord.Math" Version="3.6.0" />
<PackageReference Include="Common.Logging" Version="3.4.1" />
Expand Down Expand Up @@ -240,6 +240,9 @@
<None Include="TestData\daily-stock-picker-live.csv">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Include="TestData\portfolio_targets.csv">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Include="TestData\FillForwardBars.zip">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
Expand Down
10 changes: 10 additions & 0 deletions Tests/TestData/portfolio_targets.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Date,Symbol,Weight
2015-01-13,TLT,0.6403554273566532
2015-01-13,GLD,0.2966005853128983
2015-01-13,IWM,0.06304398733044848
2015-01-14,USO,0.5873635006180897
2015-01-14,GLD,0.19451676316704644
2015-01-14,TLT,0.2181197362148639
2015-01-15,IWM,0.563722959965805
2015-01-15,SPY,0.3327542780145993
2015-01-15,TLT,0.10352276201959563