Skip to content

Commit

Permalink
DYN-6146 Removing PII data from a JSON workspace (#14471)
Browse files Browse the repository at this point in the history
* Removing PII data from a JSON workspace

* Refactoring and Adding a Unit Test

* Restricting the access level

* PII Detector refactored

* Updating the Unit Test

---------

Co-authored-by: Jesus Alfredo Alviño <[email protected]>
Co-authored-by: Aaron (Qilong) <[email protected]>
  • Loading branch information
3 people authored Nov 6, 2023
1 parent 81a68f6 commit 4e0cf2e
Show file tree
Hide file tree
Showing 4 changed files with 605 additions and 7 deletions.
21 changes: 14 additions & 7 deletions src/DynamoCoreWpf/ViewModels/Core/WorkspaceViewModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,16 @@ internal void ZoomOutInternal()
ResetFitViewToggle(null);
}

/// <summary>
/// Builds the JSON representation of this workspace: the model is serialized first,
/// and the View block is then added to the parsed result.
/// </summary>
/// <param name="engine">Engine controller forwarded to the model serializer; defaults to null.</param>
/// <returns>The parsed workspace JSON with the View block added.</returns>
internal JObject GetJsonRepresentation(EngineController engine = null)
{
    // Step 1: Serialize the workspace model and parse the text into a JSON tree.
    JObject workspaceJson = JObject.Parse(Model.ToJson(engine));

    // Step 2: Add the View block on top of the serialized workspace.
    return AddViewBlockToJSON(workspaceJson);
}

/// <summary>
/// WorkspaceViewModel's Save method does a two-part serialization. First, it serializes the Workspace,
/// then adds a View property to serialized Workspace, and sets its value to the serialized ViewModel.
Expand All @@ -616,14 +626,11 @@ internal void Save(string filePath, bool isBackup = false, EngineController engi

//set the name before serializing model.
this.Model.setNameBasedOnFileName(filePath, isBackup);
// Stage 1: Serialize the workspace.
var json = Model.ToJson(engine);
var json_parsed = JObject.Parse(json);

// Stage 2: Add the View.
var jo = AddViewBlockToJSON(json_parsed);
// Stage 1: Serialize the workspace and the View
var jo = GetJsonRepresentation(engine);

// Stage 3: Save
// Stage 2: Save
string saveContent;
if(saveContext == SaveContext.SaveAs && !isBackup)
{
Expand Down Expand Up @@ -653,7 +660,7 @@ internal void Save(string filePath, bool isBackup = false, EngineController engi
saveContent = jo.ToString();
}
File.WriteAllText(filePath, saveContent);

// Handle Workspace or CustomNodeWorkspace related non-serialization internal logic
// Only for actual save, update file path and recent file list
if (!isBackup)
Expand Down
133 changes: 133 additions & 0 deletions src/DynamoUtilities/PIIDetector.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@

using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;

namespace Dynamo.Utilities
{
    /// <summary>
    /// Helper Class for removing PII Data from a JSON workspace
    /// </summary>
    internal static class PIIDetector
    {
        // Names of the JSON properties this helper reads or rewrites.
        const string Nodes = "Nodes";
        const string InputValue = "InputValue";
        const string HintPath = "HintPath";
        const string Code = "Code";
        const string View = "View";
        const string Annotations = "Annotations";
        const string Title = "Title";
        const string Id = "Id";

        // Pattern sources. These are runtime data: keep them byte-for-byte stable.
        const string emailPattern = @"\w+([-+.]\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*";
        const string websitePattern = @"(?:http(s)?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~:/?#[\]@!\$&'\(\)\*\+,;=.]+";
        // Every alternative is anchored with '^', so only a path at the very start of the text matches.
        const string directoryPattern = @"(^([a-z]|[A-Z]):(?=\\(?![\0-\37<>:""/\\|?*])|\/(?![\0-\37<>:""/\\|?*])|$)|^\\(?=[\\\/][^\0-\37<>:""/\\|?*]+)|^(?=(\\|\/)$)|^\.(?=(\\|\/)$)|^\.\.(?=(\\|\/)$)|^(?=(\\|\/)[^\0-\37<>:""/\\|?*]+)|^\.(?=(\\|\/)[^\0-\37<>:""/\\|?*]+)|^\.\.(?=(\\|\/)[^\0-\37<>:""/\\|?*]+))((\\|\/)[^\0-\37<>:""/\\|?*]+|(\\|\/)$)*()";
        // Matches two 4-digit groups joined by '-', ',' or a space.
        const string creditCardPattern = @"(\d{4}[-, ]\d{4})";
        const string ssnPattern = @"\d{3}[- ]\d{2}[- ]\d{4}";
        const string ipPattern = @"((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)";
        const string datePattern = @"\d{1,2}[/-]\d{1,2}[/-]\d{2,4}";

        // Regex instances are built once and shared instead of being re-created on every call.
        static readonly Regex emailRegex = new Regex(emailPattern);
        static readonly Regex websiteRegex = new Regex(websitePattern);
        static readonly Regex directoryRegex = new Regex(directoryPattern);
        static readonly Regex creditCardRegex = new Regex(creditCardPattern);
        static readonly Regex ssnRegex = new Regex(ssnPattern);
        static readonly Regex ipRegex = new Regex(ipPattern);
        static readonly Regex dateRegex = new Regex(datePattern);

        // Order matters: each replacement runs on the output of the previous one.
        // Declared after the individual regexes so they are initialized first.
        static readonly Regex[] scrubSequence =
        {
            emailRegex, websiteRegex, directoryRegex, creditCardRegex, ssnRegex, ipRegex, dateRegex
        };

        /// <summary>
        /// Removes the PII data from a JSON workspace indicating the status of the result.
        /// The "InputValue", "HintPath" and "Code" properties of every object in "Nodes" and the
        /// "Title" property of every object in "View"/"Annotations" are scrubbed.
        /// </summary>
        /// <remarks>
        /// The workspace is modified in place; the returned object is the same instance that was passed in.
        /// Missing sections (no "Nodes", no "View", no "Annotations") simply leave nothing to scrub and
        /// are not reported as a failure.
        /// </remarks>
        /// <param name="jsonObject">Workspace JSON to scrub.</param>
        /// <returns>
        /// Item1 is the (mutated) workspace; Item2 is true when scrubbing completed and false when
        /// the input was null or an unexpected error interrupted it.
        /// </returns>
        public static Tuple<JObject,bool> RemovePIIData(JObject jsonObject)
        {
            if (jsonObject == null)
            {
                return new Tuple<JObject, bool>(null, false);
            }

            bool removeResult = true;

            try
            {
                var nodes = jsonObject[Nodes] as JArray;
                if (nodes != null)
                {
                    foreach (var node in nodes.Children<JObject>())
                    {
                        ScrubProperties(node, InputValue, HintPath, Code);
                    }
                }

                var view = jsonObject[View] as JObject;
                var annotations = view?[Annotations] as JArray;
                if (annotations != null)
                {
                    foreach (var annotation in annotations.Children<JObject>())
                    {
                        ScrubProperties(annotation, Title);
                    }
                }
            }
            catch (Exception)
            {
                // Best effort: callers inspect the flag rather than handling an exception.
                removeResult = false;
            }

            return new Tuple<JObject, bool>(jsonObject, removeResult);
        }

        /// <summary>
        /// Scrubs the values of the properties of <paramref name="owner"/> whose name is listed in <paramref name="names"/>.
        /// </summary>
        private static void ScrubProperties(JObject owner, params string[] names)
        {
            // Snapshot the properties so that rewriting values never interferes with the enumeration.
            foreach (var property in owner.Properties().ToList())
            {
                if (names.Contains(property.Name))
                {
                    ScrubProperty(property);
                }
            }
        }

        /// <summary>
        /// Replaces the value of a single property with its scrubbed text, when the value is textual.
        /// </summary>
        private static void ScrubProperty(JProperty property)
        {
            var scalar = property.Value as JValue;
            if (scalar == null)
            {
                // Objects and arrays cannot be converted to a string; leave them untouched.
                return;
            }

            switch (scalar.Type)
            {
                case JTokenType.Null:
                case JTokenType.Undefined:
                case JTokenType.Integer:
                case JTokenType.Float:
                case JTokenType.Boolean:
                    // Converting these to text would change the JSON type of the value
                    // (and a null would make the regex replacement throw).
                    return;
            }

            property.Value = RemovePIIData((string)scalar);
        }

        /// <summary>
        /// Returns the first object child of <paramref name="collection"/> whose "Id" equals <paramref name="id"/>, or null.
        /// </summary>
        private static JObject FindById(JToken collection, string id)
        {
            return collection?.Children<JObject>().FirstOrDefault(item => item.Value<string>(Id) == id);
        }

        /// <summary>
        /// Finds the node with the given id in the "Nodes" section of a workspace.
        /// </summary>
        /// <param name="jsonWorkspace">Workspace JSON to search.</param>
        /// <param name="nodeId">Value of the node's "Id" property.</param>
        /// <returns>The matching node, or null when the section or the node is missing.</returns>
        public static JToken GetNodeById(JObject jsonWorkspace,string nodeId)
        {
            return FindById(jsonWorkspace?[Nodes], nodeId);
        }

        /// <summary>
        /// Reads a property value from the node with the given id.
        /// </summary>
        /// <param name="jsonWorkspace">Workspace JSON to search.</param>
        /// <param name="nodeId">Value of the node's "Id" property.</param>
        /// <param name="propertyName">Name of the node property to read.</param>
        /// <returns>The property value, or null when the node or the property is missing.</returns>
        public static JToken GetNodeValue(JObject jsonWorkspace, string nodeId,string propertyName)
        {
            return FindById(jsonWorkspace?[Nodes], nodeId)?[propertyName];
        }

        /// <summary>
        /// Reads the "Title" of the annotation with the given id from the "View"/"Annotations" section.
        /// </summary>
        /// <param name="jsonWorkspace">Workspace JSON to search.</param>
        /// <param name="nodeId">Value of the annotation's "Id" property.</param>
        /// <returns>The title value, or null when the section, the annotation or the title is missing.</returns>
        public static JToken GetNoteValue(JObject jsonWorkspace, string nodeId)
        {
            var view = jsonWorkspace?[View] as JObject;
            return FindById(view?[Annotations], nodeId)?[Title];
        }

        // Each Contains* helper reports whether the text has at least one match for its pattern.
        // A null text never matches.
        internal static bool ContainsEmail(string value) { return value != null && emailRegex.IsMatch(value); }
        internal static bool ContainsWebsite(string value) { return value != null && websiteRegex.IsMatch(value); }
        internal static bool ContainsDirectory(string value) { return value != null && directoryRegex.IsMatch(value); }
        internal static bool ContainsCreditCard(string value) { return value != null && creditCardRegex.IsMatch(value); }
        internal static bool ContainsSSN(string value) { return value != null && ssnRegex.IsMatch(value); }
        internal static bool ContainsIpAddress(string value) { return value != null && ipRegex.IsMatch(value); }
        internal static bool ContainsDate(string value) { return value != null && dateRegex.IsMatch(value); }

        /// <summary>
        /// Removes the PII data based on the information patterns. Every match of the email, website,
        /// directory, credit card, SSN, IP address and date patterns is deleted, in that order.
        /// </summary>
        /// <param name="data">Text to scrub.</param>
        /// <returns>The scrubbed text; a null or empty input is returned unchanged.</returns>
        internal static string RemovePIIData(string data)
        {
            if (string.IsNullOrEmpty(data))
            {
                return data;
            }

            string result = data;
            foreach (var pattern in scrubSequence)
            {
                result = pattern.Replace(result, "");
            }

            return result;
        }
    }
}
57 changes: 57 additions & 0 deletions test/DynamoCoreWpfTests/WorkspaceSaving.cs
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,63 @@ public void CanSaveAsNewWorkspaceWithNewGuids()
Assert.AreEqual(legacyLinterId, newLinterId);
}

[Test]
[Category("UnitTests")]
public void RemovePIIDataFromWorkspace()
{
    // Open the sample graph; each id below names a note or node that the assertions
    // further down expect to carry one kind of PII.
    var graphPath = Path.Combine(TestDirectory, @"UI\GraphWithPIIData.dyn");
    ViewModel.OpenCommand.Execute(graphPath);

    const string emailNoteId = "75ccaa00c10c4aedab9250a6d9720951";
    const string webPageNodeId = "cd09502288c448348bd2d0bcd0a3c088";
    const string directoryNodeId = "5e1f42a0cc8d427cbd7fde969a988d5f";
    const string creditCardNoteId = "2126a32c0f474a5887205bd1b3061d8a";
    const string ssnNoteId = "5bcdbd22f679417cb7e3bd19b2d984d3";
    const string ipAddressNodeId = "8d58c36ff11d4eb89025f73b4527d55a";
    const string datesNodeId = "7d471f2e3b7a4cc8946aa4101fbf348a";

    // Capture the values as serialized, before any scrubbing.
    JObject rawWorkspace = ViewModel.CurrentSpaceViewModel.GetJsonRepresentation();

    var emailBefore = (string)PIIDetector.GetNoteValue(rawWorkspace, emailNoteId);
    var webPageBefore = (string)PIIDetector.GetNodeValue(rawWorkspace, webPageNodeId, "Code");
    var directoryBefore = (string)PIIDetector.GetNodeValue(rawWorkspace, directoryNodeId, "InputValue");
    var hintPathBefore = (string)PIIDetector.GetNodeValue(rawWorkspace, directoryNodeId, "HintPath");
    var creditCardsBefore = (string)PIIDetector.GetNoteValue(rawWorkspace, creditCardNoteId);
    var ssnsBefore = (string)PIIDetector.GetNoteValue(rawWorkspace, ssnNoteId);
    var ipAddressBefore = (string)PIIDetector.GetNodeValue(rawWorkspace, ipAddressNodeId, "InputValue");
    var datesBefore = (string)PIIDetector.GetNodeValue(rawWorkspace, datesNodeId, "InputValue");

    // Scrub a freshly serialized copy and make sure the detector reports success.
    Tuple<JObject, bool> scrubOutcome = PIIDetector.RemovePIIData(ViewModel.CurrentSpaceViewModel.GetJsonRepresentation());
    Assert.IsTrue(scrubOutcome.Item2);

    JObject scrubbedWorkspace = scrubOutcome.Item1;

    var emailAfter = (string)PIIDetector.GetNoteValue(scrubbedWorkspace, emailNoteId);
    var webPageAfter = (string)PIIDetector.GetNodeValue(scrubbedWorkspace, webPageNodeId, "Code");
    var directoryAfter = (string)PIIDetector.GetNodeValue(scrubbedWorkspace, directoryNodeId, "InputValue");
    var hintPathAfter = (string)PIIDetector.GetNodeValue(scrubbedWorkspace, directoryNodeId, "HintPath");
    var creditCardsAfter = (string)PIIDetector.GetNoteValue(scrubbedWorkspace, creditCardNoteId);
    var ssnsAfter = (string)PIIDetector.GetNoteValue(scrubbedWorkspace, ssnNoteId);
    var ipAddressAfter = (string)PIIDetector.GetNodeValue(scrubbedWorkspace, ipAddressNodeId, "InputValue");
    var datesAfter = (string)PIIDetector.GetNodeValue(scrubbedWorkspace, datesNodeId, "InputValue");

    // The untouched workspace must still expose every kind of PII...
    Assert.IsTrue(PIIDetector.ContainsEmail(emailBefore));
    Assert.IsTrue(PIIDetector.ContainsWebsite(webPageBefore));
    Assert.IsTrue(PIIDetector.ContainsDirectory(directoryBefore));
    Assert.IsTrue(PIIDetector.ContainsDirectory(hintPathBefore));
    Assert.IsTrue(PIIDetector.ContainsCreditCard(creditCardsBefore));
    Assert.IsTrue(PIIDetector.ContainsSSN(ssnsBefore));
    Assert.IsTrue(PIIDetector.ContainsIpAddress(ipAddressBefore));
    Assert.IsTrue(PIIDetector.ContainsDate(datesBefore));

    // ...while the scrubbed workspace must not.
    Assert.IsFalse(PIIDetector.ContainsEmail(emailAfter));
    Assert.IsFalse(PIIDetector.ContainsWebsite(webPageAfter));
    Assert.IsFalse(PIIDetector.ContainsDirectory(directoryAfter));
    Assert.IsFalse(PIIDetector.ContainsDirectory(hintPathAfter));
    Assert.IsFalse(PIIDetector.ContainsCreditCard(creditCardsAfter));
    Assert.IsFalse(PIIDetector.ContainsSSN(ssnsAfter));
    Assert.IsFalse(PIIDetector.ContainsIpAddress(ipAddressAfter));
    Assert.IsFalse(PIIDetector.ContainsDate(datesAfter));
}

[Test]
[Category("UnitTests")]
public void BackUpSaveDoesNotChangeName()
Expand Down
Loading

0 comments on commit 4e0cf2e

Please sign in to comment.