From b32e3dae69dfe4bfc7847947ecfd8a8972885973 Mon Sep 17 00:00:00 2001
From: Harshit Hajela
Date: Wed, 28 Apr 2021 14:30:51 -0600
Subject: [PATCH] make string matching more lenient

---
Reviewer notes (everything from the "---" line above down to the first
diff header is commentary and is not part of the commit message):

* fetchComments no longer dereferences the None placeholder that the
  comment stream yields when it is created with pause_after; previously
  comment.body raised AttributeError on such an item.
* Tickers and company names are matched as whole words instead of
  "term followed or preceded by one space". This stops "TV " from
  counting as V and " intelligence" from counting as Intel, and it lets a
  comment that consists only of the term (or wraps it in punctuation)
  match. Tickers stay case-sensitive; company names are compared
  casefolded, as before.
* NOTE(review): the line structure and indentation of this patch were
  reconstructed (4 spaces in companies.json, standard method nesting in
  updater.py). Confirm the context lines against the real files before
  applying.
* NOTE(review): the "index" blob ids are the ones from the original
  submission; they do not describe the revised updater.py post-image.

 backend/companies.json | 26 +++++++++++++-------------
 backend/updater.py     | 11 ++++++++++-
 2 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/backend/companies.json b/backend/companies.json
index eaf617ce7..12457d9b6 100644
--- a/backend/companies.json
+++ b/backend/companies.json
@@ -1,32 +1,32 @@
 {
-    "MMM": "3M Company",
+    "MMM": "3M",
     "AXP": "American Express",
     "AMGN": "Amgen",
-    "AAPL": "Apple Inc.",
+    "AAPL": "Apple",
     "BA": "Boeing",
-    "CAT": "Caterpillar Inc.",
-    "CVX": "Chevron Corporation",
-    "CSCO": "Cisco Systems",
-    "KO": "The Coca-Cola Company",
-    "DOW": "Dow Inc.",
+    "CAT": "Caterpillar",
+    "CVX": "Chevron",
+    "CSCO": "Cisco",
+    "KO": "Coca-Cola",
+    "DOW": "Dow",
     "GS": "Goldman Sachs",
-    "HD": "The Home Depot",
+    "HD": "Home Depot",
     "HON": "Honeywell",
     "IBM": "IBM",
     "INTC": "Intel",
     "JNJ": "Johnson & Johnson",
-    "JPM": "JPMorgan Chase",
+    "JPM": "JPMorgan",
     "MCD": "McDonald's",
-    "MRK": "Merck & Co.",
+    "MRK": "Merck",
     "MSFT": "Microsoft",
     "NKE": "Nike",
     "PG": "Procter & Gamble",
     "CRM": "Salesforce",
     "TRV": "The Travelers Companies",
-    "UNH": "UnitedHealth Group",
+    "UNH": "UnitedHealth",
     "VZ": "Verizon",
-    "V": "Visa Inc.",
+    "V": "Visa",
     "WBA": "Walgreens Boots Alliance",
     "WMT": "Walmart",
-    "DIS": "The Walt Disney Company"
+    "DIS": "Walt Disney"
 }
diff --git a/backend/updater.py b/backend/updater.py
index 9509ca3b6..0815e3110 100644
--- a/backend/updater.py
+++ b/backend/updater.py
@@ -40,8 +40,17 @@ def join(self):
 
     def fetchComments(self):
+        import re  # function-scope: the module's import block is outside this hunk
+        word = r'(?<!\w){0}(?!\w)'  # the term, not embedded in a longer word
         for comment in self.sr_obj.stream.comments(skip_existing=True, pause_after=5):
+            # With pause_after the stream yields None when polling finds no
+            # new comments; scan an empty body then instead of crashing.
+            body = comment.body if comment is not None else ''
+            comment_text = body.casefold()
             for ticker in self.companies:
-                if ticker in comment.body or self.companies[ticker] in comment.body:
+                casefolded_company = self.companies[ticker].casefold()
+                # Tickers are case-sensitive; company names are compared casefolded.
+                if (re.search(word.format(re.escape(ticker)), body) or
+                        re.search(word.format(re.escape(casefolded_company)), comment_text)):
                     comment_obj = { "ticker": ticker, "text": comment.body, "timestamp": math.ceil(time.time_ns()/1000000) }
                     self.output(comment_obj)
                     break