From ceb3bebf444763d9323ebbac0de8bf9fba604d39 Mon Sep 17 00:00:00 2001
From: Santosh <stirumala@ufl.edu>
Date: Fri, 17 Feb 2023 23:56:07 -0800
Subject: [PATCH 1/5] fix unit tests

---
 wordninja.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/wordninja.py b/wordninja.py
index 4d7ce15..55f6285 100644
--- a/wordninja.py
+++ b/wordninja.py
@@ -40,8 +40,9 @@ def split(self, s):
     texts = _SPLIT_RE.split(s)
     assert len(punctuations) + 1 == len(texts)
     new_texts = [self._split(x) for x in texts]
-    for i, punctuation in enumerate(punctuations):
-      new_texts.insert(2*i+1, punctuation)
+    # this just seems to add spaces back to string, if they are already detected?
+    # for i, punctuation in enumerate(punctuations):
+    #  new_texts.insert(2*i+1, punctuation)
     return [item for sublist in new_texts for item in sublist]
 
 
@@ -83,9 +84,7 @@ def best_match(i):
     return reversed(out)
 
 DEFAULT_LANGUAGE_MODEL = LanguageModel(os.path.join(os.path.dirname(os.path.abspath(__file__)),'wordninja','wordninja_words.txt.gz'))
-_SPLIT_RE = re.compile(r"\s+")
+_SPLIT_RE = re.compile(r"\/+|\\+|_+|-+|\s+")
 
 def split(s):
-  return DEFAULT_LANGUAGE_MODEL.split(s)
-
-
+  return DEFAULT_LANGUAGE_MODEL.split(s)
\ No newline at end of file

From 64e9c4a12667ba6a0b22ed3503d8d84d0574ebc0 Mon Sep 17 00:00:00 2001
From: Santosh <stirumala@ufl.edu>
Date: Sat, 18 Feb 2023 00:02:08 -0800
Subject: [PATCH 2/5] make comment more helpful

---
 wordninja.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wordninja.py b/wordninja.py
index 55f6285..926370a 100644
--- a/wordninja.py
+++ b/wordninja.py
@@ -40,7 +40,7 @@ def split(self, s):
     texts = _SPLIT_RE.split(s)
     assert len(punctuations) + 1 == len(texts)
     new_texts = [self._split(x) for x in texts]
-    # this just seems to add spaces back to string, if they are already detected?
+    # this just seems to add spaces to the result array, if they are already in the input string?
     # for i, punctuation in enumerate(punctuations):
     #  new_texts.insert(2*i+1, punctuation)
     return [item for sublist in new_texts for item in sublist]

From 2f840a316b12b7fc945ad0105bd536feb3055528 Mon Sep 17 00:00:00 2001
From: Santosh <stirumala@ufl.edu>
Date: Sat, 18 Feb 2023 00:22:17 -0800
Subject: [PATCH 3/5] make comment more helpful 2

---
 wordninja.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/wordninja.py b/wordninja.py
index 926370a..db26948 100644
--- a/wordninja.py
+++ b/wordninja.py
@@ -40,7 +40,8 @@ def split(self, s):
     texts = _SPLIT_RE.split(s)
     assert len(punctuations) + 1 == len(texts)
     new_texts = [self._split(x) for x in texts]
-    # this just seems to add spaces to the result array, if they are already in the input string?
+    # this just seems to add punctuation/spaces (after PR #13) to the result array, if they are already in the input string
+    # prior to PR # 13, it seems like it would add back anything in [^a-zA-Z0-9']+
     # for i, punctuation in enumerate(punctuations):
     #  new_texts.insert(2*i+1, punctuation)
     return [item for sublist in new_texts for item in sublist]

From af701ca5f4dba778c0d81392db8318a95a9a3009 Mon Sep 17 00:00:00 2001
From: Santosh <stirumala@ufl.edu>
Date: Sat, 18 Feb 2023 00:23:39 -0800
Subject: [PATCH 4/5] make comment more helpful 3

---
 wordninja.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wordninja.py b/wordninja.py
index db26948..e0aba40 100644
--- a/wordninja.py
+++ b/wordninja.py
@@ -40,7 +40,7 @@ def split(self, s):
     texts = _SPLIT_RE.split(s)
     assert len(punctuations) + 1 == len(texts)
     new_texts = [self._split(x) for x in texts]
-    # this just seems to add punctuation/spaces (after PR #13) to the result array, if they are already in the input string
+    # this just seems to add spaces (after PR #13) to the result array, if they are already in the input string
     # prior to PR # 13, it seems like it would add back anything in [^a-zA-Z0-9']+
     # for i, punctuation in enumerate(punctuations):
     #  new_texts.insert(2*i+1, punctuation)

From 850f482f54eae19cd4ba0d2cf5426d378588e8ec Mon Sep 17 00:00:00 2001
From: Santosh <stirumala@ufl.edu>
Date: Sat, 18 Feb 2023 16:44:32 -0800
Subject: [PATCH 5/5] update comments 4 (will squash these)

---
 wordninja.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wordninja.py b/wordninja.py
index e0aba40..4000109 100644
--- a/wordninja.py
+++ b/wordninja.py
@@ -40,7 +40,7 @@ def split(self, s):
     texts = _SPLIT_RE.split(s)
     assert len(punctuations) + 1 == len(texts)
     new_texts = [self._split(x) for x in texts]
-    # this just seems to add spaces (after PR #13) to the result array, if they are already in the input string
+    # this just seems to add spaces (after PR #13) and the new delimiters in the regex back to the result array, if they are already in the input string
     # prior to PR # 13, it seems like it would add back anything in [^a-zA-Z0-9']+
     # for i, punctuation in enumerate(punctuations):
     #  new_texts.insert(2*i+1, punctuation)