diff --git a/build/vs19/freq01.csproj b/build/vs19/freq01.csproj
new file mode 100644
index 0000000..cfa34b7
--- /dev/null
+++ b/build/vs19/freq01.csproj
@@ -0,0 +1,38 @@
+
+
+
+
+ x64
+ pdbonly
+ true
+ ..\..\bin\
+
+
+ prompt
+ 4
+ false
+ false
+ $(SolutionDir)\junk\vs19\$(Configuration)\$(ProjectName)\bin\
+ $(SolutionDir)\junk\vs19\$(Configuration)\$(ProjectName)\obj\
+ $(SolutionDir)\junk\vs19\$(Configuration)\$(ProjectName)\tmp\
+
+
+ {B61A5E2F-D44A-4ED0-B058-208149DCFC79}
+ v4.7.2
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/build/vs19/freq02.csproj b/build/vs19/freq02.csproj
new file mode 100644
index 0000000..6bb347f
--- /dev/null
+++ b/build/vs19/freq02.csproj
@@ -0,0 +1,38 @@
+
+
+
+
+ x64
+ pdbonly
+ true
+ ..\..\bin\
+
+
+ prompt
+ 4
+ false
+ false
+ $(SolutionDir)\junk\vs19\$(Configuration)\$(ProjectName)\bin\
+ $(SolutionDir)\junk\vs19\$(Configuration)\$(ProjectName)\obj\
+ $(SolutionDir)\junk\vs19\$(Configuration)\$(ProjectName)\tmp\
+
+
+ {B61A5E2F-D44A-4ED0-B058-208149DCFC79}
+ v4.7.2
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/build/vs19/freq05.csproj b/build/vs19/freq05.csproj
new file mode 100644
index 0000000..6f5a7ef
--- /dev/null
+++ b/build/vs19/freq05.csproj
@@ -0,0 +1,38 @@
+
+
+
+
+ x64
+ pdbonly
+ true
+ ..\..\bin\
+
+
+ prompt
+ 4
+ false
+ false
+ $(SolutionDir)\junk\vs19\$(Configuration)\$(ProjectName)\bin\
+ $(SolutionDir)\junk\vs19\$(Configuration)\$(ProjectName)\obj\
+ $(SolutionDir)\junk\vs19\$(Configuration)\$(ProjectName)\tmp\
+
+
+ {B61A5E2F-D44A-4ED0-B058-208149DCFC79}
+ v4.7.2
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/build/vs19/freq06.csproj b/build/vs19/freq06.csproj
new file mode 100644
index 0000000..e3a1a35
--- /dev/null
+++ b/build/vs19/freq06.csproj
@@ -0,0 +1,38 @@
+
+
+
+
+ x64
+ pdbonly
+ true
+ ..\..\bin\
+
+
+ prompt
+ 4
+ false
+ false
+ $(SolutionDir)\junk\vs19\$(Configuration)\$(ProjectName)\bin\
+ $(SolutionDir)\junk\vs19\$(Configuration)\$(ProjectName)\obj\
+ $(SolutionDir)\junk\vs19\$(Configuration)\$(ProjectName)\tmp\
+
+
+ {B61A5E2F-D44A-4ED0-B058-208149DCFC79}
+ v4.7.2
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/freq01.cs b/src/freq01.cs
new file mode 100644
index 0000000..b102082
--- /dev/null
+++ b/src/freq01.cs
@@ -0,0 +1,85 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+
+namespace freq01
+{
+    /// <summary>
+    /// Word-frequency counter: reads a text file, counts case-insensitive
+    /// occurrences of every run of letters, and writes one "count word" line
+    /// per distinct word, ordered by descending count and then by word.
+    /// </summary>
+    class freq01
+    {
+        // Lower-cased word -> number of occurrences counted so far.
+        private static readonly Dictionary<string, int> dict = new Dictionary<string, int>();
+
+        /// <summary>
+        /// Records one occurrence of the word held in <paramref name="word"/>.
+        /// </summary>
+        /// <param name="word">Letters of a single word; the buffer is not modified.</param>
+        private static void AddWord(StringBuilder word)
+        {
+            var strWord = word.ToString().ToLowerInvariant();
+
+            // TryGetValue leaves count at 0 for an unseen word, so one lookup
+            // plus one store covers both the new and the existing case.
+            int count;
+            dict.TryGetValue(strWord, out count);
+            dict[strWord] = count + 1;
+        }
+
+        /// <summary>
+        /// Entry point: counts the words of one file and writes the result to another.
+        /// </summary>
+        /// <param name="args">args[0] is the file to read, args[1] the file to write.</param>
+        static void Main(string[] args)
+        {
+            if (args.Length != 2)
+            {
+                Console.WriteLine("Usage: freq <input-file> <output-file>");
+                return;
+            }
+
+            using (var sr = new StreamReader(args[0]))
+            {
+                var word = new StringBuilder();
+
+                // Read() reports -1 only at the end of the input, so it doubles
+                // as the loop condition and no separate Peek() call is needed.
+                int next;
+                while ((next = sr.Read()) >= 0)
+                {
+                    var ch = (char)next;
+                    if (Char.IsLetter(ch))
+                    {
+                        word.Append(ch);
+                        continue;
+                    }
+
+                    if (word.Length == 0)
+                    {
+                        continue;
+                    }
+
+                    AddWord(word);
+                    word.Clear(); // reuse the buffer instead of allocating one per word
+                }
+
+                // The input may end in the middle of a word.
+                if (word.Length > 0)
+                {
+                    AddWord(word);
+                }
+            }
+
+            // Ordinal comparison keeps the tie-break order independent of the
+            // machine's current culture.
+            File.WriteAllLines(args[1], dict.OrderByDescending(kvp => kvp.Value)
+                                            .ThenBy(kvp => kvp.Key, StringComparer.Ordinal)
+                                            .Select(kvp => $"{kvp.Value} {kvp.Key}"));
+        }
+    }
+}
diff --git a/src/freq02.cs b/src/freq02.cs
new file mode 100644
index 0000000..262af1b
--- /dev/null
+++ b/src/freq02.cs
@@ -0,0 +1,149 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+using System.Text;
+
+namespace freq02
+{
+    /// <summary>
+    /// 26-way trie over the letters a-z. Each node stores how many times the
+    /// word spelled by the path from the root to that node was counted.
+    /// </summary>
+    class trie
+    {
+        public node Root = new node() { Word = "" };
+
+        /// <summary>
+        /// Lazily walks the subtree below <paramref name="root"/> depth-first,
+        /// visiting child slots in a..z order.
+        /// </summary>
+        /// <param name="root">Node whose descendants are visited; it is not yielded itself.</param>
+        /// <returns>Every descendant whose Count is greater than zero, parents before children.</returns>
+        public IEnumerable<node> Enumerate(node root)
+        {
+            foreach (var child in root.Children)
+            {
+                if (child == null)
+                {
+                    continue;
+                }
+
+                if (child.Count > 0)
+                {
+                    yield return child;
+                }
+
+                foreach (var descendant in Enumerate(child))
+                {
+                    yield return descendant;
+                }
+            }
+        }
+
+        /// <summary>One trie node: a counter, the word it spells, and 26 child slots.</summary>
+        public class node
+        {
+            public node[] Children = new node[26];
+            public int Count;
+            public string Word;
+
+            /// <summary>
+            /// Returns the child in slot <paramref name="index"/>, creating it on first use.
+            /// </summary>
+            /// <param name="index">Letter index: 0 for 'a' through 25 for 'z'.</param>
+            /// <returns>The existing or newly created child.</returns>
+            public node WeNeedToGoDeeper(int index)
+            {
+                return Children[index] ?? (Children[index] = new node() { Word = Word + (char)('a' + index) });
+            }
+        }
+    }
+
+    /// <summary>
+    /// Word-frequency counter backed by <see cref="trie"/>. Only the ASCII
+    /// letters A-Z and a-z form words; every other byte separates words.
+    /// </summary>
+    class freq02
+    {
+        private static readonly trie _trie = new trie();
+
+        /// <summary>
+        /// Reads <paramref name="filename"/> as raw bytes and counts every
+        /// case-insensitive run of ASCII letters in the shared trie.
+        /// </summary>
+        /// <param name="filename">Path of the file to read.</param>
+        private static void Count(string filename)
+        {
+            var text = File.ReadAllBytes(filename);
+
+            trie.node node = _trie.Root;
+            foreach (var ch in text)
+            {
+                if (ch >= 'a' && ch <= 'z')
+                {
+                    node = node.WeNeedToGoDeeper(ch - 'a');
+                    continue;
+                }
+
+                if (ch >= 'A' && ch <= 'Z')
+                {
+                    // Upper-case letters share the slots of their lower-case forms.
+                    node = node.WeNeedToGoDeeper(ch - 'A');
+                    continue;
+                }
+
+                // Any other byte ends the word in progress, if there is one.
+                if (node == _trie.Root)
+                {
+                    continue;
+                }
+
+                node.Count++;
+                node = _trie.Root;
+            }
+
+            // The input may end in the middle of a word.
+            if (node != _trie.Root)
+            {
+                node.Count++;
+            }
+        }
+
+        /// <summary>
+        /// Writes one "count word" line per counted word to <paramref name="filename"/>,
+        /// highest count first.
+        /// </summary>
+        /// <param name="filename">Path of the file to write.</param>
+        private static void SortAndDump(string filename)
+        {
+            // OrderByDescending is a stable sort, so words with equal counts keep
+            // the a..z order in which the trie traversal produced them.
+            var sorted = _trie.Enumerate(_trie.Root).OrderByDescending(n => n.Count);
+            File.WriteAllLines(filename, sorted.Select(n => $"{n.Count} {n.Word}"));
+        }
+
+        /// <summary>
+        /// Entry point: counts the words of one file, writes the result to
+        /// another, and prints the elapsed time.
+        /// </summary>
+        /// <param name="args">args[0] is the file to read, args[1] the file to write.</param>
+        static void Main(string[] args)
+        {
+            if (args.Length != 2)
+            {
+                Console.WriteLine("Usage: freq <input-file> <output-file>");
+                return;
+            }
+
+            var stopwatch = Stopwatch.StartNew();
+
+            Count(args[0]);
+            SortAndDump(args[1]);
+
+            stopwatch.Stop();
+            Console.WriteLine($"Elapsed time: {(decimal) stopwatch.ElapsedMilliseconds / 1000}s");
+        }
+    }
+}
diff --git a/src/freq05.cs b/src/freq05.cs
new file mode 100644
index 0000000..2c3d790
--- /dev/null
+++ b/src/freq05.cs
@@ -0,0 +1,226 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+
+namespace freq05
+{
+    /// <summary>
+    /// Counts case-insensitive ASCII words in a trie whose child links live in
+    /// one flat array: every node owns 26 consecutive slots that start at its
+    /// <see cref="node.PersonalndexStart"/>.
+    /// </summary>
+    public class Accountant
+    {
+        // Initial number of child slots; the array grows when a text needs more.
+        private const int CAPACITY = 15000000;
+        private const int AlphabetSize = 26;
+
+        /// <summary>One trie node.</summary>
+        public class node
+        {
+            /// <summary>Index of this node's first child slot (letter 'a') in the flat array.</summary>
+            public int PersonalndexStart;
+            /// <summary>How many times the word ending at this node was counted.</summary>
+            public int Count;
+            /// <summary>Number of letters on the path from the root; the root has depth 0.</summary>
+            public int Depth;
+            /// <summary>The word this node spells; only assigned while enumerating.</summary>
+            public string Word;
+        }
+
+        // Maps a byte value to 0..25 for A-Z and a-z, and to -1 for anything else.
+        private static readonly int[] LetterIndex = BuildLetterIndex();
+
+        private node _root = new node();
+        // Letters on the path to the node currently being enumerated; grown on demand.
+        private char[] _enumerationWordBuf = new char[256];
+
+        private node[] _sparseNodes = new node[CAPACITY];
+        // Start of the next unassigned 26-slot chunk; the root owns slots 0..25.
+        public int _nextFreeChunkIndex = 26;
+
+        /// <summary>Enumerates every counted word, starting from the root.</summary>
+        /// <returns>See <see cref="Enumerate(node)"/>.</returns>
+        public IEnumerable<node> Enumerate()
+        {
+            return Enumerate(_root);
+        }
+
+        /// <summary>
+        /// Lazily walks the subtree below <paramref name="root"/> depth-first in
+        /// a..z order and assigns <see cref="node.Word"/> on each node it yields.
+        /// </summary>
+        /// <param name="root">Node whose descendants are visited; it is not yielded itself.</param>
+        /// <returns>Every descendant whose Count is greater than zero, parents before children.</returns>
+        public IEnumerable<node> Enumerate(node root)
+        {
+            // The fixed 256-char buffer used to overflow on very long letter runs.
+            EnsureWordBuffer(root.Depth);
+
+            for (int i = 0; i < AlphabetSize; i++)
+            {
+                var child = _sparseNodes[root.PersonalndexStart + i];
+                if (child == null)
+                {
+                    continue;
+                }
+
+                // Deeper levels overwrite only the positions after this one, so
+                // the prefix stays valid while the child's subtree is walked.
+                _enumerationWordBuf[root.Depth] = (char)('a' + i);
+
+                if (child.Count > 0)
+                {
+                    child.Word = new string(_enumerationWordBuf, 0, root.Depth + 1);
+                    yield return child;
+                }
+
+                foreach (var descendant in Enumerate(child))
+                {
+                    yield return descendant;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Counts every run of ASCII letters in <paramref name="text"/> as one
+        /// word, ignoring case. May be called repeatedly; counts accumulate.
+        /// </summary>
+        /// <param name="text">Raw bytes to scan; every non-letter byte separates words.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
+        public void Count(byte[] text)
+        {
+            if (text == null)
+            {
+                throw new ArgumentNullException(nameof(text));
+            }
+
+            var current = _root;
+            foreach (var b in text)
+            {
+                var index = LetterIndex[b];
+                if (index >= 0)
+                {
+                    var slot = current.PersonalndexStart + index;
+                    var next = _sparseNodes[slot];
+                    if (next == null)
+                    {
+                        next = new node { PersonalndexStart = AllocateChunk(), Depth = current.Depth + 1 };
+                        // AllocateChunk may have replaced the array, so index it only now.
+                        _sparseNodes[slot] = next;
+                    }
+
+                    current = next;
+                    continue;
+                }
+
+                // A separator ends the word in progress, if there is one.
+                if (current == _root)
+                {
+                    continue;
+                }
+
+                current.Count++;
+                current = _root;
+            }
+
+            // The input may end in the middle of a word.
+            if (current != _root)
+            {
+                current.Count++;
+            }
+        }
+
+        /// <summary>Builds the byte-to-letter-index table used by <see cref="Count"/>.</summary>
+        private static int[] BuildLetterIndex()
+        {
+            var table = new int[256];
+            for (int i = 0; i < table.Length; i++)
+            {
+                table[i] = -1;
+            }
+
+            for (int i = 0; i < AlphabetSize; i++)
+            {
+                table['A' + i] = i;
+                table['a' + i] = i;
+            }
+
+            return table;
+        }
+
+        /// <summary>Reserves the next 26-slot chunk, growing the slot array if it is full.</summary>
+        /// <returns>Index of the first slot of the reserved chunk.</returns>
+        private int AllocateChunk()
+        {
+            var start = _nextFreeChunkIndex;
+            var needed = (long)start + AlphabetSize;
+            if (needed > _sparseNodes.Length)
+            {
+                // Doubling keeps the amortised cost of growth low.
+                Array.Resize(ref _sparseNodes, checked((int)Math.Max(needed, 2L * _sparseNodes.Length)));
+            }
+
+            _nextFreeChunkIndex = start + AlphabetSize;
+            return start;
+        }
+
+        /// <summary>Makes sure the word buffer can hold a letter at position <paramref name="depth"/>.</summary>
+        private void EnsureWordBuffer(int depth)
+        {
+            if (depth >= _enumerationWordBuf.Length)
+            {
+                Array.Resize(ref _enumerationWordBuf, Math.Max(depth + 1, _enumerationWordBuf.Length * 2));
+            }
+        }
+    }
+
+    /// <summary>Command-line front end for <see cref="Accountant"/>.</summary>
+    class freq05
+    {
+        /// <summary>
+        /// Writes one "count word" line per counted word to <paramref name="filename"/>,
+        /// highest count first.
+        /// </summary>
+        private static void SortAndDump(Accountant accountant, string filename)
+        {
+            // OrderByDescending is a stable sort, so words with equal counts keep
+            // the a..z order in which the traversal produced them.
+            var sorted = accountant.Enumerate().OrderByDescending(n => n.Count);
+            File.WriteAllLines(filename, sorted.Select(n => $"{n.Count} {n.Word}"));
+        }
+
+        /// <summary>
+        /// Entry point: counts the words of one file and writes the result to another.
+        /// </summary>
+        /// <param name="args">args[0] is the file to read, args[1] the file to write.</param>
+        static void Main(string[] args)
+        {
+            if (args.Length != 2)
+            {
+                Console.WriteLine("Usage: freq <input-file> <output-file>");
+                return;
+            }
+
+            var text = File.ReadAllBytes(args[0]);
+            var acc = new Accountant();
+
+            try
+            {
+                // Best effort only: the return value is not needed because the
+                // run is correct with or without garbage collections.
+                GC.TryStartNoGCRegion(244 * 1000 * 1000, true);
+            }
+            catch (ArgumentOutOfRangeException)
+            {
+                // The requested budget exceeds what this runtime configuration
+                // allows for a no-GC region; continue with the normal GC.
+            }
+
+            acc.Count(text);
+            SortAndDump(acc, args[1]);
+        }
+    }
+}
diff --git a/src/freq06.cs b/src/freq06.cs
new file mode 100644
index 0000000..00d7f83
--- /dev/null
+++ b/src/freq06.cs
@@ -0,0 +1,332 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+using System.Security.Policy;
+using System.Threading;
+
+namespace freq06
+{
+    /// <summary>
+    /// Multi-threaded word counter. Case-insensitive ASCII words are stored in
+    /// a trie whose child links live in one flat array: every node owns 26
+    /// consecutive slots that start at its <see cref="node.PersonalndexStart"/>.
+    /// </summary>
+    public class Accountant
+    {
+        // Initial number of child slots; the array grows when a text needs more.
+        private const int CAPACITY = 15000000;
+        private const int AlphabetSize = 26;
+
+        /// <summary>One trie node.</summary>
+        public class node
+        {
+            /// <summary>Index of this node's first child slot (letter 'a') in the flat array.</summary>
+            public int PersonalndexStart;
+            /// <summary>How many times the word ending at this node was counted.</summary>
+            public int Count;
+            /// <summary>Number of letters on the path from the root; the root has depth 0.</summary>
+            public int Depth;
+            /// <summary>The word this node spells; only assigned while enumerating.</summary>
+            public string Word;
+        }
+
+        /// <summary>Work item for one counting thread: the bytes Data[Start] .. Data[End - 1].</summary>
+        public class ThreadStartInfo
+        {
+            public int Start;
+            public int End;
+            public byte[] Data;
+        }
+
+        // Maps a byte value to 0..25 for A-Z and a-z, and to -1 for anything else.
+        private static readonly int[] LetterIndex = BuildLetterIndex();
+
+        // Serialises node creation and growth of the slot array.
+        private readonly object _gate = new object();
+
+        private node _root = new node();
+        // Letters on the path to the node currently being enumerated; grown on demand.
+        private char[] _enumerationWordBuf = new char[256];
+
+        private volatile node[] _sparseNodes = new node[CAPACITY];
+        // Start of the next unassigned 26-slot chunk; the root owns slots 0..25.
+        public volatile int _nextFreeChunkIndex = 26;
+
+        /// <summary>Enumerates every counted word, starting from the root.</summary>
+        /// <returns>See <see cref="Enumerate(node)"/>.</returns>
+        public IEnumerable<node> Enumerate()
+        {
+            return Enumerate(_root);
+        }
+
+        /// <summary>
+        /// Lazily walks the subtree below <paramref name="root"/> depth-first in
+        /// a..z order and assigns <see cref="node.Word"/> on each node it yields.
+        /// Not safe to run while counting threads are still active.
+        /// </summary>
+        /// <param name="root">Node whose descendants are visited; it is not yielded itself.</param>
+        /// <returns>Every descendant whose Count is greater than zero, parents before children.</returns>
+        public IEnumerable<node> Enumerate(node root)
+        {
+            // The fixed 256-char buffer used to overflow on very long letter runs.
+            EnsureWordBuffer(root.Depth);
+
+            for (int i = 0; i < AlphabetSize; i++)
+            {
+                var child = _sparseNodes[root.PersonalndexStart + i];
+                if (child == null)
+                {
+                    continue;
+                }
+
+                // Deeper levels overwrite only the positions after this one, so
+                // the prefix stays valid while the child's subtree is walked.
+                _enumerationWordBuf[root.Depth] = (char)('a' + i);
+
+                if (child.Count > 0)
+                {
+                    child.Word = new string(_enumerationWordBuf, 0, root.Depth + 1);
+                    yield return child;
+                }
+
+                foreach (var descendant in Enumerate(child))
+                {
+                    yield return descendant;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Splits <paramref name="text"/> into at most <paramref name="chunks"/>
+        /// ranges that end on word boundaries, counts each range on its own
+        /// thread, and returns once every thread has finished.
+        /// </summary>
+        /// <param name="text">Raw bytes to scan; every non-letter byte separates words.</param>
+        /// <param name="chunks">Maximum number of worker threads; must be at least 1.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="chunks"/> is less than 1.</exception>
+        public void Count(byte[] text, int chunks)
+        {
+            if (text == null)
+            {
+                throw new ArgumentNullException(nameof(text));
+            }
+
+            if (chunks < 1)
+            {
+                throw new ArgumentOutOfRangeException(nameof(chunks), "At least one chunk is required.");
+            }
+
+            var len = text.Length;
+            var chunkSize = len / chunks;
+            var start = 0;
+            var threads = new List<Thread>();
+
+            for (int i = 1; i <= chunks && start < len; i++)
+            {
+                // The last chunk always runs to the end of the text; integer
+                // division would otherwise leave a tail that no thread scans.
+                var end = i == chunks ? len : Math.Min(chunkSize * i + 5, len);
+
+                // Never cut a word in half: move the boundary to the next separator.
+                while (end < len && LetterIndex[text[end]] != -1)
+                {
+                    end++;
+                }
+
+                if (end < start)
+                {
+                    // An earlier chunk was already extended past this boundary.
+                    continue;
+                }
+
+                if (end > start)
+                {
+                    var thread = new Thread(CountRange);
+                    thread.Start(new ThreadStartInfo { Data = text, Start = start, End = end });
+                    threads.Add(thread);
+                }
+
+                // text[end] is a separator (or the end of the text), so skip it.
+                start = end + 1;
+            }
+
+            foreach (var thread in threads)
+            {
+                thread.Join();
+            }
+        }
+
+        /// <summary>
+        /// Thread body: counts the words in the byte range described by
+        /// <paramref name="state"/>, which must be a <see cref="ThreadStartInfo"/>.
+        /// </summary>
+        private void CountRange(object state)
+        {
+            var range = (ThreadStartInfo)state;
+            var data = range.Data;
+            var current = _root;
+
+            for (int i = range.Start; i < range.End; i++)
+            {
+                var index = LetterIndex[data[i]];
+                if (index >= 0)
+                {
+                    var slot = current.PersonalndexStart + index;
+
+                    // Fast path without the lock; it is only taken to create a node.
+                    var slots = _sparseNodes;
+                    var next = Volatile.Read(ref slots[slot]);
+                    if (next == null)
+                    {
+                        lock (_gate)
+                        {
+                            // Another thread may have created the node in the meantime.
+                            next = _sparseNodes[slot];
+                            if (next == null)
+                            {
+                                next = new node { PersonalndexStart = AllocateChunk(), Depth = current.Depth + 1 };
+
+                                // Publish the node only once it is fully initialised, and into
+                                // the current array: AllocateChunk may have replaced it.
+                                slots = _sparseNodes;
+                                Volatile.Write(ref slots[slot], next);
+                            }
+                        }
+                    }
+
+                    current = next;
+                    continue;
+                }
+
+                // A separator ends the word in progress, if there is one.
+                if (current == _root)
+                {
+                    continue;
+                }
+
+                Interlocked.Increment(ref current.Count);
+                current = _root;
+            }
+
+            // The range stops before its trailing separator, so a final word is still open here.
+            if (current != _root)
+            {
+                Interlocked.Increment(ref current.Count);
+            }
+        }
+
+        /// <summary>Builds the byte-to-letter-index table used while counting.</summary>
+        private static int[] BuildLetterIndex()
+        {
+            var table = new int[256];
+            for (int i = 0; i < table.Length; i++)
+            {
+                table[i] = -1;
+            }
+
+            for (int i = 0; i < AlphabetSize; i++)
+            {
+                table['A' + i] = i;
+                table['a' + i] = i;
+            }
+
+            return table;
+        }
+
+        /// <summary>
+        /// Reserves the next 26-slot chunk, growing the slot array if it is full.
+        /// Must be called while holding <c>_gate</c>.
+        /// </summary>
+        /// <returns>Index of the first slot of the reserved chunk.</returns>
+        private int AllocateChunk()
+        {
+            var start = _nextFreeChunkIndex;
+            var slots = _sparseNodes;
+            var needed = (long)start + AlphabetSize;
+            if (needed > slots.Length)
+            {
+                // Writers are serialised by the lock, so the copy cannot miss a node;
+                // readers holding the old array simply re-check under the lock.
+                var grown = new node[checked((int)Math.Max(needed, 2L * slots.Length))];
+                Array.Copy(slots, grown, slots.Length);
+                _sparseNodes = grown;
+            }
+
+            _nextFreeChunkIndex = start + AlphabetSize;
+            return start;
+        }
+
+        /// <summary>Makes sure the word buffer can hold a letter at position <paramref name="depth"/>.</summary>
+        private void EnsureWordBuffer(int depth)
+        {
+            if (depth >= _enumerationWordBuf.Length)
+            {
+                Array.Resize(ref _enumerationWordBuf, Math.Max(depth + 1, _enumerationWordBuf.Length * 2));
+            }
+        }
+    }
+
+    /// <summary>Command-line front end for <see cref="Accountant"/>.</summary>
+    class freq06
+    {
+        private const string Usage = "Usage: freq <input-file> <output-file> [chunks (default=4)]";
+
+        /// <summary>
+        /// Writes one "count word" line per counted word to <paramref name="filename"/>,
+        /// highest count first.
+        /// </summary>
+        private static void SortAndDump(Accountant accountant, string filename)
+        {
+            // OrderByDescending is a stable sort, so words with equal counts keep
+            // the a..z order in which the traversal produced them.
+            var sorted = accountant.Enumerate().OrderByDescending(n => n.Count);
+            File.WriteAllLines(filename, sorted.Select(n => $"{n.Count} {n.Word}"));
+        }
+
+        /// <summary>
+        /// Entry point: counts the words of one file on several threads and
+        /// writes the result to another file.
+        /// </summary>
+        /// <param name="args">
+        /// args[0] is the file to read, args[1] the file to write, and the
+        /// optional args[2] the number of chunks (a positive integer).
+        /// </param>
+        static void Main(string[] args)
+        {
+            if (args.Length != 2 && args.Length != 3)
+            {
+                Console.WriteLine(Usage);
+                return;
+            }
+
+            int chunks = 4;
+            if (args.Length == 3 && (!Int32.TryParse(args[2], out chunks) || chunks < 1))
+            {
+                // A malformed or non-positive count is a usage error, not a crash.
+                Console.WriteLine(Usage);
+                return;
+            }
+
+            var text = File.ReadAllBytes(args[0]);
+            var acc = new Accountant();
+
+            try
+            {
+                // Best effort only: the return value is not needed because the
+                // run is correct with or without garbage collections.
+                GC.TryStartNoGCRegion(244 * 1000 * 1000, true);
+            }
+            catch (ArgumentOutOfRangeException)
+            {
+                // The requested budget exceeds what this runtime configuration
+                // allows for a no-GC region; continue with the normal GC.
+            }
+
+            acc.Count(text, chunks);
+            SortAndDump(acc, args[1]);
+        }
+    }
+}