From 70f2508228e28aface49b577f8fea2cf5ae90f10 Mon Sep 17 00:00:00 2001 From: Alexey Shuksto Date: Thu, 21 May 2020 16:47:55 +0300 Subject: [PATCH 1/3] add Java version (simple as f...) --- README.md | 8 ++++++++ mk.cmd | 3 +++ 2 files changed, 11 insertions(+) diff --git a/README.md b/README.md index e22498c..6debdce 100644 --- a/README.md +++ b/README.md @@ -115,6 +115,14 @@ go build freq01.go ./freq01 pg.txt out.txt ``` +Java +---- + +```bash +javac -d junk src/freq01.java +java -cp junk freq01 pg.txt out.txt +``` + Rust ----- diff --git a/mk.cmd b/mk.cmd index ff772a5..aefef15 100644 --- a/mk.cmd +++ b/mk.cmd @@ -19,6 +19,9 @@ cl.exe /permissive- /GS /GL /W3 /Gy /Zc:wchar_t /Gm- /O2 /sdl /Zc:inline /fp:pre echo === freq01.go call go build -o bin\freq01go.exe src\freq01.go +echo === freq01.java +call javac -d bin\ src\freq01.java + echo === freq01.rs cd build\rust call cargo build --release --color=never From f39544ceedfa65a2ded025604427bd7f317f873e Mon Sep 17 00:00:00 2001 From: Alexey Shuksto Date: Thu, 21 May 2020 18:41:23 +0300 Subject: [PATCH 2/3] add actual '.java' file --- src/freq01.java | 68 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 src/freq01.java diff --git a/src/freq01.java b/src/freq01.java new file mode 100644 index 0000000..6afe5ef --- /dev/null +++ b/src/freq01.java @@ -0,0 +1,68 @@ +import java.io.BufferedWriter; +import java.io.FileInputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.util.*; + +class freq01 { + final static byte UPPER_A = (byte) 'A'; + final static byte UPPER_Z = (byte) 'Z'; + final static byte LOWER_A = (byte) 'a'; + final static byte LOWER_Z = (byte) 'z'; + + public static void main(String[] args) throws Exception { + if (args.length < 2) { + System.out.println("Usage: java -cp . Main in.txt out.txt"); + System.exit(1); + } + + HashMap dict = new HashMap<>(256); + try (FileChannel channel = new FileInputStream(args[0]).getChannel()) { + MappedByteBuffer map = channel.map(FileChannel.MapMode.READ_ONLY, 0L, channel.size()); + + byte[] word = new byte[256]; + int wordLength = 0; + while (map.remaining() > 0) { + byte b = map.get(); + if (LOWER_A <= b && b <= LOWER_Z) { + word[wordLength] = b; + wordLength += 1; + } else if (UPPER_A <= b && b <= UPPER_Z) { + word[wordLength] = (byte) (b | 0x20); + wordLength += 1; + } else if (wordLength > 0) { + String key = new String(Arrays.copyOf(word, wordLength), StandardCharsets.UTF_8); + wordLength = 0; + dict.merge(key, 1, Integer::sum); + } + } + if (wordLength > 0) { + String key = new String(Arrays.copyOf(word, wordLength), StandardCharsets.UTF_8); + dict.merge(key, 1, Integer::sum); + } + } + + try (BufferedWriter writer = new BufferedWriter(new FileWriter(args[1]))) { + dict + .entrySet() + .stream() + .sorted((l, r) -> { + int result = r.getValue().compareTo(l.getValue()); + if (result == 0) result = l.getKey().compareTo(r.getKey()); + return result; + }) + .forEachOrdered((e) -> { + try { + writer.write(e.getValue().toString() + " " + e.getKey()); + writer.newLine(); + } catch (IOException ioException) { + ioException.printStackTrace(); + System.exit(1); + } + }); + } + } +} From 9dfb5c98bc2c61179624553c4027da5b0039b6ce Mon Sep 17 00:00:00 2001 From: Alexey Shuksto Date: Fri, 22 May 2020 15:24:33 +0300 Subject: [PATCH 3/3] make it even 'simpler' --- src/freq01.java | 107 ++++++++++++++++++++++-------------------------- 1 file changed, 49 insertions(+), 58 deletions(-) diff --git a/src/freq01.java b/src/freq01.java index 6afe5ef..e372f55 100644 --- a/src/freq01.java +++ b/src/freq01.java @@ -1,68 +1,59 @@ -import java.io.BufferedWriter; -import java.io.FileInputStream; -import java.io.FileWriter; -import java.io.IOException; -import java.nio.MappedByteBuffer; -import java.nio.channels.FileChannel; -import java.nio.charset.StandardCharsets; -import java.util.*; +import java.io.*; +import java.util.HashMap; class freq01 { - final static byte UPPER_A = (byte) 'A'; - final static byte UPPER_Z = (byte) 'Z'; - final static byte LOWER_A = (byte) 'a'; - final static byte LOWER_Z = (byte) 'z'; + final static char[] ALPHABET = new char[256]; - public static void main(String[] args) throws Exception { - if (args.length < 2) { - System.out.println("Usage: java -cp . Main in.txt out.txt"); - System.exit(1); + static { + for (char c = 'a'; c <= 'z'; c++) { + ALPHABET[c & 0xFF] = c; + ALPHABET[c ^ 0x20] = c; + } } - HashMap dict = new HashMap<>(256); - try (FileChannel channel = new FileInputStream(args[0]).getChannel()) { - MappedByteBuffer map = channel.map(FileChannel.MapMode.READ_ONLY, 0L, channel.size()); + public static void main(String[] args) throws Exception { + if (args.length < 2) { + System.out.println("Usage: java -cp . freq01 in.txt out.txt"); + System.exit(1); + } - byte[] word = new byte[256]; - int wordLength = 0; - while (map.remaining() > 0) { - byte b = map.get(); - if (LOWER_A <= b && b <= LOWER_Z) { - word[wordLength] = b; - wordLength += 1; - } else if (UPPER_A <= b && b <= UPPER_Z) { - word[wordLength] = (byte) (b | 0x20); - wordLength += 1; - } else if (wordLength > 0) { - String key = new String(Arrays.copyOf(word, wordLength), StandardCharsets.UTF_8); - wordLength = 0; - dict.merge(key, 1, Integer::sum); + final HashMap dict = new HashMap<>(256); + final StringBuilder word = new StringBuilder(256); + final byte[] buffer = new byte[16 * 1024]; + try (final BufferedInputStream bis = new BufferedInputStream(new FileInputStream(args[0]))) { + for (int read = bis.read(buffer); read > 0; read = bis.read(buffer)) { + for (int i = 0; i < read; i++) { + char c = ALPHABET[buffer[i] & 0xFF]; + if (c != 0) { + word.append(c); + } else if (word.length() > 0) { + dict.merge(word.toString(), 1, Integer::sum); + word.setLength(0); + } + } + } + if (word.length() > 0) { + dict.merge(word.toString(), 1, Integer::sum); + } } - } - if (wordLength > 0) { - String key = new String(Arrays.copyOf(word, wordLength), StandardCharsets.UTF_8); - dict.merge(key, 1, Integer::sum); - } - } - try (BufferedWriter writer = new BufferedWriter(new FileWriter(args[1]))) { - dict - .entrySet() - .stream() - .sorted((l, r) -> { - int result = r.getValue().compareTo(l.getValue()); - if (result == 0) result = l.getKey().compareTo(r.getKey()); - return result; - }) - .forEachOrdered((e) -> { - try { - writer.write(e.getValue().toString() + " " + e.getKey()); - writer.newLine(); - } catch (IOException ioException) { - ioException.printStackTrace(); - System.exit(1); - } - }); + try (final BufferedWriter writer = new BufferedWriter(new FileWriter(args[1]))) { + dict.entrySet() + .stream() + .sorted((l, r) -> { + int result = r.getValue().compareTo(l.getValue()); + if (result == 0) result = l.getKey().compareTo(r.getKey()); + return result; + }) + .forEachOrdered((e) -> { + try { + writer.write(e.getValue().toString() + " " + e.getKey()); + writer.newLine(); + } catch (IOException ioException) { + ioException.printStackTrace(); + System.exit(1); + } + }); + } } - } }