From e03728e2292dc05c450a5809bed9e75ac7b7e971 Mon Sep 17 00:00:00 2001 From: Omar Hammami Date: Tue, 24 Mar 2026 00:12:37 +0300 Subject: [PATCH 1/2] added stdin option to recieve text --- src/Program.cs | 78 ++++++++++++++++++++++++------------ windows_media_ocr_cli.csproj | 36 ++++++++--------- 2 files changed, 70 insertions(+), 44 deletions(-) diff --git a/src/Program.cs b/src/Program.cs index f5888fb..683df10 100644 --- a/src/Program.cs +++ b/src/Program.cs @@ -10,40 +10,70 @@ using Windows.Globalization; using System.CommandLine; using System.CommandLine.Completions; - +using System.Runtime.InteropServices.WindowsRuntime; var fileOption = new Option( - name: "--file", - description: "The file to read and display on the console." -) { - IsRequired = true -}; + name: "--file", + description: "The file to read and display on the console." +); + +var stdinOption = new Option( + name: "--stdin", + description: "Read image data from stdin.", + getDefaultValue: () => false +); + var languageOption = new Option( - name: "--language", - description: "The language that should be used during OCR.", - getDefaultValue: () => "en-US" + name: "--language", + description: "The language that should be used during OCR.", + getDefaultValue: () => "en-US" ); var modeOption = new Option( - name: "--mode", - description: "The OCR output mode.", - getDefaultValue: () => OcrOutputMode.json + name: "--mode", + description: "The OCR output mode.", + getDefaultValue: () => OcrOutputMode.json ); - var rootCommand = new RootCommand("Start an OCR analysis using Windows local OcrEngine.") { - fileOption, - languageOption, - modeOption + fileOption, + stdinOption, + languageOption, + modeOption }; -rootCommand.SetHandler(Handler, fileOption, languageOption, modeOption); +rootCommand.SetHandler(Handler, fileOption, stdinOption, languageOption, modeOption); return await rootCommand.InvokeAsync(args); - -static async Task Handler(string filepath, string language, OcrOutputMode mode) +static 
async Task Handler(string filepath, bool useStdin, string language, OcrOutputMode mode) { - var result = await RecognizeAsync(filepath, language); + OcrResult result; + + if (useStdin) + { + using var memoryStream = new MemoryStream(); + await Console.OpenStandardInput().CopyToAsync(memoryStream); + memoryStream.Position = 0; + + using var randomAccessStream = new InMemoryRandomAccessStream(); + await randomAccessStream.WriteAsync(memoryStream.ToArray().AsBuffer()); + randomAccessStream.Seek(0); + + // Console.WriteLine(language); + result = await RecognizeAsync(randomAccessStream, language); + } + else if (!string.IsNullOrEmpty(filepath)) + { + var path = Path.GetFullPath(filepath); + var storageFile = await StorageFile.GetFileFromPathAsync(path); + using var randomAccessStream = await storageFile.OpenReadAsync(); + result = await RecognizeAsync(randomAccessStream, language); + } + else + { + throw new Exception("Either --file or --stdin must be provided."); + } + var txt = ""; if (mode == OcrOutputMode.json) @@ -74,12 +104,8 @@ static async Task Handler(string filepath, string language, OcrOutputMode mode) Console.WriteLine(txt); } - -static async Task RecognizeAsync(string filepath, string language) +static async Task RecognizeAsync(IRandomAccessStream randomAccessStream, string language) { - var path = Path.GetFullPath(filepath); - var storageFile = await StorageFile.GetFileFromPathAsync(path); - using var randomAccessStream = await storageFile.OpenReadAsync(); var decoder = await BitmapDecoder.CreateAsync(randomAccessStream); using var softwareBitmap = await decoder.GetSoftwareBitmapAsync( BitmapPixelFormat.Bgra8, @@ -108,4 +134,4 @@ enum OcrOutputMode { json, text -} +} \ No newline at end of file diff --git a/windows_media_ocr_cli.csproj b/windows_media_ocr_cli.csproj index 5abcb91..61ae168 100644 --- a/windows_media_ocr_cli.csproj +++ b/windows_media_ocr_cli.csproj @@ -1,20 +1,20 @@  - - Exe - net481 - 10.0 - enable - enable - 0.0.1 - true - + + Exe + 
net472 + 10.0 + enable + enable + 0.0.1 + true + - - - - - - all - - - + + + + + + all + + + \ No newline at end of file From c414395e81e912455e0f2fa92e84af3e174ddf5f Mon Sep 17 00:00:00 2001 From: Alexandre Daubricourt Date: Sun, 29 Mar 2026 14:51:03 +0200 Subject: [PATCH 2/2] docs: update README with detailed usage instructions and features refactor: improve command-line validation for file and stdin options chore: update project file to target .NET Framework 4.8.1 and include necessary packages --- README.md | 81 ++++++++++++++++++++++++++++++++---- src/Program.cs | 63 +++++++++++++++++----------- windows_media_ocr_cli.csproj | 37 ++++++++-------- 3 files changed, 130 insertions(+), 51 deletions(-) diff --git a/README.md b/README.md index 4f676cf..650869f 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,85 @@ # windows_media_ocr_cli -> 🔎 OCR CLI that outputs structured data with bounding rects using local Windows OCR API + +> 🔎 Fast OCR CLI for Windows: outputs structured data (bounding rects, text) using the local Windows OCR API ![image](https://github.com/user-attachments/assets/3a832c94-5030-41d8-9454-6869ec7cfcc1) -## How to install +--- + +## Features + +- OCR image files or image data from stdin +- Outputs results as JSON (with bounding boxes) or plain text +- Supports multiple languages (default: en-US) +- Fast, local processing (no cloud required) +- Simple CLI interface + +## Requirements + +- Windows 10/11 +- .NET Framework 4.8.1 or later +- Windows OCR API support (built-in on modern Windows) + +## Installation Download the latest executable from [Releases](https://github.com/Akronae/windows_media_ocr_cli/releases) -## How to use +Or build from source: -```bash +```sh +git clone https://github.com/Akronae/windows_media_ocr_cli.git +cd windows_media_ocr_cli +dotnet build +``` + +## Usage + +### Basic examples + +```sh +# OCR from file windows_media_ocr_cli.exe --file image.png + +# OCR from stdin (pipe image data) +type image.png | 
windows_media_ocr_cli.exe --stdin ``` -To see all options -```bash -windows_media_ocr_cli.exe -h + +### All options + +```sh +windows_media_ocr_cli.exe --file <path> [--language <lang>] [--mode <json|text>] +windows_media_ocr_cli.exe --stdin [--language <lang>] [--mode <json|text>] ``` + +| Option | Description | Default | +| ---------- | ------------------------------------------------- | ------- | +| --file | Path to image file | | +| --stdin | Read image data from stdin | false | +| --language | OCR language (e.g. en-US, fr-FR, zh-CN) | en-US | +| --mode | Output format: json (with bounding boxes) or text | json | + +### Output formats + +- **json**: Full OCR result, including bounding rectangles and lines/words. +- **text**: Plain text output (lines joined, no structure). + +## Troubleshooting + +- Make sure you are running on Windows 10/11 with .NET Framework 4.8.1+ installed. +- If you see errors about missing OCR API, update your Windows system. +- For large images, prefer file input over stdin for performance. + +## FAQ + +**Q: Can I use this on Linux or macOS?** +A: No, this tool relies on the Windows OCR API. + +**Q: How do I specify a different language?** +A: Use `--language <lang>`, e.g. `--language fr-FR`. + +**Q: What image formats are supported?** +A: Any format supported by Windows Imaging APIs (PNG, JPEG, BMP, etc.). 
+ +## License + +MIT diff --git a/src/Program.cs b/src/Program.cs index 683df10..1dd5835 100644 --- a/src/Program.cs +++ b/src/Program.cs @@ -1,46 +1,57 @@ using System; +using System.CommandLine; +using System.CommandLine.Completions; using System.IO; +using System.Runtime.InteropServices.WindowsRuntime; using System.Text; +using System.Text.Json; using System.Threading.Tasks; +using Windows.Globalization; using Windows.Graphics.Imaging; +using Windows.Media.Ocr; using Windows.Storage; using Windows.Storage.Streams; -using System.Text.Json; -using Windows.Media.Ocr; -using Windows.Globalization; -using System.CommandLine; -using System.CommandLine.Completions; -using System.Runtime.InteropServices.WindowsRuntime; var fileOption = new Option( - name: "--file", - description: "The file to read and display on the console." + name: "--file", + description: "The file to read and display on the console." ); var stdinOption = new Option( - name: "--stdin", - description: "Read image data from stdin.", - getDefaultValue: () => false + name: "--stdin", + description: "Read image data from stdin.", + getDefaultValue: () => false ); var languageOption = new Option( - name: "--language", - description: "The language that should be used during OCR.", - getDefaultValue: () => "en-US" + name: "--language", + description: "The language that should be used during OCR.", + getDefaultValue: () => "en-US" ); var modeOption = new Option( - name: "--mode", - description: "The OCR output mode.", - getDefaultValue: () => OcrOutputMode.json + name: "--mode", + description: "The OCR output mode.", + getDefaultValue: () => OcrOutputMode.json ); var rootCommand = new RootCommand("Start an OCR analysis using Windows local OcrEngine.") { - fileOption, - stdinOption, - languageOption, - modeOption + fileOption, + stdinOption, + languageOption, + modeOption, }; + +rootCommand.AddValidator(cmdResult => +{ + var file = cmdResult.GetValueForOption(fileOption); + var stdin = 
cmdResult.GetValueForOption(stdinOption); + if (string.IsNullOrEmpty(file) && !stdin) + { + cmdResult.ErrorMessage = "Either --file or --stdin must be provided."; + } +}); + rootCommand.SetHandler(Handler, fileOption, stdinOption, languageOption, modeOption); return await rootCommand.InvokeAsync(args); @@ -59,7 +70,6 @@ static async Task Handler(string filepath, bool useStdin, string language, OcrOu await randomAccessStream.WriteAsync(memoryStream.ToArray().AsBuffer()); randomAccessStream.Seek(0); - // Console.WriteLine(language); result = await RecognizeAsync(randomAccessStream, language); } else if (!string.IsNullOrEmpty(filepath)) @@ -71,7 +81,10 @@ static async Task Handler(string filepath, bool useStdin, string language, OcrOu } else { - throw new Exception("Either --file or --stdin must be provided."); + // This should be unreachable due to command-line validation + throw new InvalidOperationException( + "Unreachable code: either --file or --stdin must be provided." + ); } var txt = ""; @@ -133,5 +146,5 @@ static async Task RecognizeAsync(IRandomAccessStream randomAccessStre enum OcrOutputMode { json, - text -} \ No newline at end of file + text, +} diff --git a/windows_media_ocr_cli.csproj b/windows_media_ocr_cli.csproj index 61ae168..b6a9995 100644 --- a/windows_media_ocr_cli.csproj +++ b/windows_media_ocr_cli.csproj @@ -1,20 +1,19 @@  - - Exe - net472 - 10.0 - enable - enable - 0.0.1 - true - - - - - - - - all - - - \ No newline at end of file + + Exe + net481 + 10.0 + enable + enable + 0.0.1 + true + + + + + + + all + + +