From ed252278a8672fa2dd793633d2f09590e8e9cdcc Mon Sep 17 00:00:00 2001 From: sameerkhan001 Date: Tue, 31 Mar 2026 15:47:20 +0530 Subject: [PATCH 1/5] 1018565-d: Moved OCR processor under Data Extraction tree structure --- .../OCR/NET}/AWS-Textract.md | 0 .../OCR/NET}/Amazon-Linux-EC2-Setup-Guide.md | 0 .../OCR/NET/Assemblies-Required.md | 65 +++++++ .../OCR/NET}/Azure-Kubernetes-Service.md | 0 .../OCR/NET}/Azure-Vision.md | 0 .../OCR/NET}/Docker.md | 0 .../OCR/NET}/Dot-NET-Core.md | 0 .../OCR/NET}/Dot-NET-Framework.md | 0 .../OCR/NET}/Features.md | 0 .../OCR/NET/Getting-started-overview.md} | 177 +----------------- .../OCR/NET}/Linux.md | 0 .../OCR/NET}/MAC.md | 2 +- .../OCR/NET/NuGet-Packages-Required.md | 62 ++++++ .../OCR/NET}/OCR-Images/Apply-docker-aks.png | Bin .../OCR/NET}/OCR-Images/AzureFunctions1.png | Bin .../OCR/NET}/OCR-Images/AzureFunctions10.png | Bin .../OCR/NET}/OCR-Images/AzureFunctions11.png | Bin .../OCR/NET}/OCR-Images/AzureFunctions12.png | Bin .../OCR/NET}/OCR-Images/AzureFunctions13.png | Bin .../OCR/NET}/OCR-Images/AzureFunctions2.png | Bin .../OCR/NET}/OCR-Images/AzureFunctions3.png | Bin .../OCR/NET}/OCR-Images/AzureFunctions4.png | Bin .../OCR/NET}/OCR-Images/AzureFunctions5.png | Bin .../OCR/NET}/OCR-Images/AzureFunctions7.png | Bin .../OCR/NET}/OCR-Images/AzureFunctions8.png | Bin .../OCR/NET}/OCR-Images/AzureFunctions9.png | Bin .../Azure_configuration_window1.png | Bin .../Blazor-Server-App-JetBrains.png | Bin .../OCR/NET}/OCR-Images/Button-docker-aks.png | Bin .../OCR-Images/Core_sample_creation_step1.png | Bin .../OCR-Images/Core_sample_creation_step2.png | Bin .../OCR-Images/Core_sample_creation_step3.png | Bin .../OCR-Images/Core_sample_creation_step4.png | Bin .../OCR/NET}/OCR-Images/Deploy-docker-aks.png | Bin .../OCR/NET}/OCR-Images/Deployment_type.png | Bin .../NET}/OCR-Images/Docker_file_commends.png | Bin .../Install-Blazor-JetBrains-Package.png | Bin .../NET}/OCR-Images/Install-MVC-Package.png | Bin 
.../OCR/NET}/OCR-Images/Install-leptonica.png | Bin .../OCR/NET}/OCR-Images/Install-tesseract.png | Bin .../OCR/NET}/OCR-Images/JetBrains-Package.png | Bin .../OCR/NET}/OCR-Images/LinuxStep1.png | Bin .../OCR/NET}/OCR-Images/LinuxStep2.png | Bin .../OCR/NET}/OCR-Images/LinuxStep3.png | Bin .../OCR/NET}/OCR-Images/LinuxStep4.png | Bin .../OCR/NET}/OCR-Images/LinuxStep5.png | Bin .../OCR/NET}/OCR-Images/Mac_OS_Console.png | Bin .../OCR/NET}/OCR-Images/Mac_OS_NuGet_path.png | Bin .../OCR-Images/NET-sample-Azure-step1.png | Bin .../OCR-Images/NET-sample-Azure-step2.png | Bin .../OCR-Images/NET-sample-Azure-step3.png | Bin .../OCR-Images/NET-sample-Azure-step4.png | Bin .../OCR-Images/NET-sample-creation-step1.png | Bin .../OCR-Images/NET-sample-creation-step2.png | Bin .../OCR-Images/NET-sample-creation-step3.png | Bin .../OCR-Images/NET-sample-creation-step4.png | Bin .../OCR/NET}/OCR-Images/OCR-ASPNET-Step1.png | Bin .../OCR/NET}/OCR-Images/OCR-ASPNET-Step2.png | Bin .../OCR/NET}/OCR-Images/OCR-ASPNET-Step3.png | Bin .../OCR/NET}/OCR-Images/OCR-ASPNET-Step4.png | Bin .../OCR-Images/OCR-Core-NuGet-package.png | Bin .../NET}/OCR-Images/OCR-Core-app-creation.png | Bin .../OCR-Core-project-configuration1.png | Bin .../OCR-Core-project-configuration2.png | Bin .../OCR-Images/OCR-Docker-NuGet-package.png | Bin .../NET}/OCR-Images/OCR-MVC-NuGet-package.png | Bin .../NET}/OCR-Images/OCR-MVC-app-creation.png | Bin .../OCR-MVC-project-configuration1.png | Bin .../OCR-MVC-project-configuration2.png | Bin .../OCR/NET}/OCR-Images/OCR-NET-step1.png | Bin .../OCR/NET}/OCR-Images/OCR-NET-step2.png | Bin .../OCR/NET}/OCR-Images/OCR-NET-step3.png | Bin .../NET}/OCR-Images/OCR-WF-NuGet-package.png | Bin .../NET}/OCR-Images/OCR-WF-app-creation.png | Bin .../OCR-Images/OCR-WF-configuraion-window.png | Bin .../NET}/OCR-Images/OCR-WPF-NuGet-package.png | Bin .../NET}/OCR-Images/OCR-WPF-app-creation.png | Bin .../OCR-WPF-project-configuration.png | Bin 
.../OCR/NET}/OCR-Images/OCR-command-aks.png | Bin .../OCR-docker-configuration-window.png | Bin .../OCR/NET}/OCR-Images/OCR-output-image.png | Bin .../OCR/NET}/OCR-Images/OCRDocker1.png | Bin .../OCR/NET}/OCR-Images/OCRDocker6.png | Bin .../OCR/NET}/OCR-Images/OCR_docker_target.png | Bin .../OCR-Images/Output-genrate-webpage.png | Bin .../OCR/NET}/OCR-Images/Output.png | Bin .../OCR/NET}/OCR-Images/Push-docker-aks.png | Bin .../NET}/OCR-Images/Redistributable-file.png | Bin .../NET}/OCR-Images/Service-docker-aks.png | Bin .../OCR/NET}/OCR-Images/Set_Copy_Always.png | Bin .../OCR/NET}/OCR-Images/Tag-docker-image.png | Bin .../OCR/NET}/OCR-Images/Tessdata-path.png | Bin .../OCR/NET}/OCR-Images/TessdataRemove.jpeg | Bin .../OCR/NET}/OCR-Images/Tessdata_Store.png | Bin .../OCR-Images/WF_sample_creation_step1.png | Bin .../OCR-Images/WF_sample_creation_step2.png | Bin .../NET}/OCR-Images/azure_NuGet_package.png | Bin .../azure_additional_information.png | Bin .../OCR/NET}/OCR-Images/azure_step1.png | Bin .../OCR/NET}/OCR-Images/azure_step10.png | Bin .../OCR/NET}/OCR-Images/azure_step11.png | Bin .../OCR/NET}/OCR-Images/azure_step12.png | Bin .../OCR/NET}/OCR-Images/azure_step13.png | Bin .../OCR/NET}/OCR-Images/azure_step5.png | Bin .../OCR/NET}/OCR-Images/azure_step6.png | Bin .../OCR/NET}/OCR-Images/azure_step7.png | Bin .../OCR/NET}/OCR-Images/azure_step8.png | Bin .../OCR/NET}/OCR-Images/azure_step9.png | Bin .../NET}/OCR-Images/blazor_nuget_package.png | Bin .../OCR-Images/blazor_server_app_creation.png | Bin .../blazor_server_broswer_window.png | Bin .../blazor_server_configuration1.png | Bin .../blazor_server_configuration2.png | Bin .../create-asp.net-core-application.png | Bin .../OCR-Images/launch-jetbrains-rider.png | Bin .../OCR/NET}/OCR-Images/mac_step1.png | Bin .../OCR/NET}/OCR-Images/mac_step2.png | Bin .../OCR/NET}/OCR-Images/mac_step3.png | Bin .../OCR/NET}/OCR-Images/mac_step4.png | Bin .../OCR/NET}/OCR-Images/mac_step5.png | Bin 
.../OCR/NET}/OCR-Images/mac_step6.png | Bin .../OCR/NET}/OCR-Images/mac_step7.png | Bin .../OCR/NET}/Troubleshooting.md | 0 .../OCR/NET}/WPF.md | 0 .../OCR/NET}/Windows-Forms.md | 0 .../OCR/NET}/aspnet-mvc.md | 0 .../OCR/NET}/azure.md | 0 .../OCR/NET}/blazor.md | 0 ...-for-a-pdf-document-using-cSharp-and-VB.md | 0 ...m-ocr-for-a-pdf-document-using-net-Core.md | 0 .../OCR/NET}/net-core.md | 0 .../Data-Extraction/OCR/NET/overview.md | 47 +++++ .../Data-Extraction/OCR/overview.md | 14 ++ 133 files changed, 195 insertions(+), 172 deletions(-) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/AWS-Textract.md (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/Amazon-Linux-EC2-Setup-Guide.md (100%) create mode 100644 Document-Processing/Data-Extraction/OCR/NET/Assemblies-Required.md rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/Azure-Kubernetes-Service.md (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/Azure-Vision.md (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/Docker.md (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/Dot-NET-Core.md (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/Dot-NET-Framework.md (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/Features.md (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR/Working-with-OCR.md => Data-Extraction/OCR/NET/Getting-started-overview.md} (58%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/Linux.md (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/MAC.md (99%) create mode 100644 
Document-Processing/Data-Extraction/OCR/NET/NuGet-Packages-Required.md rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Apply-docker-aks.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/AzureFunctions1.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/AzureFunctions10.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/AzureFunctions11.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/AzureFunctions12.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/AzureFunctions13.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/AzureFunctions2.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/AzureFunctions3.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/AzureFunctions4.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/AzureFunctions5.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/AzureFunctions7.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/AzureFunctions8.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/AzureFunctions9.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Azure_configuration_window1.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => 
Data-Extraction/OCR/NET}/OCR-Images/Blazor-Server-App-JetBrains.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Button-docker-aks.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Core_sample_creation_step1.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Core_sample_creation_step2.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Core_sample_creation_step3.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Core_sample_creation_step4.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Deploy-docker-aks.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Deployment_type.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Docker_file_commends.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Install-Blazor-JetBrains-Package.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Install-MVC-Package.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Install-leptonica.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Install-tesseract.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/JetBrains-Package.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/LinuxStep1.png (100%) rename 
Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/LinuxStep2.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/LinuxStep3.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/LinuxStep4.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/LinuxStep5.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Mac_OS_Console.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Mac_OS_NuGet_path.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/NET-sample-Azure-step1.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/NET-sample-Azure-step2.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/NET-sample-Azure-step3.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/NET-sample-Azure-step4.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/NET-sample-creation-step1.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/NET-sample-creation-step2.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/NET-sample-creation-step3.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/NET-sample-creation-step4.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => 
Data-Extraction/OCR/NET}/OCR-Images/OCR-ASPNET-Step1.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-ASPNET-Step2.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-ASPNET-Step3.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-ASPNET-Step4.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-Core-NuGet-package.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-Core-app-creation.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-Core-project-configuration1.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-Core-project-configuration2.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-Docker-NuGet-package.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-MVC-NuGet-package.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-MVC-app-creation.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-MVC-project-configuration1.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-MVC-project-configuration2.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-NET-step1.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-NET-step2.png (100%) rename 
Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-NET-step3.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-WF-NuGet-package.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-WF-app-creation.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-WF-configuraion-window.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-WPF-NuGet-package.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-WPF-app-creation.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-WPF-project-configuration.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-command-aks.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-docker-configuration-window.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR-output-image.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCRDocker1.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCRDocker6.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/OCR_docker_target.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Output-genrate-webpage.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => 
Data-Extraction/OCR/NET}/OCR-Images/Output.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Push-docker-aks.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Redistributable-file.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Service-docker-aks.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Set_Copy_Always.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Tag-docker-image.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Tessdata-path.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/TessdataRemove.jpeg (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/Tessdata_Store.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/WF_sample_creation_step1.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/WF_sample_creation_step2.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/azure_NuGet_package.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/azure_additional_information.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/azure_step1.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/azure_step10.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => 
Data-Extraction/OCR/NET}/OCR-Images/azure_step11.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/azure_step12.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/azure_step13.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/azure_step5.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/azure_step6.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/azure_step7.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/azure_step8.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/azure_step9.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/blazor_nuget_package.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/blazor_server_app_creation.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/blazor_server_broswer_window.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/blazor_server_configuration1.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/blazor_server_configuration2.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/create-asp.net-core-application.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/launch-jetbrains-rider.png (100%) rename 
Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/mac_step1.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/mac_step2.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/mac_step3.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/mac_step4.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/mac_step5.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/mac_step6.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/OCR-Images/mac_step7.png (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/Troubleshooting.md (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/WPF.md (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/Windows-Forms.md (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/aspnet-mvc.md (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/azure.md (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/blazor.md (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/how-to-perform-ocr-for-a-pdf-document-using-cSharp-and-VB.md (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/how-to-perform-ocr-for-a-pdf-document-using-net-Core.md (100%) rename Document-Processing/{PDF/PDF-Library/NET/Working-with-OCR => Data-Extraction/OCR/NET}/net-core.md (100%) create mode 100644 
Document-Processing/Data-Extraction/OCR/NET/overview.md create mode 100644 Document-Processing/Data-Extraction/OCR/overview.md diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/AWS-Textract.md b/Document-Processing/Data-Extraction/OCR/NET/AWS-Textract.md similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/AWS-Textract.md rename to Document-Processing/Data-Extraction/OCR/NET/AWS-Textract.md diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Amazon-Linux-EC2-Setup-Guide.md b/Document-Processing/Data-Extraction/OCR/NET/Amazon-Linux-EC2-Setup-Guide.md similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Amazon-Linux-EC2-Setup-Guide.md rename to Document-Processing/Data-Extraction/OCR/NET/Amazon-Linux-EC2-Setup-Guide.md diff --git a/Document-Processing/Data-Extraction/OCR/NET/Assemblies-Required.md b/Document-Processing/Data-Extraction/OCR/NET/Assemblies-Required.md new file mode 100644 index 0000000000..8f19c56d27 --- /dev/null +++ b/Document-Processing/Data-Extraction/OCR/NET/Assemblies-Required.md @@ -0,0 +1,65 @@ +--- +title: Assemblies Required for OCR | Syncfusion +description: This section describes the Syncfusion assemblies required to integrate and use the OCR Processor effectively in your applications +platform: document-processing +control: PDF +documentation: UG +keywords: Assemblies +--- +# Assemblies Required to work with OCR processor + +Get the following required assemblies by downloading the OCR library installer. Download and install the OCR library for Windows, Linux, or Mac, depending on your platform. Please refer to the advanced installation steps for more details. + +#### Syncfusion® assemblies + + + + + + + + + + + + + + + + + + + + +
Platform(s)Assemblies
+Windows Forms, WPF, ASP.NET, and ASP.NET MVC + +
    +
  • Syncfusion.OCRProcessor.Base.dll
  • +
  • Syncfusion.Pdf.Base.dll
  • +
  • Syncfusion.Compression.Base.dll
  • +
  • Syncfusion.ImagePreProcessor.Base.dll
  • +
+
+.NET Standard 2.0 + +
    +
  • Syncfusion.OCRProcessor.Portable.dll
  • +
  • Syncfusion.PdfImaging.Portable.dll
  • +
  • Syncfusion.Pdf.Portable.dll
  • +
  • Syncfusion.Compression.Portable.dll
  • +
  • {{'[SkiaSharp](https://www.nuget.org/packages/SkiaSharp/3.119.1)'| markdownify }} package
  • +
  • Syncfusion.ImagePreProcessor.Portable.dll
  • +
+
+.NET 8/.NET 9/.NET 10 + +
    +
  • Syncfusion.OCRProcessor.NET.dll
  • +
  • Syncfusion.PdfImaging.NET.dll
  • +
  • Syncfusion.Pdf.NET.dll
  • +
  • Syncfusion.Compression.NET.dll
  • +
  • {{'[SkiaSharp](https://www.nuget.org/packages/SkiaSharp/3.119.1)'| markdownify }} package
  • +
  • Syncfusion.ImagePreProcessor.NET.dll
  • +
+
\ No newline at end of file diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Azure-Kubernetes-Service.md b/Document-Processing/Data-Extraction/OCR/NET/Azure-Kubernetes-Service.md similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Azure-Kubernetes-Service.md rename to Document-Processing/Data-Extraction/OCR/NET/Azure-Kubernetes-Service.md diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Azure-Vision.md b/Document-Processing/Data-Extraction/OCR/NET/Azure-Vision.md similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Azure-Vision.md rename to Document-Processing/Data-Extraction/OCR/NET/Azure-Vision.md diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Docker.md b/Document-Processing/Data-Extraction/OCR/NET/Docker.md similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Docker.md rename to Document-Processing/Data-Extraction/OCR/NET/Docker.md diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Dot-NET-Core.md b/Document-Processing/Data-Extraction/OCR/NET/Dot-NET-Core.md similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Dot-NET-Core.md rename to Document-Processing/Data-Extraction/OCR/NET/Dot-NET-Core.md diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Dot-NET-Framework.md b/Document-Processing/Data-Extraction/OCR/NET/Dot-NET-Framework.md similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Dot-NET-Framework.md rename to Document-Processing/Data-Extraction/OCR/NET/Dot-NET-Framework.md diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Features.md b/Document-Processing/Data-Extraction/OCR/NET/Features.md similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Features.md rename to 
Document-Processing/Data-Extraction/OCR/NET/Features.md diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Working-with-OCR.md b/Document-Processing/Data-Extraction/OCR/NET/Getting-started-overview.md similarity index 58% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Working-with-OCR.md rename to Document-Processing/Data-Extraction/OCR/NET/Getting-started-overview.md index c7c9791ad2..e0ed2ed98b 100644 --- a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Working-with-OCR.md +++ b/Document-Processing/Data-Extraction/OCR/NET/Getting-started-overview.md @@ -1,173 +1,14 @@ --- -title: Perform OCR on PDF features | Syncfusion -description: Learn how to perform OCR on scanned PDF documents and images with different tesseract versions using Syncfusion .NET OCR library. +title: Getting started with OCR processor | Syncfusion +description: This section provides an introduction to getting started with the OCR processor and explains the basic concepts and workflow involved platform: document-processing control: PDF documentation: UG -keywords: Assemblies --- +# Getting started with OCR processor -# Working with Optical Character Recognition (OCR) - -Optical character recognition (OCR) is a technology used to convert scanned paper documents in the form of PDF files or images into searchable and editable data. - -The [Syncfusion® OCR processor library](https://www.syncfusion.com/document-processing/pdf-framework/net/pdf-library/ocr-process) has extended support to process OCR on scanned PDF documents and images with the help of Google’s [Tesseract](https://github.com/tesseract-ocr/tesseract) Optical Character Recognition engine. - -An inbuilt `image preprocessor` has been added to the OCR to prepare images for optimal recognition. This step ensures cleaner input and reduces OCR errors. 
The preprocessor supports the following enhancements: - -* **Convert to Grayscale** – Simplifies image data by removing color information, making text easier to detect. -* **Deskew** – Corrects tilted or rotated text for proper alignment. -* **Denoise** – Removes speckles and artifacts that can interfere with character recognition. -* **Apply Contrast Adjustment** – Enhances text visibility against the background. -* **Apply Binarize** – Converts images to black-and-white for sharper text edges, using advanced thresholding methods - -The Syncfusion® OCR processor library works seamlessly in various platforms: Azure App Services, Azure Functions, AWS Textract, Docker, WinForms, WPF, Blazor, ASP.NET MVC, ASP.NET Core with Windows, MacOS and Linux. - -N> Starting with v20.1.0.x, if you reference Syncfusion® OCR processor assemblies from the trial setup or the NuGet feed, you also have to include a license key in your projects. Please refer to this [link](https://help.syncfusion.com/common/essential-studio/licensing/overview) to learn more about registering the Syncfusion® license key in your application to use its components. - -## Key features - -* Create a searchable PDF from scanned PDF. -* Zonal text extraction from the scanned PDF. -* Preserve Unicode characters. -* Extract text from the image. -* Create a searchable PDF from large scanned PDF documents. -* Create a searchable PDF from rotated scanned PDF. -* Get OCRed text and its bounds from a scanned PDF document. -* Native call. -* Customizing the temp folder. -* Performing OCR with different Page Segmentation Mode. -* Performing OCR with different OCR Engine Mode. -* White List. -* Black List. -* Image into searchable PDF or PDF/A. -* Improved accessibility. -* Post-processing. -* Compatible with .NET Framework 4.5 and above. -* Compatible with .NET Core 2.0 and above. - -## Install .NET OCR library - -Include the OCR library in your project using two approaches. 
- -* NuGet Package Required (Recommended) -* Assemblies Required - -N> Starting with v21.1.x, If you reference the Syncfusion® OCR processor library from the NuGet feed, the package structure has been changed. The TesseractBinaries and Tesseract language data paths has been automatically added and do not need to add it manually. - -### NuGet Package Required (Recommended) - -Directly install the NuGet package to your application from [nuget.org](https://www.nuget.org/). - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Platform(s)NuGet Package
-Windows Forms
-Console Application (Targeting .NET Framework) -
-{{'[Syncfusion.Pdf.OCR.WinForms.nupkg](https://www.nuget.org/packages/Syncfusion.Pdf.OCR.WinForms)'| markdownify }} -
-WPF - -{{'[Syncfusion.Pdf.OCR.Wpf.nupkg](https://www.nuget.org/packages/Syncfusion.Pdf.OCR.Wpf)'| markdownify }} -
-ASP.NET - -{{'[Syncfusion.Pdf.OCR.AspNet.nupkg](https://www.nuget.org/packages/Syncfusion.Pdf.OCR.AspNet)'| markdownify }} -
-ASP.NET MVC5 - -{{'[Syncfusion.Pdf.OCR.AspNet.Mvc5.nupkg](https://www.nuget.org/packages/Syncfusion.Pdf.OCR.AspNet.Mvc5)'| markdownify }} -
-ASP.NET Core (Targeting NET Core)
-Console Application (Targeting .NET Core)
-Blazor -
-{{'[Syncfusion.PDF.OCR.Net.Core](https://www.nuget.org/packages/Syncfusion.PDF.OCR.Net.Core)'| markdownify }} -
- -### Assemblies Required - -Get the following required assemblies by downloading the OCR library installer. Download and install the OCR library for Windows, Linux, and Mac respectively. Please refer to the advanced installation steps for more details. - -#### Syncfusion® assemblies - - - - - - - - - - - - - - - - - - - - -
Platform(s)Assemblies
-Windows Forms, WPF, ASP.NET, and ASP.NET MVC - -
    -
  • Syncfusion.OCRProcessor.Base.dll
  • -
  • Syncfusion.Pdf.Base.dll
  • -
  • Syncfusion.Compression.Base.dll
  • -
  • Syncfusion.ImagePreProcessor.Base.dll
  • -
-
-.NET Standard 2.0 - -
    -
  • Syncfusion.OCRProcessor.Portable.dll
  • -
  • Syncfusion.PdfImaging.Portable.dll
  • -
  • Syncfusion.Pdf.Portable.dll
  • -
  • Syncfusion.Compression.Portable.dll
  • -
  • {{'[SkiaSharp](https://www.nuget.org/packages/SkiaSharp/3.119.1)'| markdownify }} package
  • -
  • Syncfusion.ImagePreProcessor.Portable.dll
  • -
-
-.NET 8/.NET 9/.NET 10 - -
    -
  • Syncfusion.OCRProcessor.NET.dll
  • -
  • Syncfusion.PdfImaging.NET.dll
  • -
  • Syncfusion.Pdf.NET.dll
  • -
  • Syncfusion.Compression.NET.dll
  • -
  • {{'[SkiaSharp](https://www.nuget.org/packages/SkiaSharp/3.119.1)'| markdownify }} package
  • -
  • Syncfusion.ImagePreProcessor.NET.dll
  • -
-
+To quickly get started with extracting text from scanned PDF documents in .NET using the Syncfusion® OCR processor library, refer to this video tutorial: +{% youtube "https://www.youtube.com/watch?v=VhN7ETn0vyA" %} ## Prerequisites @@ -247,11 +88,6 @@ processor.PerformOCR(lDoc); {% endhighlight %} -## Get Started with OCR - -To quickly get started with extracting text from scanned PDF documents in .NET using the Syncfusion® OCR processor Library, refer to this video tutorial: -{% youtube "https://www.youtube.com/watch?v=VhN7ETn0vyA" %} - ### Perform OCR using C# Integrating the OCR processor library in any .NET application is simple. Please refer to the following steps to perform OCR in your .NET application. @@ -354,5 +190,4 @@ Refer to [this](https://help.syncfusion.com/document-processing/pdf/pdf-library/ ## Troubleshooting -Refer to [this](https://help.syncfusion.com/document-processing/pdf/pdf-library/net/working-with-ocr/troubleshooting) section for troubleshooting PDF OCR failures. - +Refer to [this](https://help.syncfusion.com/document-processing/pdf/pdf-library/net/working-with-ocr/troubleshooting) section for troubleshooting PDF OCR failures.
\ No newline at end of file diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Linux.md b/Document-Processing/Data-Extraction/OCR/NET/Linux.md similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Linux.md rename to Document-Processing/Data-Extraction/OCR/NET/Linux.md diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/MAC.md b/Document-Processing/Data-Extraction/OCR/NET/MAC.md similarity index 99% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/MAC.md rename to Document-Processing/Data-Extraction/OCR/NET/MAC.md index b638d07d28..76ba56a2eb 100644 --- a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/MAC.md +++ b/Document-Processing/Data-Extraction/OCR/NET/MAC.md @@ -7,7 +7,7 @@ documentation: UG keywords: Assemblies --- -# Perform OCR in Mac +# Perform OCR on macOS The [Syncfusion® .NET OCR library](https://www.syncfusion.com/document-processing/pdf-framework/net/pdf-library/ocr-process) used to extract text from scanned PDFs and images in the Mac application. diff --git a/Document-Processing/Data-Extraction/OCR/NET/NuGet-Packages-Required.md b/Document-Processing/Data-Extraction/OCR/NET/NuGet-Packages-Required.md new file mode 100644 index 0000000000..4e70dba940 --- /dev/null +++ b/Document-Processing/Data-Extraction/OCR/NET/NuGet-Packages-Required.md @@ -0,0 +1,62 @@ +--- +title: NuGet Packages for OCR | Syncfusion +description: This section illustrates the NuGet packages required to use Syncfusion OCR processor library in various platforms and frameworks +platform: document-processing +control: PDF +documentation: UG +--- +# NuGet Packages Required for OCR processor + +Directly install the NuGet package to your application from [nuget.org](https://www.nuget.org/). + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Platform(s)NuGet Package
+Windows Forms
+Console Application (Targeting .NET Framework) +
+{{'[Syncfusion.Pdf.OCR.WinForms.nupkg](https://www.nuget.org/packages/Syncfusion.Pdf.OCR.WinForms)'| markdownify }} +
+WPF + +{{'[Syncfusion.Pdf.OCR.Wpf.nupkg](https://www.nuget.org/packages/Syncfusion.Pdf.OCR.Wpf)'| markdownify }} +
+ASP.NET + +{{'[Syncfusion.Pdf.OCR.AspNet.nupkg](https://www.nuget.org/packages/Syncfusion.Pdf.OCR.AspNet)'| markdownify }} +
+ASP.NET MVC5 + +{{'[Syncfusion.Pdf.OCR.AspNet.Mvc5.nupkg](https://www.nuget.org/packages/Syncfusion.Pdf.OCR.AspNet.Mvc5)'| markdownify }} +
+ASP.NET Core (Targeting .NET Core)
+Console Application (Targeting .NET Core)
+Blazor +
+{{'[Syncfusion.PDF.OCR.Net.Core](https://www.nuget.org/packages/Syncfusion.PDF.OCR.Net.Core)'| markdownify }} +
\ No newline at end of file diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Apply-docker-aks.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Apply-docker-aks.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Apply-docker-aks.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Apply-docker-aks.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions1.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions1.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions1.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions1.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions10.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions10.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions10.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions10.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions11.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions11.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions11.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions11.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions12.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions12.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions12.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions12.png diff --git 
a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions13.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions13.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions13.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions13.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions2.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions2.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions2.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions2.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions3.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions3.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions3.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions3.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions4.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions4.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions4.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions4.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions5.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions5.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions5.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions5.png diff --git 
a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions7.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions7.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions7.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions7.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions8.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions8.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions8.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions8.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions9.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions9.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/AzureFunctions9.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/AzureFunctions9.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Azure_configuration_window1.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Azure_configuration_window1.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Azure_configuration_window1.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Azure_configuration_window1.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Blazor-Server-App-JetBrains.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Blazor-Server-App-JetBrains.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Blazor-Server-App-JetBrains.png rename to 
Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Blazor-Server-App-JetBrains.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Button-docker-aks.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Button-docker-aks.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Button-docker-aks.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Button-docker-aks.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Core_sample_creation_step1.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Core_sample_creation_step1.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Core_sample_creation_step1.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Core_sample_creation_step1.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Core_sample_creation_step2.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Core_sample_creation_step2.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Core_sample_creation_step2.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Core_sample_creation_step2.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Core_sample_creation_step3.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Core_sample_creation_step3.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Core_sample_creation_step3.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Core_sample_creation_step3.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Core_sample_creation_step4.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Core_sample_creation_step4.png similarity index 100% rename 
from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Core_sample_creation_step4.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Core_sample_creation_step4.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Deploy-docker-aks.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Deploy-docker-aks.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Deploy-docker-aks.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Deploy-docker-aks.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Deployment_type.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Deployment_type.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Deployment_type.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Deployment_type.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Docker_file_commends.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Docker_file_commends.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Docker_file_commends.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Docker_file_commends.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Install-Blazor-JetBrains-Package.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Install-Blazor-JetBrains-Package.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Install-Blazor-JetBrains-Package.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Install-Blazor-JetBrains-Package.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Install-MVC-Package.png 
b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Install-MVC-Package.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Install-MVC-Package.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Install-MVC-Package.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Install-leptonica.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Install-leptonica.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Install-leptonica.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Install-leptonica.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Install-tesseract.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Install-tesseract.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Install-tesseract.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Install-tesseract.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/JetBrains-Package.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/JetBrains-Package.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/JetBrains-Package.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/JetBrains-Package.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/LinuxStep1.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/LinuxStep1.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/LinuxStep1.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/LinuxStep1.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/LinuxStep2.png 
b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/LinuxStep2.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/LinuxStep2.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/LinuxStep2.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/LinuxStep3.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/LinuxStep3.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/LinuxStep3.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/LinuxStep3.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/LinuxStep4.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/LinuxStep4.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/LinuxStep4.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/LinuxStep4.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/LinuxStep5.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/LinuxStep5.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/LinuxStep5.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/LinuxStep5.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Mac_OS_Console.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Mac_OS_Console.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Mac_OS_Console.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Mac_OS_Console.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Mac_OS_NuGet_path.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Mac_OS_NuGet_path.png similarity index 100% rename from 
Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Mac_OS_NuGet_path.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Mac_OS_NuGet_path.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/NET-sample-Azure-step1.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/NET-sample-Azure-step1.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/NET-sample-Azure-step1.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/NET-sample-Azure-step1.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/NET-sample-Azure-step2.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/NET-sample-Azure-step2.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/NET-sample-Azure-step2.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/NET-sample-Azure-step2.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/NET-sample-Azure-step3.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/NET-sample-Azure-step3.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/NET-sample-Azure-step3.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/NET-sample-Azure-step3.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/NET-sample-Azure-step4.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/NET-sample-Azure-step4.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/NET-sample-Azure-step4.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/NET-sample-Azure-step4.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/NET-sample-creation-step1.png 
b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/NET-sample-creation-step1.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/NET-sample-creation-step1.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/NET-sample-creation-step1.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/NET-sample-creation-step2.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/NET-sample-creation-step2.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/NET-sample-creation-step2.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/NET-sample-creation-step2.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/NET-sample-creation-step3.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/NET-sample-creation-step3.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/NET-sample-creation-step3.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/NET-sample-creation-step3.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/NET-sample-creation-step4.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/NET-sample-creation-step4.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/NET-sample-creation-step4.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/NET-sample-creation-step4.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-ASPNET-Step1.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-ASPNET-Step1.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-ASPNET-Step1.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-ASPNET-Step1.png diff --git 
a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-ASPNET-Step2.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-ASPNET-Step2.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-ASPNET-Step2.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-ASPNET-Step2.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-ASPNET-Step3.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-ASPNET-Step3.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-ASPNET-Step3.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-ASPNET-Step3.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-ASPNET-Step4.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-ASPNET-Step4.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-ASPNET-Step4.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-ASPNET-Step4.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-Core-NuGet-package.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-Core-NuGet-package.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-Core-NuGet-package.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-Core-NuGet-package.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-Core-app-creation.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-Core-app-creation.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-Core-app-creation.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-Core-app-creation.png diff --git 
a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-Core-project-configuration1.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-Core-project-configuration1.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-Core-project-configuration1.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-Core-project-configuration1.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-Core-project-configuration2.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-Core-project-configuration2.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-Core-project-configuration2.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-Core-project-configuration2.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-Docker-NuGet-package.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-Docker-NuGet-package.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-Docker-NuGet-package.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-Docker-NuGet-package.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-MVC-NuGet-package.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-MVC-NuGet-package.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-MVC-NuGet-package.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-MVC-NuGet-package.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-MVC-app-creation.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-MVC-app-creation.png similarity index 100% rename from 
Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-MVC-app-creation.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-MVC-app-creation.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-MVC-project-configuration1.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-MVC-project-configuration1.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-MVC-project-configuration1.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-MVC-project-configuration1.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-MVC-project-configuration2.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-MVC-project-configuration2.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-MVC-project-configuration2.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-MVC-project-configuration2.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-NET-step1.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-NET-step1.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-NET-step1.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-NET-step1.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-NET-step2.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-NET-step2.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-NET-step2.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-NET-step2.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-NET-step3.png 
b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-NET-step3.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-NET-step3.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-NET-step3.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-WF-NuGet-package.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-WF-NuGet-package.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-WF-NuGet-package.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-WF-NuGet-package.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-WF-app-creation.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-WF-app-creation.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-WF-app-creation.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-WF-app-creation.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-WF-configuraion-window.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-WF-configuraion-window.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-WF-configuraion-window.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-WF-configuraion-window.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-WPF-NuGet-package.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-WPF-NuGet-package.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-WPF-NuGet-package.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-WPF-NuGet-package.png diff --git 
a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-WPF-app-creation.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-WPF-app-creation.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-WPF-app-creation.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-WPF-app-creation.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-WPF-project-configuration.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-WPF-project-configuration.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-WPF-project-configuration.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-WPF-project-configuration.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-command-aks.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-command-aks.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-command-aks.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-command-aks.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-docker-configuration-window.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-docker-configuration-window.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-docker-configuration-window.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-docker-configuration-window.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-output-image.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-output-image.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR-output-image.png rename to 
Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR-output-image.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCRDocker1.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCRDocker1.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCRDocker1.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCRDocker1.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCRDocker6.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCRDocker6.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCRDocker6.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCRDocker6.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR_docker_target.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR_docker_target.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/OCR_docker_target.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/OCR_docker_target.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Output-genrate-webpage.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Output-genrate-webpage.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Output-genrate-webpage.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Output-genrate-webpage.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Output.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Output.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Output.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Output.png diff --git 
a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Push-docker-aks.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Push-docker-aks.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Push-docker-aks.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Push-docker-aks.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Redistributable-file.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Redistributable-file.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Redistributable-file.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Redistributable-file.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Service-docker-aks.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Service-docker-aks.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Service-docker-aks.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Service-docker-aks.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Set_Copy_Always.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Set_Copy_Always.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Set_Copy_Always.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Set_Copy_Always.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Tag-docker-image.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Tag-docker-image.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Tag-docker-image.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Tag-docker-image.png diff --git 
a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Tessdata-path.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Tessdata-path.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Tessdata-path.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Tessdata-path.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/TessdataRemove.jpeg b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/TessdataRemove.jpeg similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/TessdataRemove.jpeg rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/TessdataRemove.jpeg diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Tessdata_Store.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Tessdata_Store.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/Tessdata_Store.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/Tessdata_Store.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/WF_sample_creation_step1.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/WF_sample_creation_step1.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/WF_sample_creation_step1.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/WF_sample_creation_step1.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/WF_sample_creation_step2.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/WF_sample_creation_step2.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/WF_sample_creation_step2.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/WF_sample_creation_step2.png diff --git 
a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_NuGet_package.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_NuGet_package.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_NuGet_package.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_NuGet_package.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_additional_information.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_additional_information.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_additional_information.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_additional_information.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_step1.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_step1.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_step1.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_step1.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_step10.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_step10.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_step10.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_step10.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_step11.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_step11.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_step11.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_step11.png diff --git 
a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_step12.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_step12.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_step12.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_step12.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_step13.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_step13.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_step13.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_step13.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_step5.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_step5.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_step5.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_step5.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_step6.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_step6.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_step6.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_step6.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_step7.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_step7.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_step7.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_step7.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_step8.png 
b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_step8.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_step8.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_step8.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_step9.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_step9.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/azure_step9.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/azure_step9.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/blazor_nuget_package.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/blazor_nuget_package.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/blazor_nuget_package.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/blazor_nuget_package.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/blazor_server_app_creation.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/blazor_server_app_creation.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/blazor_server_app_creation.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/blazor_server_app_creation.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/blazor_server_broswer_window.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/blazor_server_broswer_window.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/blazor_server_broswer_window.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/blazor_server_broswer_window.png diff --git 
a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/blazor_server_configuration1.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/blazor_server_configuration1.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/blazor_server_configuration1.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/blazor_server_configuration1.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/blazor_server_configuration2.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/blazor_server_configuration2.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/blazor_server_configuration2.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/blazor_server_configuration2.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/create-asp.net-core-application.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/create-asp.net-core-application.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/create-asp.net-core-application.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/create-asp.net-core-application.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/launch-jetbrains-rider.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/launch-jetbrains-rider.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/launch-jetbrains-rider.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/launch-jetbrains-rider.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/mac_step1.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/mac_step1.png similarity index 100% rename from 
Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/mac_step1.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/mac_step1.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/mac_step2.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/mac_step2.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/mac_step2.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/mac_step2.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/mac_step3.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/mac_step3.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/mac_step3.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/mac_step3.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/mac_step4.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/mac_step4.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/mac_step4.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/mac_step4.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/mac_step5.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/mac_step5.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/mac_step5.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/mac_step5.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/mac_step6.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/mac_step6.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/mac_step6.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/mac_step6.png diff --git 
a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/mac_step7.png b/Document-Processing/Data-Extraction/OCR/NET/OCR-Images/mac_step7.png similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/OCR-Images/mac_step7.png rename to Document-Processing/Data-Extraction/OCR/NET/OCR-Images/mac_step7.png diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Troubleshooting.md b/Document-Processing/Data-Extraction/OCR/NET/Troubleshooting.md similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Troubleshooting.md rename to Document-Processing/Data-Extraction/OCR/NET/Troubleshooting.md diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/WPF.md b/Document-Processing/Data-Extraction/OCR/NET/WPF.md similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/WPF.md rename to Document-Processing/Data-Extraction/OCR/NET/WPF.md diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Windows-Forms.md b/Document-Processing/Data-Extraction/OCR/NET/Windows-Forms.md similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Windows-Forms.md rename to Document-Processing/Data-Extraction/OCR/NET/Windows-Forms.md diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/aspnet-mvc.md b/Document-Processing/Data-Extraction/OCR/NET/aspnet-mvc.md similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/aspnet-mvc.md rename to Document-Processing/Data-Extraction/OCR/NET/aspnet-mvc.md diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/azure.md b/Document-Processing/Data-Extraction/OCR/NET/azure.md similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/azure.md rename to Document-Processing/Data-Extraction/OCR/NET/azure.md diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/blazor.md 
b/Document-Processing/Data-Extraction/OCR/NET/blazor.md similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/blazor.md rename to Document-Processing/Data-Extraction/OCR/NET/blazor.md diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/how-to-perform-ocr-for-a-pdf-document-using-cSharp-and-VB.md b/Document-Processing/Data-Extraction/OCR/NET/how-to-perform-ocr-for-a-pdf-document-using-cSharp-and-VB.md similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/how-to-perform-ocr-for-a-pdf-document-using-cSharp-and-VB.md rename to Document-Processing/Data-Extraction/OCR/NET/how-to-perform-ocr-for-a-pdf-document-using-cSharp-and-VB.md diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/how-to-perform-ocr-for-a-pdf-document-using-net-Core.md b/Document-Processing/Data-Extraction/OCR/NET/how-to-perform-ocr-for-a-pdf-document-using-net-Core.md similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/how-to-perform-ocr-for-a-pdf-document-using-net-Core.md rename to Document-Processing/Data-Extraction/OCR/NET/how-to-perform-ocr-for-a-pdf-document-using-net-Core.md diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/net-core.md b/Document-Processing/Data-Extraction/OCR/NET/net-core.md similarity index 100% rename from Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/net-core.md rename to Document-Processing/Data-Extraction/OCR/NET/net-core.md diff --git a/Document-Processing/Data-Extraction/OCR/NET/overview.md b/Document-Processing/Data-Extraction/OCR/NET/overview.md new file mode 100644 index 0000000000..fa050f0c11 --- /dev/null +++ b/Document-Processing/Data-Extraction/OCR/NET/overview.md @@ -0,0 +1,47 @@ +--- +title: Perform OCR on PDF features | Syncfusion +description: Learn how to perform OCR on scanned PDF documents and images with different tesseract versions using Syncfusion .NET OCR library. 
+platform: document-processing +control: PDF +documentation: UG +keywords: Assemblies +--- + +# Overview of Optical Character Recognition (OCR) + +Optical character recognition (OCR) is a technology used to convert scanned paper documents in the form of PDF files or images into searchable and editable data. + +The [Syncfusion® OCR processor library](https://www.syncfusion.com/document-processing/pdf-framework/net/pdf-library/ocr-process) has extended support to process OCR on scanned PDF documents and images with the help of Google’s [Tesseract](https://github.com/tesseract-ocr/tesseract) Optical Character Recognition engine. + +An inbuilt `image preprocessor` has been added to the OCR to prepare images for optimal recognition. This step ensures cleaner input and reduces OCR errors. The preprocessor supports the following enhancements: + +* **Convert to Grayscale** – Simplifies image data by removing color information, making text easier to detect. +* **Deskew** – Corrects tilted or rotated text for proper alignment. +* **Denoise** – Removes speckles and artifacts that can interfere with character recognition. +* **Apply Contrast Adjustment** – Enhances text visibility against the background. +* **Apply Binarize** – Converts images to black-and-white for sharper text edges, using advanced thresholding methods. + +The Syncfusion® OCR processor library works seamlessly on various platforms: Azure App Services, Azure Functions, AWS Textract, Docker, WinForms, WPF, Blazor, ASP.NET MVC, and ASP.NET Core on Windows, macOS, and Linux. + +N> Starting with v20.1.0.x, if you reference Syncfusion® OCR processor assemblies from the trial setup or the NuGet feed, you also have to include a license key in your projects. Please refer to this [link](https://help.syncfusion.com/common/essential-studio/licensing/overview) to learn more about registering the Syncfusion® license key in your application to use its components. 
+ +## Key features + +* Create a searchable PDF from a scanned PDF. +* Zonal text extraction from the scanned PDF. +* Preserve Unicode characters. +* Extract text from the image. +* Create a searchable PDF from large scanned PDF documents. +* Create a searchable PDF from a rotated scanned PDF. +* Get OCRed text and its bounds from a scanned PDF document. +* Native call. +* Customizing the temp folder. +* Performing OCR with different Page Segmentation Modes. +* Performing OCR with different OCR Engine Modes. +* White List. +* Black List. +* Image into searchable PDF or PDF/A. +* Improved accessibility. +* Post-processing. +* Compatible with .NET Framework 4.5 and above. +* Compatible with .NET Core 2.0 and above. diff --git a/Document-Processing/Data-Extraction/OCR/overview.md b/Document-Processing/Data-Extraction/OCR/overview.md new file mode 100644 index 0000000000..733184a8b0 --- /dev/null +++ b/Document-Processing/Data-Extraction/OCR/overview.md @@ -0,0 +1,14 @@ +--- +title: Intro to OCR Processor | Syncfusion +description: This page introduces the Syncfusion OCR Processor, describing its purpose, key capabilities, and how to get started with optical character recognition in .NET applications. +platform: document-processing +control: OCRProcessor +documentation: UG +keywords: OCR, Optical Character Recognition, Text Recognition +--- + +# Welcome to Syncfusion OCR Processor Library + +Syncfusion® OCR Processor is a high‑performance .NET library that enables accurate text recognition from scanned documents, images, and PDF files. Designed for modern .NET workflows, it processes raster images and document pages to recognize printed text, analyze page layouts, and extract textual content programmatically. 
+ +The OCR Processor supports common document formats and provides a streamlined API for converting image‑based content into machine‑readable text, making it suitable for scenarios such as document digitization, text search, content indexing, and data processing in enterprise applications. \ No newline at end of file From a907d060a6942b53bd10dbf0e9ebeddff1d7e14f Mon Sep 17 00:00:00 2001 From: sameerkhan001 Date: Tue, 31 Mar 2026 15:54:20 +0530 Subject: [PATCH 2/5] 1018565-d: Added TOC content. --- Document-Processing-toc.html | 169 +++++++++++++++++------------------ 1 file changed, 80 insertions(+), 89 deletions(-) diff --git a/Document-Processing-toc.html b/Document-Processing-toc.html index eeb1d387d3..7585a089a1 100644 --- a/Document-Processing-toc.html +++ b/Document-Processing-toc.html @@ -9,7 +9,7 @@
  • System Requirements
  • -
  • Skills +
  • Skills
  • +
  • Smart Table Extractor
      @@ -192,10 +193,11 @@
  • +
  • Smart Form Recognizer -
  • + + + + +
  • + OCR Processor + +
  • @@ -687,29 +733,6 @@
  • Text Search
  • Annotation
  • -
  • @@ -7934,10 +7903,32 @@
  • Release Notes -
  • @@ -5139,6 +5169,9 @@
  • Azure Functions v4
  • +
  • + Azure Functions Flex Consumption +
  • @@ -5265,6 +5298,9 @@
  • Azure Functions v4
  • +
  • + Azure Functions Flex Consumption +
  • @@ -5536,6 +5572,7 @@
  • @@ -5808,6 +5845,7 @@
  • Limitations
  • +
  • @@ -7903,32 +7941,10 @@
  • Release Notes -
  • @@ -5001,9 +4992,6 @@
  • Azure Functions v4
  • -
  • - Azure Functions Flex Consumption -
  • @@ -5169,9 +5157,6 @@
  • Azure Functions v4
  • -
  • - Azure Functions Flex Consumption -
  • @@ -5298,9 +5283,6 @@
  • Azure Functions v4
  • -
  • - Azure Functions Flex Consumption -
  • From 3c95118713878c0641f7fa8e5495b5d2c82a2f1c Mon Sep 17 00:00:00 2001 From: sameerkhan001 Date: Wed, 1 Apr 2026 10:51:35 +0530 Subject: [PATCH 5/5] 1018565-d: Resolved CI failures. --- .../Data-Extraction/OCR/NET/Dot-NET-Core.md | 1175 ------------- .../OCR/NET/Dot-NET-Framework.md | 1525 ----------------- 2 files changed, 2700 deletions(-) delete mode 100644 Document-Processing/Data-Extraction/OCR/NET/Dot-NET-Core.md delete mode 100644 Document-Processing/Data-Extraction/OCR/NET/Dot-NET-Framework.md diff --git a/Document-Processing/Data-Extraction/OCR/NET/Dot-NET-Core.md b/Document-Processing/Data-Extraction/OCR/NET/Dot-NET-Core.md deleted file mode 100644 index 2584c84a20..0000000000 --- a/Document-Processing/Data-Extraction/OCR/NET/Dot-NET-Core.md +++ /dev/null @@ -1,1175 +0,0 @@ ---- -title: OCR processor for .NET Core with tesseract | Syncfusion -description: This section explains how to process OCR for the existing PDF documents and Images with different version tesseract. -platform: document-processing -control: PDF -documentation: UG ---- - -# Working with Optical Character Recognition - -Essential® PDF provides support for Optical Character Recognition with the help of Google’s Tesseract Optical Character Recognition engine. - -N> Starting with v20.1.0.x, if you reference Syncfusion® OCR processor assemblies from trial setup or from the NuGet feed, you also have to include a license key in your projects. Please refer to this [link](https://help.syncfusion.com/common/essential-studio/licensing/overview) to know about registering Syncfusion® license key in your application to use our components. - -## Prerequisites - -To use the OCR feature in the .NET core and .NET application, the following assemblies or NuGet packages should be added as a reference to the project: - -### Assemblies - - - - - - - - - - - - - -
    -.NET Standard 2.0

    -.NET Standard 2.0 / .NET 5/.NET 6

    -Syncfusion.Compression.Portable.dll
    -Syncfusion.Pdf.Portable.dll
    -Syncfusion.PdfImaging.Portable.dll
    -Syncfusion.OCRProcessor.Portable.dll
    -{{'[System.Drawing.Common](https://www.nuget.org/packages/System.Drawing.Common/4.5.0)'| markdownify }} package (v 4.5.0 or above) -

    -Syncfusion.Compression.NET.dll
    -Syncfusion.Pdf.NET.dll
    -Syncfusion.PdfImaging.NET.dll
    -Syncfusion.OCRProcessor.NET.dll
    -{{'[SkiaSharp](https://www.nuget.org/packages/SkiaSharp/2.88.0-preview.232)'| markdownify }} package -

    - -### NuGet - - - - - - - - - - - - - - - - -
    .NET VersionNuGet Package
    -.NET Standard 2.0/.NET Standard 2.1/.NET Core 2.0/.NET Core 2.1/.NET Core 3.1 - -{{'[Syncfusion.PDF.OCR.Net.Core](https://www.nuget.org/packages/Syncfusion.PDF.OCR.Net.Core/)'| markdownify }} -
    -.NET Standard 2.0/.NET Standard 2.1/.NET Core 2.0/.NET Core 2.1/.NET Core 3.1/.NET 5.0/.NET 6.0 - -{{'[Syncfusion.PDF.OCR.NET](https://www.nuget.org/packages/Syncfusion.PDF.OCR.NET/)'| markdownify }} -
    - -N> TesseractBinaries and tessdata folders can be copied automatically from the NuGet packages. There is no need to copy these folders and set the path. - -## Prerequisites for Windows - -* Provide the TesseractBinaries windows folder path when creating a new OCR processor. Please refer to the following code snippet for windows. -{% capture codesnippet1 %} -{% tabs %} - -{% highlight c# tabtitle="ASP.NET Core" %} - - -OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/Windows"); - - -{% endhighlight %} - -{% endtabs %} -{% endcapture %} -{{ codesnippet1 | OrderList_Indent_Level_1 }} - -* Provide the tesseract language data folder path (tessdata) when performing the OCR to recognize different language images. -{% capture codesnippet2 %} -{% tabs %} - -{% highlight c# tabtitle="ASP.NET Core" %} - - -processor.PerformOCR(lDoc, "tessdata/"); - - -{% endhighlight %} - -{% endtabs %} -{% endcapture %} -{{ codesnippet2 | OrderList_Indent_Level_1 }} - -## Prerequisites for Linux - -* We are using the “System.Drawing.Common” API in the OCR Processor. So, it is mandatory to install the “libgdiplus” and “libopenjp2-7” package. Please refer to the following commands to install the packages. - - 1. sudo apt-get update - 2. sudo apt-get install libgdiplus - 3. sudo apt-get install y- libopenjp2-7 - -* Provide the TesseractBinaries Linux folder path when creating a new OCR processor. Please refer to the following code snippet for Linux. -{% capture codesnippet3 %} -{% tabs %} - -{% highlight c# tabtitle="ASP.NET Core" %} - - -OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/Linux"); - - -{% endhighlight %} - -{% endtabs %} -{% endcapture %} -{{ codesnippet3 | OrderList_Indent_Level_1 }} - -* Provide the tesseract language data folder path (tessdata) when performing the OCR to recognize different language images. 
- {% capture codesnippet4 %} -{% tabs %} - -{% highlight c# tabtitle="ASP.NET Core" %} - - -processor.PerformOCR(lDoc, "tessdata/"); - - -{% endhighlight %} - -{% endtabs %} -{% endcapture %} -{{ codesnippet4 | OrderList_Indent_Level_1 }} - -You can download the language packages from the following link - -[https://code.google.com/p/tesseract-ocr/downloads/list](https://github.com/tesseract-ocr/tessdata) - - -## Prerequisites for Mac - - -* We are internally using the “System.Drawing.Common” package to process the image and perform the OCR in the OCR Processor. So, it is mandatory to install the “'libgdiplus”, and “tesseract” packages in the Mac machine where the OCR operations occur. Please refer to the following commands to install this package. - - 1. brew install mono-libgdiplus - 2. brew install tesseract - -* Provide the TesseractBinaries Mac folder path when creating a new OCR processor. Please refer to the following code snippet for Mac. -{% capture codesnippet5 %} -{% tabs %} - -{% highlight c# tabtitle="ASP.NET Core" %} - - -OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/Mac"); - - -{% endhighlight %} - -{% endtabs %} -{% endcapture %} -{{ codesnippet5 | OrderList_Indent_Level_1 }} - -* Provide the tesseract language data folder path (tessdata) when performing the OCR to recognize different language images. 
-{% capture codesnippet6 %} -{% tabs %} - -{% highlight c# tabtitle="ASP.NET Core" %} - - -processor.PerformOCR(lDoc, "tessdata/"); - - -{% endhighlight %} - -{% endtabs %} -{% endcapture %} -{{ codesnippet6 | OrderList_Indent_Level_1 }} - -You can download the language packages from the following link - -[https://code.google.com/p/tesseract-ocr/downloads/list](https://code.google.com/p/tesseract-ocr/downloads/list) - - -## Performing OCR in Windows - -To perform the OCR in the ASP.NET Core project in Windows, refer to the following code snippet, - -{% tabs %} - -{% highlight c# tabtitle="ASP.NET Core" %} - - -//Initialize the OCR processor with tesseract binaries folder path -using (OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/Windows")) -{ -//Load a PDF document -FileStream stream = new FileStream(@"Input.pdf", FileMode.Open); -PdfLoadedDocument document = new PdfLoadedDocument(stream); - -//Set OCR language -processor.Settings.Language = Languages.English; - -//Perform OCR with input document and tessdata (Language packs) -processor.PerformOCR(document, @"tessdata/"); - -MemoryStream outputStream = new MemoryStream(); - -//Save the document into stream. -document.Save(outputStream); - -//If the position is not set to '0' then the PDF will be empty. -outputStream.Position = 0; - -//Close the document. -document.Close(true); - -//Defining the ContentType for pdf file. -string contentType = "application/pdf"; - -//Define the file name. -string fileName = "Output.pdf"; - -//Creates a FileContentResult object by using the file contents, content type, and file name. -return File(outputStream, contentType, fileName); -} - - -{% endhighlight %} - -{% endtabs %} - -You can download a complete working sample from [GitHub](https://github.com/SyncfusionExamples/PDF-Examples/tree/master/OCR/.NET/Perform-OCR-for-the-entire-PDF-document). 
- -## Performing OCR in Linux - -To perform the OCR in the ASP.NET Core project in Linux, refer to the following code snippet, - -{% tabs %} - -{% highlight c# tabtitle="ASP.NET Core" %} - -//Initialize the OCR processor with tesseract binaries folder path -using (OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/Linux")) -{ -//Load a PDF document -FileStream stream = new FileStream(@"Input.pdf", FileMode.Open); -PdfLoadedDocument document = new PdfLoadedDocument(stream); - -//Set OCR language -processor.Settings.Language = Languages.English; - -//Perform OCR with input document and tessdata (Language packs) -processor.PerformOCR(document, @"tessdata/"); - -MemoryStream outputStream = new MemoryStream(); - -//Save the document into stream. -document.Save(outputStream); - -//If the position is not set to '0' then the PDF will be empty. -outputStream.Position = 0; - -//Close the document. -document.Close(true); - -//Defining the ContentType for pdf file. -string contentType = "application/pdf"; - -//Define the file name. -string fileName = "Output.pdf"; - -//Creates a FileContentResult object by using the file contents, content type, and file name. 
-return File(outputStream, contentType, fileName); -} - - -{% endhighlight %} - -{% endtabs %} - -## Performing OCR in Mac - -To perform the OCR in the ASP.NET Core project in Mac, refer to the following code snippet, - -{% tabs %} - -{% highlight c# tabtitle="ASP.NET Core" %} - - -//Initialize the OCR processor with tesseract binaries folder path -using (OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/Mac")) -{ -//Load a PDF document -FileStream stream = new FileStream(@"Input.pdf", FileMode.Open); -PdfLoadedDocument document = new PdfLoadedDocument(stream); - -//Set OCR language -processor.Settings.Language = Languages.English; - -//Perform OCR with input document and tessdata (Language packs) -processor.PerformOCR(document, @"tessdata/"); - -MemoryStream outputStream = new MemoryStream(); - -//Save the document into stream. -document.Save(outputStream); - -//If the position is not set to '0' then the PDF will be empty. -outputStream.Position = 0; - -//Close the document. -document.Close(true); - -//Defining the ContentType for pdf file. -string contentType = "application/pdf"; - -//Define the file name. -string fileName = "Output.pdf"; - -//Creates a FileContentResult object by using the file contents, content type, and file name. -return File(outputStream, contentType, fileName); -} - - -{% endhighlight %} - -{% endtabs %} - -N> The `PerformOCR` methods return only the text OCRed by `OCRProcessor`. Other existing text in the PDF page will not be returned in this method. - -## Performing OCR for a region - -You can perform OCR on particular region of the PDF page with help of the [PageRegion](https://help.syncfusion.com/cr/document-processing/Syncfusion.OCRProcessor.PageRegion.html) class. 
Refer to the following code snippet, - -{% tabs %} - -{% highlight c# tabtitle="ASP.NET Core" %} - - - -//Initialize the OCR processor by providing the path of the tesseract -using (OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/Windows")) -{ -//Load a PDF document -FileStream stream = new FileStream(@"Input.pdf", FileMode.Open); - -PdfLoadedDocument document = new PdfLoadedDocument(stream); - -//Set OCR language to process -processor.Settings.Language = Languages.English; - -RectangleF rect = new RectangleF(0, 100, 950, 150); -//Assign rectangles to the page -List pageRegions = new List(); -//Create page region -PageRegion region = new PageRegion(); -//Set page index -region.PageIndex = 1; -//Set page region -region.PageRegions = new RectangleF[] { rect }; -//Add region to page region -pageRegions.Add(region); -//Set page regions -processor.Settings.Regions = pageRegions; -//Perform OCR with input document and tessdata (Language packs) -processor.PerformOCR(document, @"tessdata/"); - -//Creating the stream object -MemoryStream outputStream = new MemoryStream(); - -//Save the document into stream. -document.Save(outputStream); - -//If the position is not set to '0' then the PDF will be empty. -outputStream.Position = 0; - -//Close the documents. -document.Close(true); - -//Defining the ContentType for pdf file. -string contentType = "application/pdf"; - -//Define the file name. -string fileName = "Output.pdf"; - -//Creates a FileContentResult object by using the file contents, content type, and file name. -return File(outputStream, contentType, fileName); -} - - - -{% endhighlight %} - -{% endtabs %} - -You can download a complete working sample from [GitHub](https://github.com/SyncfusionExamples/PDF-Examples/tree/master/OCR/.NET/Perform-OCR-on-particular-region-of-PDF-document). - -## Performing OCR with rotated pages - -You can perform OCR on the rotated page of a PDF document. Refer to the following code snippet for the same. 
- -{% tabs %} - -{% highlight c# tabtitle="ASP.NET Core" %} - - - -//Initialize the OCR processor by providing the path of tesseract -using (OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/Windows")) -{ -//Load a PDF document -FileStream stream = new FileStream(@"Input.pdf", FileMode.Open); - -PdfLoadedDocument document = new PdfLoadedDocument(stream); - -//Set OCR language to process -processor.Settings.Language = Languages.English; - -//Set the Page Segment Mode. -processor.Settings.PageSegment = PageSegMode.AutoOsd; - -//Process OCR by providing the PDF document, data dictionary, and language -processor.PerformOCR(document, @"tessdata/"); - -//Creating the stream object -MemoryStream outputStream = new MemoryStream(); - -//Save the document into stream. -document.Save(outputStream); - -//If the position is not set to '0' then the PDF will be empty. -outputStream.Position = 0; - -//Close the documents. -document.Close(true); - -//Defining the ContentType for pdf file. -string contentType = "application/pdf"; - -//Define the file name. -string fileName = "Output.pdf"; - -//Creates a FileContentResult object by using the file contents, content type, and file name. -return File(outputStream, contentType, fileName); -} - - - -{% endhighlight %} - -{% endtabs %} - -You can download a complete working sample from [GitHub](https://github.com/SyncfusionExamples/PDF-Examples/tree/master/OCR/.NET/Perform-OCR-on-the-rotated-page-of-the-PDF-document). - -## Performing OCR with Unicode characters - -You can perform OCR on Images with Unicode characters. To preserve the Unicode characters in the PDF document, use the UnicodeFont property. Refer to the following code snippet. 
- -{% tabs %} - -{% highlight c# tabtitle="ASP.NET Core" %} - - -//Initialize the OCR processor by providing the path of tesseract -using (OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/Windows")) -{ -//Load a PDF document -FileStream stream = new FileStream(@"Input.pdf", FileMode.Open); - -PdfLoadedDocument document = new PdfLoadedDocument(stream); - -// Sets Unicode font to preserve the Unicode characters in a PDF document. -FileStream fontStream = new FileStream(@"ARIALUNI.ttf", FileMode.Open); - -processor.UnicodeFont = new PdfTrueTypeFont(fontStream, 8); - -//Set OCR language to process -processor.Settings.Language = Languages.English; - -//Process OCR by providing the PDF document, data dictionary, and language -processor.PerformOCR(document, @"tessdata/"); - -//Creating the stream object -MemoryStream outputStream = new MemoryStream(); - -//Save the document into stream. -document.Save(outputStream); - -//If the position is not set to '0' then the PDF will be empty. -outputStream.Position = 0; - -//Close the documents. -document.Close(true); - -//Defining the ContentType for pdf file. -string contentType = "application/pdf"; - -//Define the file name. -string fileName = "Output.pdf"; - -//Creates a FileContentResult object by using the file contents, content type, and file name. -return File(outputStream, contentType, fileName); -} - - -{% endhighlight %} - -{% endtabs %} - -You can download a complete working sample from [GitHub](https://github.com/SyncfusionExamples/PDF-Examples/tree/master/OCR/.NET/Perform-OCR-with-unicode-characters-in-a-PDF-document). - -## Layout result - -You can get the OCRed text and its bounds from an input PDF document by using the [OCRLayoutResult](https://help.syncfusion.com/cr/document-processing/Syncfusion.OCRProcessor.OCRLayoutResult.html) Class. Refer to the following code snippet. 
- -{% tabs %} - -{% highlight c# tabtitle="ASP.NET Core" %} - - -//Initialize the OCR processor by providing the path of tesseract -using (OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/Windows")) -{ -//Load a PDF document -FileStream stream = new FileStream(@"Input.pdf", FileMode.Open); - -PdfLoadedDocument document = new PdfLoadedDocument(stream); - -//Set OCR language to process -processor.Settings.Language = Languages.English; - -//Process OCR by providing the PDF document, data dictionary, and language -processor.PerformOCR(document, @"TessData/", out result); -//Get OCRed line collection from first page -OCRLineCollection lines = result.Pages[0].Lines; -//Get each OCRed line and its bounds -foreach(Line line in lines) -{ - string text = line.Text; - RectangleF bounds = line.Rectangle; -} - -//Creating the stream object -MemoryStream outputStream = new MemoryStream(); - -//Save the document into stream. -document.Save(outputStream); - -//If the position is not set to '0' then the PDF will be empty. -outputStream.Position = 0; - -//Close the documents. -document.Close(true); - -//Defining the ContentType for pdf file. -string contentType = "application/pdf"; - -//Define the file name. -string fileName = "Output.pdf"; - -//Creates a FileContentResult object by using the file contents, content type, and file name. -return File(outputStream, contentType, fileName); -} - - -{% endhighlight %} - -{% endtabs %} - -You can download a complete working sample from [GitHub](https://github.com/SyncfusionExamples/PDF-Examples/tree/master/OCR/.NET/Get-the-OCR'ed-text-and-its-bounds-from-an-input-PDF). - -## Performing OCR with image - -You can perform OCR with images. - -N> To perform OCR on images, we need to provide the image stream as input if you are using Syncfusion.PDF.OCR.NET package. 
- -Refer to the following code snippet for Syncfusion.PDF.OCR.Net.Core package: - -{% tabs %} - -{% highlight c# tabtitle="ASP.NET Core" %} - - -//Initialize the OCR processor by providing the path of the tesseract binaries - -using (OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/")) -{ -//loading the input image -FileStream stream = new FileStream(@"Input.jpeg ", FileMode.Open); - -Bitmap image = new Bitmap(stream); - -//Set OCR language to process -processor.Settings.Language = Languages.English; - -//Process OCR by providing the bitmap image, data dictionary, and language -string ocrText= processor.PerformOCR(image, @"tessdata/"); - -} - -{% endhighlight %} - -{% endtabs %} - -Refer to the following code snippet for Syncfusion.PDF.OCR.NET package: - -{% tabs %} - -{% highlight c# tabtitle="ASP.NET Core" %} - -//Initialize the OCR processor by providing the path of the tesseract binaries -using (OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/")) -{ - -FileStream stream = new FileStream("Helloworld.jpg", FileMode.Open); - -//Set OCR language to process -processor.Settings.Language = Languages.English; - -// Sets Unicode font to preserve the Unicode characters in a PDF document. -FileStream fontStream = new FileStream(@"ARIALUNI.ttf", FileMode.Open); - -processor.UnicodeFont = new PdfTrueTypeFont(fontStream, 8); - -//Perform the OCR process for an image steam. -string ocrText = processor.PerformOCR(stream, @"tessdata/"); - -} - -{% endhighlight %} - -{% endtabs %} - -You can download a complete working sample from [GitHub](https://github.com/SyncfusionExamples/PDF-Examples/tree/master/OCR/.NET/Perform-OCR-on-image-file). - -## OCR an Image to PDF - -You can perform OCR on an image and convert it to a searchable PDF document. It is also possible to set PdfConformanceLevel to the output PDF document using OCRSettings. - -N> This PDF conformance option only applies for image OCR to PDF documents. 
- -The following code sample illustrates how to OCR an image to a PDF document: - -{% tabs %} - -{% highlight c# tabtitle="ASP.NET Core" %} - -//Initialize the OCR processor by providing the path of the tesseract binaries -using (OCRProcessor processor = new OCRProcessor()) -{ -//loading the input image -FileStream imageStream = new FileStream(@"Input.png ", FileMode.Open); -Bitmap image = new Bitmap(imageStream); - -//Set OCR language to process -processor.Settings.Language = Languages.English; - -// Sets Unicode font to preserve the Unicode characters in a PDF document -FileStream fontStream = new FileStream(@"ARIALUNI.ttf", FileMode.Open); - -processor.UnicodeFont = new PdfTrueTypeFont(fontStream, true, PdfFontStyle.Regular, 10); - -// Set the PDF conformance level - -processor.Settings.Conformance = PdfConformanceLevel.Pdf_A1B; - -//Process OCR by providing the bitmap image. -PdfDocument document = processor.PerformOCR(image); - -MemoryStream stream = new MemoryStream(); - -//Save the document into stream. -document.Save(stream); -document.Close(true); - -} - -{% endhighlight %} - -{% endtabs %} - -You can download a complete working sample from [GitHub](https://github.com/SyncfusionExamples/PDF-Examples/tree/master/OCR/.NET/Perform-OCR-an-image-and-convert-it-to-a-PDF-document). - -## Temporary folder - -When performing OCR with an existing scanned PDF document, the OCR Processor will create temporary files images and temporary files in a temporary folder. The files will be deleted after the OCR process is completed. - -By default, the system temporary folder will be used for the process. The temporary folder path can be changed by using the [TempFolder](https://help.syncfusion.com/cr/document-processing/Syncfusion.OCRProcessor.OCRSettings.html#Syncfusion_OCRProcessor_OCRSettings_TempFolder) property available in the [OCRSettings](https://help.syncfusion.com/cr/document-processing/Syncfusion.OCRProcessor.OCRSettings.html) Instance. 
Refer to the following code snippet. - -{% tabs %} - -{% highlight c# tabtitle="ASP.NET Core" %} - - -//Initialize the OCR processor by providing the path of the tesseract -using (OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/Windows")) -{ -//Load a PDF document -FileStream stream = new FileStream(@"Input.pdf", FileMode.Open); - -PdfLoadedDocument document = new PdfLoadedDocument(stream); - -//Set OCR language to process -processor.Settings.Language = Languages.English; - -//Set custom temp file path location -processor.Settings.TempFolder = "D:/Temp/"; -//Process OCR by providing the PDF document, data dictionary, and language -processor.PerformOCR(document, @"tessdata/"); - -//Creating the stream object -MemoryStream outputStream = new MemoryStream(); - -//Save the document into stream. -document.Save(outputStream); - -//If the position is not set to '0' then the PDF will be empty. -outputStream.Position = 0; - -//Close the documents. -document.Close(true); - -//Defining the ContentType for pdf file. -string contentType = "application/pdf"; - -//Define the file name. -string fileName = "Output.pdf"; - -//Creates a FileContentResult object by using the file contents, content type, and file name. -return File(outputStream, contentType, fileName); -} - - -{% endhighlight %} - -{% endtabs %} - -You can download a complete working sample from [GitHub](https://github.com/SyncfusionExamples/PDF-Examples/tree/master/OCR/.NET/Set-temp-folder-while-performing-OCR). - -## Performing OCR with Azure Vision -The OCR processor supports external engines to process the OCR on Image and PDF documents. Perform the OCR using external OCR engines such as Azure Computer Vision and more. -Using the IOcrEngine interface, create an external OCR engine. Refer to the following code sample to perform OCR with Azure computer vision. - -{% tabs %} - -{% highlight c# tabtitle="ASP.NET Core" %} - - -//Initialize the OCR processor. 
-using (OCRProcessor processor = new OCRProcessor()) -{ -//Load a PDF document. -FileStream stream = new FileStream(@"Input.pdf", FileMode.Open); -PdfLoadedDocument lDoc = new PdfLoadedDocument(stream); - -//Set the OCR language. -processor.Settings.Language = Languages.English; - -//Initialize the Azure vision external OCR engine. -IOcrEngine azureOcrEngine = new AzureExternalOcrEngine(); - -processor.ExternalEngine = azureOcrEngine; - -//Perform OCR with an input document. -processor.PerformOCR(lDoc); - -FileStream outputStream = new FileStream(@"Output.pdf", FileMode.CreateNew); - -//Save the document into the stream. -lDoc.Save(outputStream); - -//If the position is not set to '0,' a PDF will be empty. -outputStream.Position = 0; - -//Close the document. -lDoc.Close(true); -outputStream.Close(); -} - - -{% endhighlight %} - -{% endtabs %} - -Create a new class and implement the IOcrEngine interface. Get the image stream in the PerformOCR method and process the image stream with an external OCR engine and return the OCRLayoutResult for the image. - -N> Provide a valid subscription key and endpoint to work with Azure computer vision. - -Refer to the following code sample to perform OCR with Azure computer vision. 
- -{% tabs %} - -{% highlight c# tabtitle="ASP.NET Core" %} - - -class AzureExternalOcrEngine : IOcrEngine -{ -private string subscriptionKey = "SubscriptionKey"; -private string endpoint = "endpoint"; - -public OCRLayoutResult PerformOCR(Stream imgStream) -{ -ComputerVisionClient client = Authenticate(); -ReadResult azureOcrResult = ReadFileUrl(client, imgStream).Result; - - -OCRLayoutResult result = ConvertAzureVisionOcrToOcrLayoutResult(azureOcrResult); - -return result; -} - -public ComputerVisionClient Authenticate() -{ -ComputerVisionClient client = new ComputerVisionClient(new ApiKeyServiceClientCredentials(subscriptionKey)) -{ -Endpoint = endpoint -}; -return client; -} - -public async Task ReadFileUrl(ComputerVisionClient client, Stream stream) -{ -stream.Position = 0; -var textHeaders = await client.ReadInStreamAsync(stream); -string operationLocation = textHeaders.OperationLocation; - -const int numberOfCharsInOperationId = 36; - -string operationId = operationLocation.Substring(operationLocation.Length - numberOfCharsInOperationId); -//Extract the text. 
-ReadOperationResult results; -do -{ -results = await client.GetReadResultAsync(Guid.Parse(operationId)); -} -while ((results.Status == OperationStatusCodes.Running || results.Status == OperationStatusCodes.NotStarted)); - -ReadResult azureOcrResult = results.AnalyzeResult.ReadResults[0]; - -return azureOcrResult; -} - -private OCRLayoutResult ConvertAzureVisionOcrToOcrLayoutResult(ReadResult azureVisionOcr) -{ -Syncfusion.OCRProcessor.Line ocrLine; -Syncfusion.OCRProcessor.Word ocrWord; - -OCRLayoutResult ocrlayoutResult = new OCRLayoutResult(); - -ocrlayoutResult.ImageWidth = (float)azureVisionOcr.Width; -ocrlayoutResult.ImageHeight = (float)azureVisionOcr.Height; - -//Page -Syncfusion.OCRProcessor.Page normalPage = new Syncfusion.OCRProcessor.Page(); - -//Lines -foreach (var line in azureVisionOcr.Lines) -{ -ocrLine = new Syncfusion.OCRProcessor.Line(); - -//Word -foreach (var word in line.Words) -{ -ocrWord = new Syncfusion.OCRProcessor.Word(); - -Rectangle rect = GetAzureVisionBounds(word.BoundingBox); - -ocrWord.Text = word.Text; -ocrWord.Rectangle = rect; - -ocrLine.Add(ocrWord); -} -normalPage.Add(ocrLine); -} - -ocrlayoutResult.Add(normalPage); - -return ocrlayoutResult; -} - -private Rectangle GetAzureVisionBounds(IList bbox) -{ -Rectangle rect = Rectangle.Empty; -PointF[] pointCollection = new PointF[bbox.Count / 2]; -int count = 0; -for (int i = 0; i < bbox.Count; i = i + 2) -{ -pointCollection[count] = new PointF((float)bbox[i], (float)bbox[i + 1]); -count++; -} -float xMin = 0; -float yMin = 0; -float xMax = 0; -float yMax = 0; -bool first = true; - -foreach (PointF point in pointCollection) -{ -if (first) -{ -xMin = point.X; -yMin = point.Y; -first = false; -} -else -{ -if (point.X < xMin) -xMin = point.X; -else if (point.X > xMax) -xMax = point.X; -if (point.Y < yMin) -yMin = point.Y; -else if (point.Y > yMax) -yMax = point.Y; -} -} - -int x = Convert.ToInt32(xMin); -int y = Convert.ToInt32(yMin); -int w = Convert.ToInt32(xMax); -int h = 
Convert.ToInt32(yMax); - -return new Rectangle(x, y, w, h); -} -} - - -{% endhighlight %} - -{% endtabs %} - -## Performing OCR with AWS Textract -The OCR processor supports external engines to process the OCR on Image and PDF documents. Perform the OCR using external OCR engines such as AWS Textract and more. -Using the IOcrEngine interface, create an external OCR engine. Refer to the following code sample to perform OCR with AWS Textract. -{% tabs %} - -{% highlight c# tabtitle="ASP.NET Core" %} - -//Initialize the OCR processor. -using (OCRProcessor processor = new OCRProcessor()) -{ -//Load a PDF document. -FileStream stream = new FileStream(@"Input.pdf", FileMode.Open); -PdfLoadedDocument lDoc = new PdfLoadedDocument(stream); - -//Set the OCR language. -processor.Settings.Language = Languages.English; - -//Initialize the AWS Textract external OCR engine. -IOcrEngine azureOcrEngine = new AWSExternalOcrEngine(); - -processor.ExternalEngine = azureOcrEngine; - -//Perform OCR with input document. -string text = processor.PerformOCR(lDoc); - -FileStream outputStream = new FileStream(@"Output.pdf", FileMode.Create); - -//Save the document into stream. -lDoc.Save(outputStream); - -//If the position is not set to '0' then the PDF will be empty. -outputStream.Position = 0; - -//Close the document. -lDoc.Close(); -stream.Dispose(); -outputStream.Dispose(); -} - - -{% endhighlight %} - -{% endtabs %} -Create a new class and implement the IOcrEngine interface. Get the image stream in the PerformOCR method and process the image stream with an external OCR engine and return the OCRLayoutResult for the image. - -N> Provide a valid Access key and Secret Access Key to work with AWS Textract. - -Refer to the following code sample to perform OCR with AWS Textract. 
- -{% tabs %} - -{% highlight c# tabtitle="ASP.NET Core" %} - -class AWSExternalOcrEngine : IOcrEngine -{ -private string awsAccessKeyId = "Access key ID"; -private string awsSecretAccessKey = "Secret access key"; -private float imageHeight; -private float imageWidth; -public OCRLayoutResult PerformOCR(Stream stream) -{ -AmazonTextractClient clientText = Authenticate(); - -DetectDocumentTextResponse textResponse = GetAWSTextractResult(clientText, stream).Result; - -OCRLayoutResult oCRLayoutResult = ConvertAWSTextractResultToOcrLayoutResult(textResponse); -return oCRLayoutResult; -} - -public AmazonTextractClient Authenticate() -{ -AmazonTextractClient client = new AmazonTextractClient(awsAccessKeyId, awsSecretAccessKey, RegionEndpoint.USEast1); -return client; -} - -public async Task GetAWSTextractResult(AmazonTextractClient client, Stream stream) -{ -stream.Position = 0; -MemoryStream memoryStream = new MemoryStream(); -stream.CopyTo(memoryStream); -PdfBitmap bitmap = new PdfBitmap(memoryStream); -imageHeight = bitmap.Height; -imageWidth = bitmap.Width; - -DetectDocumentTextResponse response = await client.DetectDocumentTextAsync(new DetectDocumentTextRequest -{ -Document = new Document -{ -Bytes = memoryStream -} -}); -return response; -} - -public OCRLayoutResult ConvertAWSTextractResultToOcrLayoutResult(DetectDocumentTextResponse textResponse) -{ -OCRLayoutResult layoutResult = new OCRLayoutResult(); -Syncfusion.OCRProcessor.Page ocrPage = new Page(); -Syncfusion.OCRProcessor.Line ocrLine; -Syncfusion.OCRProcessor.Word ocrWord; -layoutResult.ImageHeight = imageHeight; -layoutResult.ImageWidth = imageWidth; -foreach (var page in textResponse.Blocks) -{ -ocrLine = new Line(); -if (page.BlockType == "WORD") -{ -ocrWord = new Word(); -ocrWord.Text = page.Text; - -float left = page.Geometry.BoundingBox.Left; -float top = page.Geometry.BoundingBox.Top; -float width = page.Geometry.BoundingBox.Width; -float height = page.Geometry.BoundingBox.Height; -Rectangle rect = 
GetBoundingBox(left,top,width,height); -ocrWord.Rectangle = rect; -ocrLine.Add(ocrWord); -ocrPage.Add(ocrLine); -} -} -layoutResult.Add(ocrPage); -return layoutResult; -} -public Rectangle GetBoundingBox(float left, float top, float width, float height) -{ -int x = Convert.ToInt32(left * imageWidth); -int y = Convert.ToInt32(top * imageHeight); -int bboxWidth = Convert.ToInt32((width * imageWidth) + x); -int bboxHeight = Convert.ToInt32((height * imageHeight) + y); -Rectangle rect = new Rectangle(x,y, bboxWidth, bboxHeight); -return rect; -} -} - - -{% endhighlight %} - -{% endtabs %} - -## Troubleshooting - - - - - - - - - - - - -
    Exception Tesseract has not been initialized exception.
    Reason -The exception may occur if the tesseract binaries and tessdata files are not available on the provided path. -
    Solution -Set the proper tesseract binaries and tessdata folder with all files and inner folders. -

    -The tessdata folder name is case sensitive and the name should not be changed. -
    - - - - - - - - - - - - -
    Exception Exception has been thrown by the target of an invocation.
    Reason -This exception may occur if the tesseract binaries are not in the required structure. -
    Solution -To resolve this exception, ensure the tesseract binaries are in the following structure, -

    -The tesseract binaries path is TesseractBinaries/Windows and the assemblies should be in the structure below: -

    -1.TesseractBinaries/Windows/x64/liblept1753.dll,libSyncfusionTesseract.dll
    -2.TesseractBinaries/Windows/x86/liblept1753.dll,libSyncfusionTesseract.dll -
    - - - - - - - - - - - - -
    Exception can't be opened because the identity of the developer cannot be confirmed.
    Reason -This error may occur during the initial loading of OCR processor in Mac environments. -
    Solution -To resolve this issue, refer to this link for more details. - -
    - - - - - - - - - - - - -
    Exception OCR processor doesn’t process languages other than English.
    Reason -This issue may occur if the input image contains text in other languages and the tessdata for those languages is not available. -
    Solution -Essential® PDF supports all the languages supported by Tesseract engine in the OCR processor. -The dictionary packs for the languages can be downloaded from the following online location:
    -https://github.com/tesseract-ocr/tessdata -

    -It is also mandatory to change the corresponding language code in the OCRProcessor.Settings.Language property.
    -For example, to perform optical character recognition in German, the property should be set as
    -"processor.Settings.Language = "deu";" -
    - - - diff --git a/Document-Processing/Data-Extraction/OCR/NET/Dot-NET-Framework.md b/Document-Processing/Data-Extraction/OCR/NET/Dot-NET-Framework.md deleted file mode 100644 index 5d56a4c89c..0000000000 --- a/Document-Processing/Data-Extraction/OCR/NET/Dot-NET-Framework.md +++ /dev/null @@ -1,1525 +0,0 @@ ---- -title: OCR Processor for .NET PDF Framework with Tesseract | Syncfusion -description: This section explains the framework for processing OCR on existing PDF documents and images using different versions of Tesseract. -platform: document-processing -control: PDF -documentation: UG ---- - -# Working with Optical Character Recognition (OCR) in File Formats PDF - -Essential® PDF provides support for Optical Character Recognition with the help of Google’s Tesseract Optical Character Recognition engine. - -N> Starting with v20.1.0.x, if you reference Syncfusion® OCR processor assemblies from trial setup or from the NuGet feed, you also have to include a license key in your projects. Please refer to this [link](https://help.syncfusion.com/common/essential-studio/licensing/overview) to know about registering Syncfusion® license key in your application to use our components. - -## Prerequisites and setting up the Tesseract Engine - -* To use the OCR feature in your application, you need to add reference to the following set of assemblies. -1. Syncfusion.Compression.Base.dll -2. Syncfusion.Pdf.Base.dll -3. Syncfusion.OCRProcessor.Base.dll - -* Place the SyncfusionTesseract.dll and liblept168.dll Tesseract assemblies in the local system and provide the assembly path to the OCR processor. 
-{% capture codesnippet1 %} -{% tabs %} - -{% highlight c# tabtitle="C#" %} - - - -OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/") - - - -{% endhighlight %} - -{% highlight vb.net tabtitle="VB.NET" %} - - -Dim processor As New OCRProcessor("TesseractBinaries/") - - - -{% endhighlight %} - -{% endtabs %} -{% endcapture %} -{{ codesnippet1 | OrderList_Indent_Level_1 }} - -* Place the Tesseract language data {E.g eng.traineddata} in the local system and provide a path to the OCR processor -{% capture codesnippet2 %} -{% tabs %} - -{% highlight c# tabtitle="C#" %} - - - -OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/"); - -processor.PerformOCR(lDoc, @"TessData/"); - - - -{% endhighlight %} - -{% highlight vb.net tabtitle="VB.NET" %} - - -Dim processor As New OCRProcessor("TesseractBinaries/") - -processor.PerformOCR(lDoc, "TessData/") - - - -{% endhighlight %} - -{% endtabs %} -{% endcapture %} -{{ codesnippet2 | OrderList_Indent_Level_1 }} - -You can also download the language packages from below link - -[https://github.com/tesseract-ocr/tessdata](https://github.com/tesseract-ocr/tessdata ) - -N> From 16.1.0.24 OCR is not a part of Essential® Studio and is available as separate package (OCR Processor) under the Add-On section in the below link [https://www.syncfusion.com/downloads/latest-version](https://www.syncfusion.com/account/downloads). - -N> PDF supports OCR only in Windows Forms, WPF, ASP.NET and ASP.NET MVC platforms. - -## Performing OCR for an entire document - -You can perform OCR on PDF document with the help of [OCRProcessor](https://help.syncfusion.com/cr/document-processing/Syncfusion.OCRProcessor.OCRProcessor.html) Class. Refer the below code snippet for the same. 
- -{% tabs %} - -{% highlight c# tabtitle="C#" %} - - -//Initialize the OCR processor by providing the path of tesseract binaries(SyncfusionTesseract.dll and liblept168.dll) - -using (OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/")) - -{ - -//Load a PDF document - -PdfLoadedDocument lDoc = new PdfLoadedDocument("Input.pdf"); - -//Set OCR language to process - -processor.Settings.Language = Languages.English; - -//Process OCR by providing the PDF document and Tesseract data - -processor.PerformOCR(lDoc, @"TessData/"); - -//Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf"); - -lDoc.Close(true); - -} - - - -{% endhighlight %} - - - -{% highlight vb.net tabtitle="VB.NET" %} - - - - -'Initialize the OCR processor by providing the path of tesseract binaries(SyncfusionTesseract.dll and liblept168.dll) - -Using processor As New OCRProcessor("TesseractBinaries/") - -'Load a PDF document - -Dim lDoc As New PdfLoadedDocument("Input.pdf") - -'Set OCR language to process - -processor.Settings.Language = Languages.English - -'Process OCR by providing the PDF document and Tesseract data - -processor.PerformOCR(lDoc, "TessData/") - -'Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf") - -lDoc.Close(True) - -End Using - - - -{% endhighlight %} - -{% endtabs %} - -N> The PerformOCR method returns only the text OCRed by OCRProcessor. Other existing text in the PDF page won’t be returned in this method. Please check [text extraction](https://help.syncfusion.com/document-processing/pdf/pdf-library/net/working-with-text-extraction) feature for this. - -## Performing OCR with tesseract version 3.05 - -You can perform OCR using the tesseract version 3.05. The [TesseractVersion](https://help.syncfusion.com/cr/document-processing/Syncfusion.OCRProcessor.OCRSettings.html#Syncfusion_OCRProcessor_OCRSettings_TesseractVersion) property is used to switch the tesseract version between 3.02 and 3.05. 
By default, OCR works with tesseract version 3.02. - -You must use the pre built Syncfusion® tesseract version 3.05 in the sample to run the OCR properly. The tesseract binaries are shipping with Syncfusion® NuGet package, use the following link to download the NuGet package. - -[https://www.nuget.org/packages/Syncfusion.OCRProcessor.Base](https://www.nuget.org/packages/Syncfusion.OCRProcessor.Base) - -The following sample code snippet demonstrates the OCR processor with Tesseract3.05 for PDF documents. - -{% tabs %} - -{% highlight c# tabtitle="C#" %} - - -using (OCRProcessor processor = new OCRProcessor(@"Tesseract3.05Binaries/") - -{ - -//Load a PDF document - -PdfLoadedDocument lDoc = new PdfLoadedDocument("input.pdf"); - -//Set OCR language to process - -processor.Settings.Language = Languages.English; - -//Set tesseract OCR Engine - -processor.Settings.TesseractVersion = TesseractVersion.Version3_05; - -//Process OCR by providing the PDF document and tesseract data, and enabling the isMemoryOptimized property - -processor.PerformOCR(lDoc, @"TessData/", true); - -//Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf"); - -lDoc.Close(true); - -} - - -{% endhighlight %} - -{% highlight vb.net tabtitle="VB.NET" %} - -Using processor As New OCRProcessor("Tesseract3.05Binaries/") - -'Load a PDF document - -Dim lDoc As New PdfLoadedDocument("Input.pdf") - -'Set OCR language to process - -processor.Settings.Language = Languages.English - -'Set tesseract OCR engine - -processor.Settings.TesseractVersion = TesseractVersion.Version3_05 - -'Process OCR by providing the PDF document and tesseract data, and enabling the isMemoryOptimized property - -processor.PerformOCR(lDoc, "TessData/", True) - -'Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf") - -lDoc.Close(True) - -End Using - -{% endhighlight %} - - {% endtabs %} - - -## Performing OCR with Tesseract Version 4.0 - -You can perform OCR using tesseract 4.0. 
The [TesseractVersion](https://help.syncfusion.com/cr/document-processing/Syncfusion.OCRProcessor.OCRSettings.html#Syncfusion_OCRProcessor_OCRSettings_TesseractVersion) property is used to switch the tesseract version. By default, OCR will be performed with tesseract version 3.02. - -You must use the pre-built Syncfusion® tesseract 4.0 binaries in the project to run the OCR properly. The tesseract binaries are shipping with the Syncfusion® NuGet package, use the following link to download the NuGet package. - - -[https://www.nuget.org/packages/Syncfusion.PDF.OCR.WinForms](https://www.nuget.org/packages/Syncfusion.PDF.OCR.WinForms/) - -The following code sample explains the OCR processor with Tesseract4.0 for PDF documents. - -{% tabs %} - -{% highlight c# tabtitle="C#" %} - - -using (OCRProcessor processor = new OCRProcessor(@"Tesseract4.0Binaries/") - -{ - -//Load a PDF document - -PdfLoadedDocument lDoc = new PdfLoadedDocument("input.pdf"); - -//Set OCR language to process - -processor.Settings.Language = Languages.English; - -//Set tesseract OCR Engine - -processor.Settings.TesseractVersion = TesseractVersion.Version4_0; - -//Process OCR by providing the PDF document and tesseract data, and enabling the isMemoryOptimized property - -processor.PerformOCR(lDoc, @"TessData/", true); - -//Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf"); - -lDoc.Close(true); - -} - -{% endhighlight %} - -{% highlight vb.net tabtitle="VB.NET" %} - -Using processor As New OCRProcessor("Tesseract4.0Binaries/") - -'Load a PDF document - -Dim lDoc As New PdfLoadedDocument("Input.pdf") - -'Set OCR language to process - -processor.Settings.Language = Languages.English - -'Set tesseract OCR engine - -processor.Settings.TesseractVersion = TesseractVersion.Version4_0 - -'Process OCR by providing the PDF document and tesseract data, and enabling the isMemoryOptimized property - -processor.PerformOCR(lDoc, "TessData/", True) - -'Save the OCR processed PDF document in 
the disk - -lDoc.Save("Sample.pdf") - -lDoc.Close(True) - -End Using - -{% endhighlight %} - -{% endtabs %} - - -## Performing OCR for a region of the document - -You can perform OCR on particular region or several regions of a PDF page with the help of [PageRegion](https://help.syncfusion.com/cr/document-processing/Syncfusion.OCRProcessor.PageRegion.html) class. Refer the below code snippet for the same. - -{% tabs %} - -{% highlight c# tabtitle="C#" %} - - -//Initialize the OCR processor by providing the path of the tesseract binaries(SyncfusionTesseract.dll and liblept168.dll) - -using (OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/")) - -{ - -//Load a PDF document - -PdfLoadedDocument lDoc = new PdfLoadedDocument("Input.pdf"); - -//Set OCR language to process - -processor.Settings.Language = Languages.English; - -RectangleF rect = new RectangleF(0, 100, 950, 150); - -//Assign rectangles to the page - -List pageRegions = new List(); - -PageRegion region = new PageRegion(); - -region.PageIndex = 1; - -region.PageRegions = new RectangleF[] { rect }; - -pageRegions.Add(region); - -processor.Settings.Regions = pageRegions; - -//Process OCR by providing the PDF document and Tesseract data - -processor.PerformOCR(lDoc, @"TessData/"); - -//Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf"); - -lDoc.Close(true); - -} - - - -{% endhighlight %} - - - -{% highlight vb.net tabtitle="VB.NET" %} - - -'Initialize the OCR processor by providing the path of the tesseract binaries(SyncfusionTesseract.dll and liblept168.dll) - -Using processor As New OCRProcessor("TesseractBinaries/") - -'Load a PDF document - -Dim lDoc As New PdfLoadedDocument("Input.pdf") - -'Set OCR language to process - -processor.Settings.Language = Languages.English - -Dim rect As New RectangleF(0, 100, 950, 150) - -'Assign rectangles to the page - -Dim pageRegions As New List(Of PageRegion)() - -Dim region As New PageRegion() - -region.PageIndex = 1 - 
-region.PageRegions = New RectangleF() {rect} - -pageRegions.Add(region) - -processor.Settings.Regions = pageRegions - -'Process OCR by providing the PDF document and Tesseract data - -processor.PerformOCR(lDoc, "TessData/") - -'Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf") - -lDoc.Close(True) - - - -{% endhighlight %} - - {% endtabs %} - -## Performing OCR on image - -You can perform OCR on an image also. Refer the below code snippets for the same. - -{% tabs %} - -{% highlight c# tabtitle="C#" %} - - -//Initialize the OCR processor by providing the path of the tesseract binaries(SyncfusionTesseract.dll and liblept168.dll) - -using (OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/")) - -{ - -//loading the input image - -Bitmap image = new Bitmap("input.jpeg"); - -//Set OCR language to process - -processor.Settings.Language = Languages.English; - -//Process OCR by providing the bitmap image, data dictionary and language - -string ocrText= processor.PerformOCR(image, @"TessData/"); - -} - - - -{% endhighlight %} - - - -{% highlight vb.net tabtitle="VB.NET" %} - - -'Initialize the OCR processor by providing the path of the tesseract binaries(SyncfusionTesseract.dll and liblept168.dll) - -Using processor As New OCRProcessor("TesseractBinaries/") - -'loading the input image - -Dim image As New Bitmap("input.jpeg") - -'Set OCR language to process - -processor.Settings.Language = Languages.English - -'Process OCR by providing the bitmap image, data dictionary and language - -Dim ocrText As String = processor.PerformOCR(image, "TessData/") - -End Using - - - -{% endhighlight %} - - {% endtabs %} - -## Performing OCR for large PDF documents - -You can optimize the memory to perform OCR for large PDF documents by enabling the isMemoryOptimized property in 
[PerformOCR](https://help.syncfusion.com/cr/document-processing/Syncfusion.OCRProcessor.OCRProcessor.html#Syncfusion_OCRProcessor_OCRProcessor_PerformOCR_Syncfusion_Pdf_Parsing_PdfLoadedDocument_System_String_System_Boolean_) method of [OCRProcessor](https://help.syncfusion.com/cr/document-processing/Syncfusion.OCRProcessor.OCRProcessor.html) class. Optimization will be effective only with Multithreading environment or PDF document with more images. This is demonstrated in the following code sample. - -{% tabs %} - -{% highlight c# tabtitle="C#" %} - - -//Initialize the OCR processor by providing the path of tesseract binaries(SyncfusionTesseract.dll and liblept168.dll) - -using (OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/")) - -{ - -//Load a PDF document. - -PdfLoadedDocument lDoc = new PdfLoadedDocument("Input.pdf"); - -//Set OCR language to process. - -processor.Settings.Language = Languages.English; - -//Process OCR by providing the PDF document, Tesseract data and enable isMemoryOptimized property - -processor.PerformOCR(lDoc, @"TessData/",true); - -//Save the OCR processed PDF document in the disk. - -lDoc.Save("Sample.pdf"); - -lDoc.Close(true); - -} - - -{% endhighlight %} - - -{% highlight vb.net tabtitle="VB.NET" %} - - -'Initialize the OCR processor by providing the path of tesseract binaries(SyncfusionTesseract.dll and liblept168.dll) - -Using processor As New OCRProcessor("TesseractBinaries/") - -'Load a PDF document. - -Dim lDoc As New PdfLoadedDocument("Input.pdf") - -'Set OCR language to process. - -processor.Settings.Language = Languages.English - -'Process OCR by providing the PDF document and Tesseract data enable isMemoryOptimized property. - -processor.PerformOCR(lDoc, "TessData/", True) - -'Save the OCR processed PDF document in the disk. 
- -lDoc.Save("Sample.pdf") - -lDoc.Close(True) - -End Using - - - -{% endhighlight %} - -{% endtabs %} - - -## Performing OCR on rotated page of PDF document - -You can perform OCR on the rotated page of a PDF document. Refer to the following code snippet for the same. - - -{% tabs %} - -{% highlight c# tabtitle="C#" %} - - -//Initialize the OCR processor by providing the path of tesseract binaries(SyncfusionTesseract.dll and liblept168.dll) - -using (OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/")) - -{ - -//Load a PDF document - -PdfLoadedDocument lDoc = new PdfLoadedDocument("Input.pdf"); - -//Set OCR language to process - -processor.Settings.Language = Languages.English; - -//Set OCR page auto detection rotation - -processor.Settings.AutoDetectRotation = true; - -//Process OCR by providing the PDF document - -processor.PerformOCR(lDoc, @"TessData/"); - -//Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf"); - -lDoc.Close(true); - -} - - -{% endhighlight %} - - -{% highlight vb.net tabtitle="VB.NET" %} - - -'Initialize the OCR processor by providing the path of tesseract binaries(SyncfusionTesseract.dll and liblept168.dll) - -Using processor As New OCRProcessor("TesseractBinaries/") - -'Load a PDF document. - -Dim lDoc As PdfLoadedDocument = New PdfLoadedDocument("Input.pdf") - -'Set OCR language to process - -processor.Settings.Language = Languages.English - -'Set OCR page auto detection rotation - -processor.Settings.AutoDetectRotation = true - -'Process OCR by providing the PDF document - -processor.PerformOCR(lDoc, "TessData/") - -'Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf") - -lDoc.Close(true) - -End Using - - - -{% endhighlight %} - -{% endtabs %} - - - -## Layout result from OCR - -You can get the OCRed text and its bounds from a scanned PDF document by using the [OCRLayoutResult](https://help.syncfusion.com/cr/document-processing/Syncfusion.OCRProcessor.OCRLayoutResult.html) Class. 
Refer to the following code snippet. - -{% tabs %} - -{% highlight c# tabtitle="C#" %} - -//Initialize the OCR processor by providing the path of tesseract binaries (SyncfusionTesseract.dll and liblept168.dll) - -using (OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/")) - -{ - -//Load a PDF document - -PdfLoadedDocument lDoc = new PdfLoadedDocument("Input.pdf"); - -//Set OCR language to process - -processor.Settings.Language = Languages.English; - -//Initializes OCR layout result - -OCRLayoutResult result; - -//Process OCR by providing the PDF document, Tesseract data, and layout result - -processor.PerformOCR(lDoc, @"TessData/", out result); - -//Get OCRed line collection from first page - -OCRLineCollection lines = result.Pages[0].Lines; - -//Get each OCRed line and its bounds - -foreach(Line line in lines) -{ - string text = line.Text; - - RectangleF bounds = line.Rectangle; -} - -//Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf"); - -//Close the document - -lDoc.Close(true); - -} - -{% endhighlight %} - -{% highlight vb.net tabtitle="VB.NET" %} - -'Initialize the OCR processor by providing the path of tesseract binaries (SyncfusionTesseract.dll and liblept168.dll) - -Using processor As New OCRProcessor("TesseractBinaries/") - -'Load a PDF document - -Dim lDoc As New PdfLoadedDocument("Input.pdf") - -'Set OCR language to process - -processor.Settings.Language = Languages.English - -'Initializes OCR layout result - -Dim result As OCRLayoutResult - -'Process OCR by providing the PDF document, Tesseract data, and layout result - -processor.PerformOCR(lDoc, "TessData/", result) - -'Get OCRed line collection from first page - -Dim lines As OCRLineCollection = result.Pages(0).Lines - -'Get each OCRed line and its bounds - -For Each line As Line In lines - - Dim text As String = line.Text - - Dim bounds As RectangleF = line.Rectangle - -Next - -'Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf") - -'Close 
the document - -lDoc.Close(True) - -End Using - -{% endhighlight %} - -{% endtabs %} - -## Native call - -Enable native call will not launch any temporary process for OCR processing, instead it will invoke the native calls. - -### Tesseract 3.02 - -Tesseract 3.02 supports only 32-bit version. By default, this property will be disabled. - -N> Enable native call will not work in 64-bit in Tesseract 3.02 version. Instead a temporary process will be launched for OCR processing. - -The following sample code snippet demonstrates the OCR processor with native call support of tesseract 3.02. - -{% tabs %} - -{% highlight c# tabtitle="C#" %} - - -using (OCRProcessor processor = new OCRProcessor(@"Tesseract3.02Binaries/") - -{ - -//Load a PDF document - -PdfLoadedDocument lDoc = new PdfLoadedDocument("input.pdf"); - -//Set OCR language to process - -processor.Settings.Language = Languages.English; - -//Set tesseract OCR Engine - -processor.Settings.TesseractVersion = TesseractVersion.Version3_02; - -//Process OCR by providing the PDF document and tesseract data, and enabling the isMemoryOptimized property - -processor.PerformOCR(lDoc, @"TessData/", true); - -//Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf"); - -lDoc.Close(true); - -} - - -{% endhighlight %} - -{% highlight vb.net tabtitle="VB.NET" %} - -Using processor As New OCRProcessor("Tesseract3.02Binaries/") - -'Load a PDF document - -Dim lDoc As New PdfLoadedDocument("Input.pdf") - -'Set OCR language to process - -processor.Settings.Language = Languages.English - -'Set tesseract OCR engine - -processor.Settings.TesseractVersion = TesseractVersion.Version3_02 - -'Process OCR by providing the PDF document and tesseract data, and enabling the isMemoryOptimized property - -processor.PerformOCR(lDoc, "TessData/", True) - -'Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf") - -lDoc.Close(True) - -End Using - -{% endhighlight %} - -{% endtabs %} - -### Tesseract 3.05 - 
-Tesseract 3.05 supports the native call for both x86 and x64 architectures. By default, the x86 tesseract binaries are available with the NuGet package or the tesseract installer. - -You can download the x64 tesseract binaries from the following link. - -[Tesseract 64-bit binaries](https://www.syncfusion.com/downloads/support/directtrac/general/ze/Tesseract3.05_x641904984914) - -N> These 64-bit binaries are required only when the native call property is enabled. -N> Make sure to provide the path of the 64-bit binaries when running in a 64-bit environment. - -The following sample code snippet demonstrates the OCR processor with native call support of tesseract 3.05. - -{% tabs %} - -{% highlight c# tabtitle="C#" %} - - -using (OCRProcessor processor = new OCRProcessor(@" Tesseract3.05Binaries/") - -{ - -//Load a PDF document - -PdfLoadedDocument lDoc = new PdfLoadedDocument("input.pdf"); - -//Set OCR language to process - -processor.Settings.Language = Languages.English; - -//Set tesseract OCR engine - -processor.Settings.TesseractVersion = TesseractVersion.Version3_05; - -//Set enable native call - -processor.Settings.EnableNativeCall = true; - -//Process OCR by providing the PDF document and tesseract data, and enabling the isMemoryOptimized property - -processor.PerformOCR(lDoc, @"TessData/", true); - -//Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf"); - -lDoc.Close(true); - -} - - -{% endhighlight %} - - -{% highlight vb.net tabtitle="VB.NET" %} - - -Using processor As New OCRProcessor("Tesseract3.05Binaries/") - -'Load a PDF document - -Dim lDoc As New PdfLoadedDocument("input.pdf") - -'Set OCR language to process - -processor.Settings.Language = Languages.English - -'Set tesseract OCR engine - -processor.Settings.TesseractVersion = TesseractVersion.Version3_05 - -'Set enable native call - -processor.Settings.EnableNativeCall = True - -'Process OCR by providing the PDF document and tesseract data, and enabling the isMemoryOptimized 
property - -processor.PerformOCR(lDoc,"TessData/", True) - -'Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf") - -lDoc.Close(True) - -End Using - - - -{% endhighlight %} - -{% endtabs %} - -## Customizing temp folder - -While performing OCR on an existing scanned PDF document, the OCR Processor will create temporary files (.temp, .tiff, .txt) and the files are deleted after the process is completed. You can change this temporary files folder location using the [TempFolder](https://help.syncfusion.com/cr/document-processing/Syncfusion.OCRProcessor.OCRSettings.html#Syncfusion_OCRProcessor_OCRSettings_TempFolder) property available in the [OCRSettings](https://help.syncfusion.com/cr/document-processing/Syncfusion.OCRProcessor.OCRSettings.html) Instance. Refer to the following code snippet. - -{% tabs %} - -{% highlight c# tabtitle="C#" %} - -//Initialize the OCR processor by providing the path of tesseract binaries (SyncfusionTesseract.dll and liblept168.dll) - -using (OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/")) - -{ - -//Load a PDF document - -PdfLoadedDocument lDoc = new PdfLoadedDocument("Input.pdf"); - -//Set OCR language to process - -processor.Settings.Language = Languages.English; - -//Set custom temp file path location - -processor.Settings.TempFolder = "D:/Temp/"; - -//Process OCR by providing the PDF document and Tesseract data - -processor.PerformOCR(lDoc, @"TessData/"); - -//Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf"); - -//Close the document - -lDoc.Close(true); - -} - -{% endhighlight %} - -{% highlight vb.net tabtitle="VB.NET" %} - -'Initialize the OCR processor by providing the path of tesseract binaries (SyncfusionTesseract.dll and liblept168.dll) - -Using processor As New OCRProcessor("TesseractBinaries/") - -'Load a PDF document - -Dim lDoc As New PdfLoadedDocument("Input.pdf") - -'Set OCR language to process - -processor.Settings.Language = Languages.English - -'Set 
custom temp file path location - -processor.Settings.TempFolder = "D:/Temp/" - -'Process OCR by providing the PDF document and Tesseract data - -processor.PerformOCR(lDoc, "TessData/") - -'Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf") - -'Close the document - -lDoc.Close(True) - -End Using - -{% endhighlight %} - -{% endtabs %} - - - -## Performing OCR with different Page Segmentation Mode - -You can perform OCR with various page segmentation mode. The PageSegment property is used to set the page segmentation mode. By default, OCR works with the “Auto” page segmentation mode. Kindly refer to the following code sample. - -{% tabs %} - -{% highlight c# tabtitle="C#" %} - - -using (OCRProcessor processor = new OCRProcessor(@"Tesseract4.0Binaries/") - -{ - -//Load a PDF document - -PdfLoadedDocument lDoc = new PdfLoadedDocument("input.pdf"); - -//Set OCR language to process - -processor.Settings.Language = Languages.English; - -//Set tesseract OCR Engine - -processor.Settings.TesseractVersion = TesseractVersion.Version4_0; - -////Set OCR Page segment mode to process - -processor.Settings.PageSegment = PageSegmentMode.AutoOsd; - -//Process OCR by providing the PDF document and tesseract data, and enabling the isMemoryOptimized property - -processor.PerformOCR(lDoc, @"TessData/", true); - -//Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf"); - -lDoc.Close(true); - -} - - -{% endhighlight %} - -{% highlight vb.net tabtitle="VB.NET" %} - -VB - -Using processor As New OCRProcessor("Tesseract4.0Binaries/") - -'Load a PDF document - -Dim lDoc As New PdfLoadedDocument("Input.pdf") - -'Set OCR language to process - -processor.Settings.Language = Languages.English - -'Set tesseract OCR engine - -processor.Settings.TesseractVersion = TesseractVersion.Version4_0 - -'Set OCR page segment mode to process - - processor.Settings.PageSegment = PageSegmentMode.AutoOsd - -'Process OCR by providing the PDF document and tesseract data, 
and enabling the isMemoryOptimized property - -processor.PerformOCR(lDoc, "TessData/", True) - -'Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf") - -lDoc.Close(True) - -End Using - - -{% endhighlight %} - -{% endtabs %} - -N> The page segmentation mode is supported only in the Tesseract version 4.0 and above. - -## Performing OCR with different OCR Engine Mode - -You can perform OCR with various OCR Engine Mode. The OCREngineMode property is used to set the OCR Engine modes. By default, OCR works with OCR Engine mode “Default”. - -This is explained in the following code sample - -{% tabs %} - -{% highlight c# tabtitle="C#" %} - - -using (OCRProcessor processor = new OCRProcessor(@"Tesseract4.0Binaries/") - -{ - -//Load a PDF document - -PdfLoadedDocument lDoc = new PdfLoadedDocument("input.pdf"); - -//Set OCR language to process - -processor.Settings.Language = Languages.English; - -//Set tesseract OCR Engine - -processor.Settings.TesseractVersion = TesseractVersion.Version4_0; - -//Set OCR engine mode to process -processor.Settings.OCREngineMode = OCREngineMode.LSTMOnly; - -//Process OCR by providing the PDF document and tesseract data, and enabling the isMemoryOptimized property - -processor.PerformOCR(lDoc, @"TessData/", true); - -//Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf"); - -lDoc.Close(true); - -} - - -{% endhighlight %} - -{% highlight vb.net tabtitle="VB.NET" %} - -VB - -Using processor As New OCRProcessor("Tesseract3.05Binaries/") - -'Load a PDF document - -Dim lDoc As New PdfLoadedDocument("Input.pdf") - -'Set OCR language to process - -processor.Settings.Language = Languages.English - -'Set tesseract OCR engine - -processor.Settings.TesseractVersion = TesseractVersion.Version4_0 - -'Set OCR engine mode to process - -processor.Settings.OCREngineMode = OCREngineMode.LSTMOnly - -'Process OCR by providing the PDF document and tesseract data, and enabling the isMemoryOptimized property - 
-processor.PerformOCR(lDoc, "TessData/", True) - -'Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf") - -lDoc.Close(True) - -End Using - -{% endhighlight %} - -{% endtabs %} - -N> The OCR Engine Mode is supported only in the Tesseract version 4.0 and above. - -## White List - -A white list specifies a list of characters that the OCR engine is only allowed to recognize — if a character is not on the white list, it cannot be included in the output OCR results. - -This is explained in the following code sample, - -{% tabs %} - -{% highlight c# tabtitle="C#" %} - -using (OCRProcessor processor = new OCRProcessor(@"Tesseract4.0Binaries/") - -{ - -//Load a PDF document - -PdfLoadedDocument lDoc = new PdfLoadedDocument("input.pdf"); - -//Set OCR language to process - -processor.Settings.Language = Languages.English; - -//Set tesseract OCR Engine - -processor.Settings.TesseractVersion = TesseractVersion.Version4_0; - -//Set OCR engine mode to process -processor.Settings.OCREngineMode = OCREngineMode.LSTMOnly; - -//Set WhiteList Property -Processor.Settings.WhiteList = "PDF"; - -//Process OCR by providing the PDF document and tesseract data, and enabling the isMemoryOptimized property - -processor.PerformOCR(lDoc, @"TessData/", true); - -//Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf"); - -lDoc.Close(true); - -} - -{% endhighlight %} - - -{% highlight vb.net tabtitle="VB.NET" %} -Using processor As New OCRProcessor("Tesseract3.05Binaries/") - -'Load a PDF document - -Dim lDoc As New PdfLoadedDocument("Input.pdf") - -'Set OCR language to process - -processor.Settings.Language = Languages.English - -'Set tesseract OCR engine - -processor.Settings.TesseractVersion = TesseractVersion.Version4_0 - -'Set OCR engine mode to process - -processor.Settings.OCREngineMode = OCREngineMode.LSTMOnly - -'Set WhiteList Property - -Processor.Settings.WhiteList = "PDF" - -'Process OCR by providing the PDF document and tesseract data, and 
enabling the isMemoryOptimized property - -processor.PerformOCR(lDoc, "TessData/", True) - -'Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf") - -lDoc.Close(True) - -End Using - -{% endhighlight %} -{% endtabs %} - -## Black List - -{% tabs %} - -{% highlight c# tabtitle="C#" %} -using (OCRProcessor processor = new OCRProcessor(@"Tesseract4.0Binaries/") - -{ - -//Load a PDF document - -PdfLoadedDocument lDoc = new PdfLoadedDocument("input.pdf"); - -//Set OCR language to process - -processor.Settings.Language = Languages.English; - -//Set tesseract OCR Engine - -processor.Settings.TesseractVersion = TesseractVersion.Version4_0; - -//Set OCR engine mode to process -processor.Settings.OCREngineMode = OCREngineMode.LSTMOnly; - -//Set BlackList Property -Processor.Settings. BlackList = "PDF"; - -//Process OCR by providing the PDF document and tesseract data, and enabling the isMemoryOptimized property - -processor.PerformOCR(lDoc, @"TessData/", true); - -//Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf"); - -lDoc.Close(true); - -} - -{% endhighlight %} - -{% highlight vb.net tabtitle="VB.NET" %} -Using processor As New OCRProcessor("Tesseract3.05Binaries/") - -'Load a PDF document - -Dim lDoc As New PdfLoadedDocument("Input.pdf") - -'Set OCR language to process - -processor.Settings.Language = Languages.English - -'Set tesseract OCR engine - -processor.Settings.TesseractVersion = TesseractVersion.Version4_0 - -'Set OCR engine mode to process - -processor.Settings.OCREngineMode = OCREngineMode.LSTMOnly - -'Set BlackList Property - -Processor.Settings.BlackList = "PDF" - -'Process OCR by providing the PDF document and tesseract data, and enabling the isMemoryOptimized property - -processor.PerformOCR(lDoc, "TessData/", True) - -'Save the OCR processed PDF document in the disk - -lDoc.Save("Sample.pdf") - -lDoc.Close(True) - -End Using - -{% endhighlight %} - -{% endtabs %} - -## OCR an Image to PDF - -You can perform OCR 
on an image and convert it to a searchable PDF document. It is also possible to set PdfConformanceLevel to the output PDF document using OCRSettings. - -N> This PDF conformance option only applies for image OCR to PDF documents. - -The following code sample illustrates how to OCR an image to a PDF document: - -{% tabs %} - -{% highlight c# tabtitle="C#" %} - -//Initialize the OCR processor by providing the path of the tesseract binaries -using (OCRProcessor processor = new OCRProcessor()) -{ -//loading the input image -Bitmap image = new Bitmap(@"Input.png "); - -//Set OCR language to process -processor.Settings.Language = Languages.English; - -//Set tesseract OCR Engine. -processor.Settings.TesseractVersion = TesseractVersion.Version4_0; - -// Set the PDF conformance level -processor.Settings.Conformance = PdfConformanceLevel.Pdf_A1B; - -//Process OCR by providing the bitmap image -PdfDocument document = processor.PerformOCR(image); - -// Save the Document -document.Save("output.pdf"); - -//Close the Document -document.Close(true); -} - - -{% endhighlight %} - -{% highlight vb.net tabtitle="VB.NET" %} - -'Initialize the OCR processor by providing the path of the tesseract binaries - -Using processor As New OCRProcessor() - -'loading the input image - -Dim image As New Bitmap("input.png") - -'Set OCR language to process -processor.Settings.Language = Languages.English - -'Set tesseract OCR engine -processor.Settings.TesseractVersion = TesseractVersion.Version4_0 - -'Set the PDF conformance level -processor.Settings.Conformance = PdfConformanceLevel.Pdf_A1B - -'Process OCR by providing the bitmap image -Dim document As PdfDocument = processor.PerformOCR(image) - -'Save the OCR processed PDF document on the disk - -document.Save("Sample.pdf") - -document.Close(True) - -End Using - - -{% endhighlight %} -{% endtabs %} - -## Advantages of Native Call over Normal API - -Enabling this property will process OCR with native calls (PInvoke) instead of surrogate process. 
-The surrogate process requires permission to create and execute a process, whereas native calls (PInvoke) do not. Performance is also better with PInvoke than with the surrogate process. - -## Best Practices - -**You can improve the accuracy of the OCR process by choosing the correct compression method when converting the scanned paper to a TIFF image and then to a PDF document.** - -* Use (zip) lossless compression for color or gray-scale images. -* Use CCITT Group 4 or JBIG2 (lossless) compression for monochrome images. This ensures that optical character recognition works on the highest-quality image, thereby improving the OCR accuracy. This is especially useful in low-resolution scans. -* In addition, rotated images and skewed images can also affect the accuracy and readability of the OCR process. - -**Tesseract works best with text when at least 300 dots per inch (DPI) are used, so it is beneficial to resize images.** - -For more details regarding quality improvement, refer to the following link: - -[https://github.com/tesseract-ocr/tesseract/wiki/ImproveQuality](https://github.com/tesseract-ocr/tesseract/wiki/ImproveQuality) - -**You can set different performance levels for the OCRProcessor using the [Performance](https://help.syncfusion.com/cr/document-processing/Syncfusion.OCRProcessor.Performance.html) enumeration.** - -* Rapid – provides high-speed OCR performance with normal OCR accuracy. -* Fast – provides moderate OCR processing speed and accuracy. -* Slow – provides slow OCR performance with the best OCR accuracy. - -Refer to the following code snippet to set the performance of the OCR. 
- -{% tabs %} - -{% highlight c# tabtitle="C#" %} - - -OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/") - -//set the OCR performance - -processor.Settings.Performance = Performance.Fast; - - - -{% endhighlight %} - -{% highlight vb.net tabtitle="VB.NET" %} - - -Dim processor As New OCRProcessor("TesseractBinaries/") - -'Set the OCR performance - -processor.Settings.Performance = Performance.Fast - - - -{% endhighlight %} - -{% endtabs %} - -## Troubleshooting - -**Issue:** You can get the exception “Tesseract has not been initialized” while performing OCR process. - -**Solution 1:** To resolve this, make sure the path of the Tesseract binaries and Tesseract data are properly provided as shown below. - -{% tabs %} - -{% highlight c# tabtitle="C#" %} - - -//'TesseractBinaries – path of the folder containing SyncfusionTesseract.dll and liblept168.dll - -OCRProcessor processor = new OCRProcessor(@"TesseractBinaries/"); - -//TessData – path of the folder containing the language pack - -processor.PerformOCR(lDoc, @"TessData/"); - - - -{% endhighlight %} - -{% highlight vb.net tabtitle="VB.NET" %} - - -'TesseractBinaries – path of the folder containing SyncfusionTesseract.dll and liblept168.dll - -Dim processor As New OCRProcessor("TesseractBinaries/") - -'TessData – path of the folder containing the language pack - -processor.PerformOCR(lDoc, "TessData/") - - - -{% endhighlight %} - -{% endtabs %} - -**Solution 2:** Make sure that your data file version is 3.02, since the OCR processor is built with Tesseract version 3.02. - -**Issue:** OCR processor doesn’t process languages other than English. - -**Solution:** Essential® PDF supports all the languages supported by Tesseract engine. 
- -The dictionary packs for the languages can be downloaded from the following online location: - -[https://github.com/tesseract-ocr/tesseract/wiki/Data-Files#data-files-for-version-302](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files#data-files-for-version-302) - -It is also mandatory to change the corresponding language code in the `OCRProcessor.Settings.Language` property. For example, to perform optical character recognition in German, the property should be set as `processor.Settings.Language = "deu";`. - -The following link contains the complete set of languages supported by Tesseract and their language codes. - -[https://github.com/tesseract-ocr/tesseract/blob/main/doc/tesseract.1.asc#languages](https://github.com/tesseract-ocr/tesseract/blob/main/doc/tesseract.1.asc#languages) -