This demo uses the DevExpress PDF Document API (PdfDocumentProcessor) to extract text from a PDF document. You can process the predefined sample file or supply your own document. To do the latter, select Upload a File in the file selection drop-down menu.
In the Page Range Settings section, specify the pages to extract text from. Click Extract Text to extract text and download the result.
Select a Document
Sample_Alternative.pdf
Page Range Settings
using DevExpress.Pdf;
// Extracts text from the PDF in documentStream and returns it as a readable
// in-memory stream.
//
// documentStream: stream containing the PDF document to load.
// pageRange:      1-based page numbers to extract, in the order supplied.
//                 An empty sequence selects the processor's GetText() result
//                 instead of per-page extraction. Numbers below 1 or above the
//                 document's page count are silently skipped.
//                 A null sequence results in an ArgumentNullException.
//
// Returns a MemoryStream positioned at 0 that holds the extracted text in the
// StreamWriter default encoding. The caller owns the stream and disposes it.
Stream GetText(Stream documentStream, IEnumerable<int> pageRange) {
    using var processor = new PdfDocumentProcessor();
    processor.LoadDocument(documentStream);

    // Materialize the range once: it is inspected for emptiness and then
    // iterated, and a lazy or one-shot IEnumerable must not be enumerated twice.
    var pages = pageRange as IReadOnlyCollection<int> ?? pageRange.ToList();

    string text;
    if(pages.Count == 0) {
        text = processor.GetText();
    }
    else {
        // The page count does not change during extraction, so read it once.
        int pageCount = processor.Document.Pages.Count;

        // string.Concat builds the result in a single pass, avoiding the
        // repeated copying caused by "text +=" inside a loop.
        text = string.Concat(pages
            .Where(index => index >= 1 && index <= pageCount)
            .Select(index => processor.GetPageText(index)));
    }

    var outputStream = new MemoryStream();

    // The writer is deliberately not disposed: disposing it would also close
    // outputStream, which is handed back to the caller. Flush() pushes all
    // buffered characters into the stream before it is rewound.
    var writer = new StreamWriter(outputStream);
    writer.Write(text);
    writer.Flush();

    outputStream.Position = 0;
    return outputStream;
}