This demo uses the DevExpress Presentation API to extract text content from presentation slides. You can process the predefined sample file or upload your own presentation. To upload your own file, select "Upload a File" in the file selection drop-down menu.
In the "Extraction Settings" panel, specify the slide range and the element types from which to extract text (shapes, tables, or speaker notes). Click "Extract Text" to run the extraction and download the result.
Sample document
Sample.pptx
Extraction Settings
Shapes
Tables
using System.Text;
using DevExpress.Docs.Presentation;
/// <summary>
/// Loads a presentation from <paramref name="inputStream"/> and returns the text of the
/// selected slides as a Markdown-like string: a "## Slide N" heading per slide, followed by
/// shape/table text and, optionally, a "### Notes" section.
/// </summary>
/// <param name="inputStream">Stream that contains the presentation to read.</param>
/// <param name="slideRange">
/// Slide indices to export. Values are compared against the zero-based loop index.
/// A null or empty list exports every slide.
/// </param>
/// <param name="extractFromShapes">Whether text of regular shapes is included.</param>
/// <param name="extractFromTables">Whether text of tables is included.</param>
/// <param name="extractNotes">Whether the notes text is appended for each slide.</param>
/// <returns>The extracted text; each exported slide is followed by a blank line.</returns>
string ExtractText(Stream inputStream, List<int> slideRange, bool extractFromShapes, bool extractFromTables, bool extractNotes) {
    using var presentation = new Presentation(inputStream);

    // A set gives constant-time membership checks inside the slide loop, and substituting an
    // empty list for null lets a missing range mean "all slides" instead of throwing.
    var selectedSlides = new HashSet<int>(slideRange ?? new List<int>());
    bool shouldExportFromAllSlides = selectedSlides.Count == 0;

    var sb = new StringBuilder();
    for(int i = 0; i < presentation.Slides.Count; i++) {
        if(!shouldExportFromAllSlides && !selectedSlides.Contains(i))
            continue;

        var slide = presentation.Slides[i];
        // Headings use one-based numbering even though the range check is zero-based.
        sb.AppendLine($"## Slide {i + 1}");
        ExtractShapeText(sb, slide.Shapes, extractFromShapes, extractFromTables);

        if(extractNotes) {
            var notesText = ExtractTextFromNotes(slide);
            // Skip the "### Notes" heading when there is nothing to show under it.
            if(!string.IsNullOrWhiteSpace(notesText)) {
                sb.AppendLine("### Notes");
                sb.AppendLine(notesText);
            }
        }

        // Blank line separates consecutive slides in the output.
        sb.AppendLine();
    }
    return sb.ToString();
}
/// <summary>
/// Appends the text of the given shapes to <paramref name="sb"/>, visiting them ordered by
/// their Y coordinate and then by their X coordinate (presumably top-to-bottom, then
/// left-to-right reading order - confirm against the API's coordinate system).
/// Group shapes are traversed recursively.
/// </summary>
/// <param name="sb">Builder that receives the extracted text.</param>
/// <param name="shapes">Shapes to inspect.</param>
/// <param name="extractFromShapes">Whether text of <c>Shape</c> items is appended.</param>
/// <param name="extractFromTables">Whether text of <c>Table</c> items is appended.</param>
void ExtractShapeText(StringBuilder sb, ShapeCollection shapes, bool extractFromShapes, bool extractFromTables) {
    foreach(ShapeBase item in shapes.OrderBy(s => s.Y).ThenBy(s => s.X)) {
        // The type checks are tried in the same order as before: Shape, Table, GroupShape.
        if(item is Shape textShape) {
            // Shapes without a text area contribute nothing.
            if(extractFromShapes && textShape.TextArea != null)
                sb.AppendLine(textShape.TextArea.Text);
        }
        else if(item is Table table) {
            // Table text already ends each row with a line break, so plain Append is used.
            if(extractFromTables)
                sb.Append(ExtractTextFromTable(table));
        }
        else if(item is GroupShape group) {
            // Descend into the group with the same extraction flags.
            ExtractShapeText(sb, group.Shapes, extractFromShapes, extractFromTables);
        }
    }
}
/// <summary>
/// Returns the text of the first shape on the slide's notes page whose placeholder type is
/// <c>PlaceholderType.Body</c>.
/// </summary>
/// <param name="slide">Slide whose notes are read.</param>
/// <returns>
/// The notes body text, or an empty string when the slide has no notes, no body placeholder
/// is found, or the body placeholder has no text.
/// </returns>
public string ExtractTextFromNotes(Slide slide) {
    var notesSlide = slide.Notes;
    if(notesSlide == null)
        return string.Empty;

    foreach(var candidate in notesSlide.Shapes) {
        // Null-conditional access guards against shapes that carry no placeholder settings
        // (assumed possible for non-placeholder shapes); such shapes are simply skipped
        // instead of causing a NullReferenceException.
        if(candidate is Shape noteShape && noteShape.PlaceholderSettings?.Type == PlaceholderType.Body) {
            // Same null-tolerant text access as the table-cell and shape extraction paths.
            return noteShape.TextArea?.Text ?? string.Empty;
        }
    }
    return string.Empty;
}
/// <summary>
/// Converts a table to plain text: cells of a row are separated by tab characters and every
/// row is terminated with a line break.
/// </summary>
/// <param name="table">Table to read.</param>
/// <returns>The tab-separated text; cells without a text area or text yield an empty field.</returns>
public string ExtractTextFromTable(Table table) {
    var output = new StringBuilder();
    int rowCount = table.Rows.Count;
    int columnCount = table.Columns.Count;

    for(int row = 0; row < rowCount; row++) {
        for(int column = 0; column < columnCount; column++) {
            // Emit the separator before every cell except the first, so no row ends in a tab.
            if(column > 0)
                output.Append('\t');
            output.Append(table[row, column].TextArea?.Text ?? string.Empty);
        }
        output.AppendLine();
    }
    return output.ToString();
}