Jaa


LogicalChildrenContent Axis Methods - Open XML SDK V2 Strongly Typed Object Model

This is a clipboard-friendly version of the Open XML SDK V2 strongly typed object model example that includes the LogicalChildrenContent axis methods. It accompanies the post "Mastering Text in Open XML Word-Processing Documents".

This blog is inactive.
New blog: EricWhite.com/blog

Blog TOC

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using OpenXmlPowerTools;

/// <summary>
/// Extension methods that walk an Open XML element tree and expose its
/// "logical" content children (body, paragraphs, tables, rows, cells, runs,
/// sub-run content, text boxes) while skipping the elements in between.
/// </summary>
public static class LocalExtensions
{
    /// <summary>
    /// Returns the descendants of <paramref name="element"/> in document order,
    /// without descending below any element whose runtime type is exactly
    /// <paramref name="trimType"/>.
    /// </summary>
    /// <param name="element">Element whose descendants are enumerated.</param>
    /// <param name="trimType">Exact type at which the traversal stops descending.</param>
    /// <returns>
    /// A lazily evaluated sequence containing every visited descendant,
    /// including the elements at which the traversal was trimmed.
    /// </returns>
    public static IEnumerable<OpenXmlElement> DescendantsTrimmed(
        this OpenXmlElement element, Type trimType)
    {
        return DescendantsTrimmed(element, e => e.GetType() == trimType);
    }

    /// <summary>
    /// Depth-first, document-order traversal of the descendants of
    /// <paramref name="element"/>. Every visited element is yielded; children
    /// of an element for which <paramref name="predicate"/> returns true are
    /// not visited.
    /// </summary>
    /// <param name="element">Element whose descendants are enumerated.</param>
    /// <param name="predicate">Returns true for elements whose subtree must be skipped.</param>
    /// <returns>A lazily evaluated sequence of the visited descendants.</returns>
    private static IEnumerable<OpenXmlElement> DescendantsTrimmed(
        this OpenXmlElement element, Func<OpenXmlElement, bool> predicate)
    {
        // Explicit stack of child enumerators instead of recursion, so that the
        // iterator stays flat regardless of how deep the tree is.
        Stack<IEnumerator<OpenXmlElement>> iteratorStack =
            new Stack<IEnumerator<OpenXmlElement>>();
        try
        {
            iteratorStack.Push(element.Elements().GetEnumerator());
            while (iteratorStack.Count > 0)
            {
                IEnumerator<OpenXmlElement> siblings = iteratorStack.Peek();
                if (!siblings.MoveNext())
                {
                    // This level is exhausted: release its enumerator and
                    // resume with the parent level.
                    iteratorStack.Pop().Dispose();
                    continue;
                }

                OpenXmlElement currentOpenXmlElement = siblings.Current;

                // Decide before yielding, so the decision is not affected by
                // anything the consumer does while it holds the element.
                bool trimHere = predicate(currentOpenXmlElement);
                yield return currentOpenXmlElement;
                if (!trimHere)
                    iteratorStack.Push(currentOpenXmlElement.Elements().GetEnumerator());
            }
        }
        finally
        {
            // Reached with a non-empty stack when the consumer stops early
            // (First(), Take(n), break) or an exception escapes; dispose
            // whatever enumerators are still open.
            while (iteratorStack.Count > 0)
                iteratorStack.Pop().Dispose();
        }
    }

    /// <summary>
    /// Trims the traversal at elements matching <paramref name="predicate"/>
    /// and returns only those matching elements.
    /// </summary>
    private static IEnumerable<OpenXmlElement> TrimmedMatches(
        OpenXmlElement element, Func<OpenXmlElement, bool> predicate)
    {
        return element.DescendantsTrimmed(predicate).Where(predicate);
    }

    // Exact element types treated as the logical content of a Run.
    // A set gives constant-time membership tests; readonly prevents reassignment.
    private static readonly HashSet<Type> SubRunLevelContent = new HashSet<Type>
    {
        typeof(Break),
        typeof(CarriageReturn),
        typeof(DayLong),
        typeof(DayShort),
        typeof(Drawing),
        typeof(MonthLong),
        typeof(MonthShort),
        typeof(NoBreakHyphen),
        typeof(PageNumber),
        typeof(Picture),
        typeof(PositionalTab),
        typeof(SoftHyphen),
        typeof(SymbolChar),
        typeof(TabChar),
        typeof(Text),
        typeof(YearLong),
        typeof(YearShort),
        typeof(AlternateContent),
    };

    /// <summary>
    /// Returns the logical content children of <paramref name="element"/>.
    /// What counts as a child depends on the element's type:
    /// Document -> first Body; Body / TableCell / TextBoxContent -> Paragraph
    /// and Table; Table -> TableRow; TableRow -> TableCell; Paragraph -> Run,
    /// Picture and Drawing; Run -> the types in SubRunLevelContent;
    /// AlternateContent -> Picture and Drawing found outside any
    /// AlternateContentFallback; Picture / Drawing -> TextBoxContent.
    /// </summary>
    /// <param name="element">Element whose logical children are requested.</param>
    /// <returns>
    /// A lazily evaluated sequence of logical children; empty for any other
    /// element type.
    /// </returns>
    public static IEnumerable<OpenXmlElement> LogicalChildrenContent(
        this OpenXmlElement element)
    {
        if (element is Document)
            return element.Descendants<Body>().Take(1).Cast<OpenXmlElement>();

        if (element is Body ||
            element is TableCell ||
            element is TextBoxContent)
            return TrimmedMatches(element, e => e is Table || e is Paragraph);

        if (element is Table)
            return element
                .DescendantsTrimmed(typeof(TableRow))
                .Where(e => e is TableRow);

        if (element is TableRow)
            return element
                .DescendantsTrimmed(typeof(TableCell))
                .Where(e => e is TableCell);

        if (element is Paragraph)
            return TrimmedMatches(element, e => e is Run || e is Picture || e is Drawing);

        if (element is Run)
            return TrimmedMatches(element, e => SubRunLevelContent.Contains(e.GetType()));

        if (element is AlternateContent)
            // The fallback is a trim point but is not itself returned, so
            // nothing inside AlternateContentFallback is reported.
            return element
                .DescendantsTrimmed(e =>
                    e is Picture ||
                    e is Drawing ||
                    e is AlternateContentFallback)
                .Where(e =>
                    e is Picture ||
                    e is Drawing);

        if (element is Picture || element is Drawing)
            return element
                .DescendantsTrimmed(typeof(TextBoxContent))
                .Where(e => e is TextBoxContent);

        return Enumerable.Empty<OpenXmlElement>();
    }

    /// <summary>
    /// Returns, in order, the logical content children of every element in
    /// <paramref name="source"/>.
    /// </summary>
    /// <param name="source">Elements whose logical children are concatenated.</param>
    public static IEnumerable<OpenXmlElement> LogicalChildrenContent(
        this IEnumerable<OpenXmlElement> source)
    {
        foreach (OpenXmlElement e1 in source)
            foreach (OpenXmlElement e2 in e1.LogicalChildrenContent())
                yield return e2;
    }

    /// <summary>
    /// Returns the logical content children of <paramref name="element"/>
    /// whose runtime type is exactly <paramref name="typeName"/>.
    /// </summary>
    /// <param name="element">Element whose logical children are requested.</param>
    /// <param name="typeName">Exact type the returned children must have.</param>
    public static IEnumerable<OpenXmlElement> LogicalChildrenContent(
        this OpenXmlElement element, Type typeName)
    {
        return element.LogicalChildrenContent().Where(e => e.GetType() == typeName);
    }

    /// <summary>
    /// Returns, in order, the logical content children of every element in
    /// <paramref name="source"/> whose runtime type is exactly
    /// <paramref name="typeName"/>.
    /// </summary>
    /// <param name="source">Elements whose logical children are concatenated.</param>
    /// <param name="typeName">Exact type the returned children must have.</param>
    public static IEnumerable<OpenXmlElement> LogicalChildrenContent(
        this IEnumerable<OpenXmlElement> source, Type typeName)
    {
        foreach (OpenXmlElement e1 in source)
            foreach (OpenXmlElement e2 in e1.LogicalChildrenContent(typeName))
                yield return e2;
    }

    /// <summary>
    /// Concatenates all strings in <paramref name="source"/> in order, with no
    /// separator.
    /// </summary>
    /// <param name="source">Strings to concatenate.</param>
    /// <returns>The concatenated string; empty when the sequence is empty.</returns>
    public static string StringConcatenate(this IEnumerable<string> source)
    {
        StringBuilder sb = new StringBuilder();
        foreach (string s in source)
            sb.Append(s);
        return sb.ToString();
    }
}

// Console demos for the LogicalChildrenContent axis methods. Exactly one of the
// #if sections below is meant to be enabled at a time; each supplies its own Main.
class Program
{
    // Copies Test.docx into a memory stream, opens it as an editable
    // word-processing document, runs RevisionAccepter.AcceptRevisions on it and
    // then passes the open document to the supplied callback.
    static void ProcessTestDocument(Action<WordprocessingDocument> action)
    {
        byte[] fileBytes = File.ReadAllBytes("Test.docx");
        using (MemoryStream buffer = new MemoryStream())
        {
            buffer.Write(fileBytes, 0, fileBytes.Length);
            using (WordprocessingDocument wordDoc =
                WordprocessingDocument.Open(buffer, true))
            {
                RevisionAccepter.AcceptRevisions(wordDoc);
                action(wordDoc);
            }
        }
    }

#if true
    // Demo 1: prints the logical content tree, one element type name per line,
    // indented two spaces per level; Text elements also show their text.
    static void IterateContent(OpenXmlElement element, int depth)
    {
        string indent = string.Empty.PadRight(depth * 2);
        string elementTypeName = element.GetType().Name;

        if (element.GetType() != typeof(Text))
            Console.WriteLine("{0}{1}", indent, elementTypeName);
        else
            Console.WriteLine("{0}{1} >{2}<", indent, elementTypeName,
                ((Text)element).Text);

        foreach (OpenXmlElement child in element.LogicalChildrenContent())
            IterateContent(child, depth + 1);
    }

    static void Main(string[] args)
    {
        ProcessTestDocument(
            wordDoc => IterateContent(wordDoc.MainDocumentPart.Document, 0));
    }
#endif

#if false
    // Demo 2: prints the concatenated text of every body-level paragraph and
    // the word "Table" for every body-level table.
    static void Main(string[] args)
    {
        ProcessTestDocument(wordDoc =>
        {
            OpenXmlElement documentRoot = wordDoc.MainDocumentPart.Document;
            Body documentBody = (Body)documentRoot.LogicalChildrenContent().First();

            foreach (OpenXmlElement block in documentBody.LogicalChildrenContent())
            {
                if (!(block is Paragraph))
                {
                    // A body-level logical child that is not a paragraph
                    // must be a table.
                    Console.WriteLine("Table");
                    continue;
                }

                // Equivalent alternative to the two typed calls below:
                //   .LogicalChildrenContent().OfType<Run>().Cast<OpenXmlElement>()
                //   .LogicalChildrenContent()
                IEnumerable<OpenXmlElement> runs =
                    block.LogicalChildrenContent(typeof(Run));
                IEnumerable<OpenXmlElement> textElements =
                    runs.LogicalChildrenContent(typeof(Text));
                string paragraphText = textElements
                    .OfType<Text>()
                    .Select(t => t.Text)
                    .StringConcatenate();

                Console.WriteLine("Paragraph text >{0}<", paragraphText);
            }
        });
    }
#endif

#if false
    // Demo 3: walks the logical content tree and reports every paragraph whose
    // concatenated run text contains the search string.
    static void IterateContentAndSearch(OpenXmlElement element, string searchString)
    {
        if (element is Paragraph)
        {
            IEnumerable<OpenXmlElement> runs =
                element.LogicalChildrenContent(typeof(Run));
            string paragraphText = runs
                .LogicalChildrenContent(typeof(Text))
                .OfType<Text>()
                .Select(s => s.Text)
                .StringConcatenate();

            bool found = paragraphText.Contains(searchString);
            if (found)
                Console.WriteLine("Found {0}, paragraph: >{1}<", searchString, paragraphText);
        }

        foreach (OpenXmlElement child in element.LogicalChildrenContent())
            IterateContentAndSearch(child, searchString);
    }

    static void Main(string[] args)
    {
        ProcessTestDocument(
            wordDoc => IterateContentAndSearch(wordDoc.MainDocumentPart.Document, "control"));
    }
#endif
}