Jaa


LogicalChildrenContent Axis Methods - LINQ to XML

This is a clipboard-friendly version of the LINQ to XML example that includes the LogicalChildrenContent axis methods. It accompanies the post "Mastering Text in Open XML Word-Processing Documents".

This blog is inactive.
New blog: EricWhite.com/blog

Blog TOC

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
using DocumentFormat.OpenXml.Packaging;
using OpenXmlPowerTools;

/// <summary>
/// Namespace and element names from the WordprocessingML main namespace that the
/// LogicalChildrenContent axis methods in this file compare against.
/// </summary>
public static class W
{
    // XML namespace names are compared as exact strings, so the scheme matters:
    // the WordprocessingML main namespace uses "http", not "https". With the wrong
    // scheme none of the names below would match elements in a real document.
    public static readonly XNamespace w =
        "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
    public static readonly XName body = w + "body";
    public static readonly XName br = w + "br";
    public static readonly XName cr = w + "cr";
    public static readonly XName dayLong = w + "dayLong";
    public static readonly XName dayShort = w + "dayShort";
    public static readonly XName document = w + "document";
    public static readonly XName drawing = w + "drawing";
    public static readonly XName monthLong = w + "monthLong";
    public static readonly XName monthShort = w + "monthShort";
    public static readonly XName noBreakHyphen = w + "noBreakHyphen";
    public static readonly XName p = w + "p";
    public static readonly XName pPr = w + "pPr";
    public static readonly XName pTab = w + "pTab";
    public static readonly XName pgNum = w + "pgNum";
    public static readonly XName pict = w + "pict";
    public static readonly XName r = w + "r";
    public static readonly XName softHyphen = w + "softHyphen";
    public static readonly XName sym = w + "sym";
    public static readonly XName t = w + "t";
    public static readonly XName tab = w + "tab";
    public static readonly XName tbl = w + "tbl";
    public static readonly XName tc = w + "tc";
    public static readonly XName tr = w + "tr";
    public static readonly XName txbxContent = w + "txbxContent";
    public static readonly XName yearLong = w + "yearLong";
    public static readonly XName yearShort = w + "yearShort";
}

/// <summary>
/// Namespace and element names from the markup-compatibility namespace
/// (AlternateContent / Choice / Fallback).
/// </summary>
public static class MC
{
    // Namespace names are compared as exact strings; the markup-compatibility
    // namespace uses the "http" scheme, so "https" would never match a document.
    public static readonly XNamespace mc =
        "http://schemas.openxmlformats.org/markup-compatibility/2006";
    public static readonly XName AlternateContent = mc + "AlternateContent";
    public static readonly XName Choice = mc + "Choice";
    public static readonly XName Fallback = mc + "Fallback";
}

/// <summary>
/// Extension methods for walking the logical content of a word-processing
/// document with LINQ to XML.
/// </summary>
public static class LocalExtensions
{
    /// <summary>
    /// Returns the part's XML as an <see cref="XDocument"/>. The document is loaded
    /// from the part stream on first use and stored as an annotation on the part,
    /// so later calls return the same instance.
    /// </summary>
    /// <param name="part">The package part to read.</param>
    /// <returns>The cached or newly loaded document.</returns>
    public static XDocument GetXDocument(this OpenXmlPart part)
    {
        XDocument partXDocument = part.Annotation<XDocument>();
        if (partXDocument != null)
            return partXDocument;
        using (Stream partStream = part.GetStream())
        using (XmlReader partXmlReader = XmlReader.Create(partStream))
            partXDocument = XDocument.Load(partXmlReader);
        part.AddAnnotation(partXDocument);
        return partXDocument;
    }

    /// <summary>
    /// Returns the descendants of <paramref name="element"/> in document order,
    /// without descending into any element named <paramref name="trimName"/>
    /// (the trimmed element itself is still returned).
    /// </summary>
    /// <param name="element">The element whose descendants are enumerated.</param>
    /// <param name="trimName">Name of the elements at which descent stops.</param>
    public static IEnumerable<XElement> DescendantsTrimmed(this XElement element,
        XName trimName)
    {
        return DescendantsTrimmed(element, e => e.Name == trimName);
    }

    /// <summary>
    /// Depth-first, document-order enumeration of descendants that does not
    /// descend into elements for which <paramref name="predicate"/> is true.
    /// Every visited element is yielded, whether it is trimmed or not.
    /// </summary>
    private static IEnumerable<XElement> DescendantsTrimmed(this XElement element,
        Func<XElement, bool> predicate)
    {
        Stack<IEnumerator<XElement>> iteratorStack = new Stack<IEnumerator<XElement>>();
        try
        {
            iteratorStack.Push(element.Elements().GetEnumerator());
            while (iteratorStack.Count > 0)
            {
                IEnumerator<XElement> top = iteratorStack.Peek();
                if (!top.MoveNext())
                {
                    // This level is exhausted: release its enumerator and resume the parent.
                    iteratorStack.Pop().Dispose();
                    continue;
                }
                XElement currentXElement = top.Current;
                // Evaluate the predicate before yielding so the decision reflects the
                // element as it was when visited, not after the caller has seen it.
                bool trim = predicate(currentXElement);
                yield return currentXElement;
                if (!trim)
                    iteratorStack.Push(currentXElement.Elements().GetEnumerator());
            }
        }
        finally
        {
            // Runs when the caller stops enumerating early (or on an exception), so
            // no enumerator still on the stack is left undisposed.
            while (iteratorStack.Count > 0)
                iteratorStack.Pop().Dispose();
        }
    }

    // Element names treated as the logical content of a run (w:r).
    // A set gives constant-time membership tests and cannot hold duplicates.
    private static readonly HashSet<XName> SubRunLevelContent = new HashSet<XName>
    {
        W.br,
        W.cr,
        W.dayLong,
        W.dayShort,
        W.drawing,
        W.monthLong,
        W.monthShort,
        W.noBreakHyphen,
        W.pTab,
        W.pgNum,
        W.pict,
        W.softHyphen,
        W.sym,
        W.t,
        W.tab,
        W.yearLong,
        W.yearShort,
        MC.AlternateContent,
    };

    /// <summary>
    /// Returns the logical content children of <paramref name="element"/>, chosen
    /// by the element's name:
    /// document -> the first body descendant;
    /// body, tc, txbxContent -> nearest p and tbl descendants;
    /// tbl -> nearest tr descendants; tr -> nearest tc descendants;
    /// p -> nearest r, pict and drawing descendants;
    /// r -> nearest sub-run content elements (see SubRunLevelContent);
    /// AlternateContent -> nearest pict and drawing descendants, not looking inside Fallback;
    /// pict, drawing -> nearest txbxContent descendants;
    /// any other element -> an empty sequence.
    /// "Nearest" means the search does not continue below a matching element.
    /// </summary>
    /// <param name="element">The element whose logical children are requested.</param>
    public static IEnumerable<XElement> LogicalChildrenContent(this XElement element)
    {
        if (element.Name == W.document)
            return element.Descendants(W.body).Take(1);
        if (element.Name == W.body ||
            element.Name == W.tc ||
            element.Name == W.txbxContent)
            return element
                .DescendantsTrimmed(e =>
                    e.Name == W.tbl ||
                    e.Name == W.p)
                .Where(e =>
                    e.Name == W.p ||
                    e.Name == W.tbl);
        if (element.Name == W.tbl)
            return element
                .DescendantsTrimmed(W.tr)
                .Where(e => e.Name == W.tr);
        if (element.Name == W.tr)
            return element
                .DescendantsTrimmed(W.tc)
                .Where(e => e.Name == W.tc);
        if (element.Name == W.p)
            return element
                .DescendantsTrimmed(e => e.Name == W.r ||
                    e.Name == W.pict ||
                    e.Name == W.drawing)
                .Where(e => e.Name == W.r ||
                    e.Name == W.pict ||
                    e.Name == W.drawing);
        if (element.Name == W.r)
            return element
                .DescendantsTrimmed(e => SubRunLevelContent.Contains(e.Name))
                .Where(e => SubRunLevelContent.Contains(e.Name));
        if (element.Name == MC.AlternateContent)
            // Fallback is trimmed but not returned, so anything inside it is skipped.
            return element
                .DescendantsTrimmed(e =>
                    e.Name == W.pict ||
                    e.Name == W.drawing ||
                    e.Name == MC.Fallback)
                .Where(e =>
                    e.Name == W.pict ||
                    e.Name == W.drawing);
        if (element.Name == W.pict || element.Name == W.drawing)
            return element
                .DescendantsTrimmed(W.txbxContent)
                .Where(e => e.Name == W.txbxContent);
        return XElement.EmptySequence;
    }

    /// <summary>
    /// Returns the logical content children of every element in
    /// <paramref name="source"/>, concatenated in source order.
    /// </summary>
    public static IEnumerable<XElement> LogicalChildrenContent(this IEnumerable<XElement> source)
    {
        foreach (XElement e1 in source)
            foreach (XElement e2 in e1.LogicalChildrenContent())
                yield return e2;
    }

    /// <summary>
    /// Returns the logical content children of <paramref name="element"/> whose
    /// name equals <paramref name="name"/>.
    /// </summary>
    public static IEnumerable<XElement> LogicalChildrenContent(this XElement element,
        XName name)
    {
        return element.LogicalChildrenContent().Where(e => e.Name == name);
    }

    /// <summary>
    /// Returns, for every element in <paramref name="source"/>, its logical content
    /// children whose name equals <paramref name="name"/>, in source order.
    /// </summary>
    public static IEnumerable<XElement> LogicalChildrenContent(
        this IEnumerable<XElement> source, XName name)
    {
        foreach (XElement e1 in source)
            foreach (XElement e2 in e1.LogicalChildrenContent(name))
                yield return e2;
    }

    /// <summary>
    /// Concatenates all strings in <paramref name="source"/>, in order, into one string.
    /// </summary>
    public static string StringConcatenate(this IEnumerable<string> source)
    {
        StringBuilder sb = new StringBuilder();
        foreach (string s in source)
            sb.Append(s);
        return sb.ToString();
    }
}

// Sample driver. Exactly one of the three #if sections below is compiled; each
// shows a different way to consume the LogicalChildrenContent axis methods.
class Program
{
#if true
    // Writes the element's local name indented two spaces per depth level (w:t
    // elements also show their text), then recurses into its logical children.
    static void IterateContent(XElement element, int depth)
    {
        string indent = "".PadRight(depth * 2);
        string localName = element.Name.LocalName;
        if (element.Name != W.t)
            Console.WriteLine("{0}{1}", indent, localName);
        else
            Console.WriteLine("{0}{1} >{2}<", indent, localName, (string)element);

        foreach (XElement child in element.LogicalChildrenContent())
            IterateContent(child, depth + 1);
    }

    // Loads Test.docx into memory, accepts tracked revisions, and dumps the
    // logical content tree of the main document part.
    static void Main(string[] args)
    {
        byte[] documentBytes = File.ReadAllBytes("Test.docx");
        using (MemoryStream buffer = new MemoryStream())
        {
            // Work on an in-memory copy of the file contents.
            buffer.Write(documentBytes, 0, documentBytes.Length);
            using (WordprocessingDocument wordDoc =
                WordprocessingDocument.Open(buffer, true))
            {
                RevisionAccepter.AcceptRevisions(wordDoc);
                XElement root = wordDoc.MainDocumentPart.GetXDocument().Root;
                IterateContent(root, 0);
            }
        }
    }
#endif

#if false
    // Prints the text of every body-level paragraph and "Table" for anything else.
    static void Main(string[] args)
    {
        byte[] documentBytes = File.ReadAllBytes("Test.docx");
        using (MemoryStream buffer = new MemoryStream())
        {
            buffer.Write(documentBytes, 0, documentBytes.Length);
            using (WordprocessingDocument wordDoc =
                WordprocessingDocument.Open(buffer, true))
            {
                RevisionAccepter.AcceptRevisions(wordDoc);
                XElement documentRoot = wordDoc.MainDocumentPart.GetXDocument().Root;
                XElement bodyElement = documentRoot.LogicalChildrenContent().First();
                foreach (XElement block in bodyElement.LogicalChildrenContent())
                {
                    if (block.Name != W.p)
                    {
                        // If element is not a paragraph, it must be a table.
                        Console.WriteLine("Table");
                        continue;
                    }

                    // Same result as block.LogicalChildrenContent(W.r).LogicalChildrenContent(W.t),
                    // spelled out with explicit name filters.
                    IEnumerable<XElement> runs = block
                        .LogicalChildrenContent()
                        .Where(e => e.Name == W.r);
                    IEnumerable<XElement> textElements = runs
                        .LogicalChildrenContent()
                        .Where(e => e.Name == W.t);
                    string text = textElements
                        .Select(t => (string)t)
                        .StringConcatenate();
                    Console.WriteLine("Paragraph text >{0}<", text);
                }
            }
        }
    }
#endif

#if false
    // Reports every paragraph whose concatenated run text contains searchString,
    // then recurses into the element's logical children.
    static void IterateContentAndSearch(XElement element, string searchString)
    {
        if (element.Name == W.p)
        {
            IEnumerable<string> textPieces = element
                .LogicalChildrenContent(W.r)
                .LogicalChildrenContent(W.t)
                .Select(s => (string)s);
            string paragraphText = textPieces.StringConcatenate();
            if (paragraphText.Contains(searchString))
                Console.WriteLine("Found {0}, paragraph: >{1}<", searchString, paragraphText);
        }

        foreach (XElement child in element.LogicalChildrenContent())
            IterateContentAndSearch(child, searchString);
    }

    // Loads Test7.docx into memory, accepts tracked revisions, and searches the
    // main document part for the word "control".
    static void Main(string[] args)
    {
        byte[] documentBytes = File.ReadAllBytes("Test7.docx");
        using (MemoryStream buffer = new MemoryStream())
        {
            buffer.Write(documentBytes, 0, documentBytes.Length);
            using (WordprocessingDocument wordDoc =
                WordprocessingDocument.Open(buffer, true))
            {
                RevisionAccepter.AcceptRevisions(wordDoc);
                XElement root = wordDoc.MainDocumentPart.GetXDocument().Root;
                IterateContentAndSearch(root, "control");
            }
        }
    }
#endif
}