Share via


Complete Listing of ParseWordML

[Blog Map]  [Table of Contents]  [Next Topic]

The following code is attached to this page.

This blog is inactive.
New blog: EricWhite.com/blog

Blog TOC

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Xml;
using System.Xml.Linq;
using DocumentFormat.OpenXml.Packaging;

/// <summary>
/// A grouping that pairs a key with the list of items collected for it.
/// Instances are produced by <c>GroupAdjacent</c>, one per run of items.
/// </summary>
/// <typeparam name="TSource">Type of the grouped items.</typeparam>
/// <typeparam name="TKey">Type of the grouping key.</typeparam>
public class GroupOfAdjacent<TSource, TKey> :
    IEnumerable<TSource>, IGrouping<TKey, TSource>
{
    /// <summary>
    /// Creates a group over <paramref name="source"/> identified by <paramref name="key"/>.
    /// The list is stored by reference, not copied.
    /// </summary>
    public GroupOfAdjacent(List<TSource> source, TKey key)
    {
        Key = key;
        GroupList = source;
    }

    /// <summary>The key shared by the items of this group.</summary>
    public TKey Key { get; set; }

    // Backing list holding the members of the group, in the order they were added.
    private List<TSource> GroupList { get; set; }

    // Typed enumeration: lazily yields the members of the backing list in order.
    IEnumerator<TSource> IEnumerable<TSource>.GetEnumerator()
    {
        foreach (TSource member in GroupList)
        {
            yield return member;
        }
    }

    // Untyped enumeration simply forwards to the typed implementation.
    System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
    {
        IEnumerable<TSource> typedSelf = this;
        return typedSelf.GetEnumerator();
    }
}

/// <summary>
/// LINQ helper extensions used by the WordprocessingML parsing sample:
/// element paths, string concatenation, and grouping of adjacent items.
/// </summary>
public static class LocalExtensions
{
    /// <summary>
    /// Builds a path from the local names of <paramref name="el"/> and its ancestors.
    /// Every name is followed by a "/", so the result ends with a slash.
    /// </summary>
    /// <param name="el">The element whose path is wanted.</param>
    /// <returns>The slash-separated path of local names.</returns>
    public static string GetPath(this XElement el)
    {
        // Each visited name is prepended to what has been accumulated so far,
        // so the element itself ends up last in the path.
        return
            el
            .AncestorsAndSelf()
            .Aggregate("", (seed, i) => i.Name.LocalName + "/" + seed);
    }

    /// <summary>
    /// Concatenates all strings of <paramref name="source"/> in order, without a separator.
    /// </summary>
    /// <param name="source">The strings to join.</param>
    /// <returns>The concatenation; an empty string for an empty sequence.</returns>
    public static string StringConcatenate(
        this IEnumerable<string> source)
    {
        return source.Aggregate(
            new StringBuilder(),
            (s, i) => s.Append(i),
            s => s.ToString());
    }

    /// <summary>
    /// Projects every item of <paramref name="source"/> to a string and concatenates
    /// the results in order, without a separator.
    /// </summary>
    /// <param name="source">The items to project.</param>
    /// <param name="projectionFunc">Maps an item to the text it contributes.</param>
    /// <returns>The concatenation; an empty string for an empty sequence.</returns>
    public static string StringConcatenate<T>(
        this IEnumerable<T> source,
        Func<T, string> projectionFunc)
    {
        return source.Aggregate(
            new StringBuilder(),
            (s, i) => s.Append(projectionFunc(i)),
            s => s.ToString());
    }

    /// <summary>
    /// Groups consecutive items that share the same key. Unlike <c>GroupBy</c>, a key
    /// that reappears after a different key starts a new group.
    /// </summary>
    /// <param name="source">The items to group, in order.</param>
    /// <param name="keySelector">Computes the key of an item; may return null.</param>
    /// <returns>
    /// One group per run of equal keys, in source order. Evaluation is deferred:
    /// nothing is read from <paramref name="source"/> until the result is enumerated.
    /// </returns>
    public static IEnumerable<IGrouping<TKey, TSource>> GroupAdjacent<TSource, TKey>(
        this IEnumerable<TSource> source,
        Func<TSource, TKey> keySelector)
    {
        // Compare through the default comparer rather than k.Equals(last):
        // calling Equals on a null key would throw NullReferenceException.
        EqualityComparer<TKey> comparer = EqualityComparer<TKey>.Default;

        TKey last = default(TKey);
        bool haveLast = false;
        List<TSource> list = new List<TSource>();

        foreach (TSource s in source)
        {
            TKey k = keySelector(s);

            // A key change closes the current run; the item then opens a new one.
            if (haveLast && !comparer.Equals(k, last))
            {
                yield return new GroupOfAdjacent<TSource, TKey>(list, last);
                list = new List<TSource>();
            }

            list.Add(s);
            last = k;
            haveLast = true;
        }

        // Emit the final run, unless the source was empty.
        if (haveLast)
            yield return new GroupOfAdjacent<TSource, TKey>(list, last);
    }
}

/// <summary>
/// Sample program: opens SampleDoc.docx, finds runs of adjacent paragraphs styled
/// "Code", and prints each run together with the text of the comment attached to it.
/// </summary>
class Program
{
    // WordprocessingML main namespace. A namespace URI is an opaque identifier that
    // must match the document exactly; the correct value uses the "http" scheme.
    // With "https" no element or attribute lookup below would match anything.
    readonly static XNamespace w =
        "http://schemas.openxmlformats.org/wordprocessingml/2006/main";

    /// <summary>
    /// Loads the XML content of a package part into an <see cref="XDocument"/>.
    /// </summary>
    /// <param name="part">The part to read; must not be null.</param>
    /// <returns>The parsed document.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="part"/> is null.</exception>
    public static XDocument LoadXDocument(OpenXmlPart part)
    {
        if (part == null)
            throw new ArgumentNullException("part");

        // Dispose the XmlReader as well as the StreamReader once parsing is done.
        using (StreamReader streamReader = new StreamReader(part.GetStream()))
        using (XmlReader xmlReader = XmlReader.Create(streamReader))
            return XDocument.Load(xmlReader);
    }

    /// <summary>
    /// Returns the style id referenced by a paragraph (w:pPr/w:pStyle/@w:val).
    /// </summary>
    /// <param name="para">A w:p element.</param>
    /// <returns>The style id, or null when the paragraph names no style.</returns>
    public static string GetParagraphStyle(XElement para)
    {
        return (string)para.Elements(w + "pPr")
            .Elements(w + "pStyle")
            .Attributes(w + "val")
            .FirstOrDefault();
    }

    /// <summary>
    /// Returns the text of the comment with the given id. The text of each comment
    /// paragraph is followed by a newline.
    /// </summary>
    /// <param name="commentsDoc">The comments part document; may be null.</param>
    /// <param name="id">The value of the comment's w:id attribute; may be null.</param>
    /// <returns>
    /// The comment text, or an empty string when there is no comments document,
    /// no id, or no comment with that id.
    /// </returns>
    public static string GetCommentText(XDocument commentsDoc, string id)
    {
        if (commentsDoc == null || commentsDoc.Root == null || id == null)
            return string.Empty;

        XElement commentNode =
            commentsDoc.Root
                .Elements(w + "comment")
                .FirstOrDefault(c => (string)c.Attribute(w + "id") == id);

        // An unmatched id yields empty meta data instead of an exception.
        if (commentNode == null)
            return string.Empty;

        var comment =
            commentNode.Elements(w + "p")
                .StringConcatenate(node =>
                    node.Descendants(w + "t")
                        .Select(t => (string)t)
                        .StringConcatenate()
                    + "\n");
        return comment;
    }

    static void Main(string[] args)
    {
        const string filename = "SampleDoc.docx";

        // The document is only read, so it is opened read-only (isEditable: false).
        using (WordprocessingDocument wordDoc =
            WordprocessingDocument.Open(filename, false))
        {
            MainDocumentPart mainPart = wordDoc.MainDocumentPart;
            StyleDefinitionsPart stylePart = mainPart.StyleDefinitionsPart;
            CommentsPart commentsPart = mainPart.CommentsPart;
            XDocument mainPartDoc = LoadXDocument(mainPart);
            XDocument styleDoc = LoadXDocument(stylePart);

            // Tolerate a package without a comments part: every code block is
            // then reported with empty meta data.
            XDocument commentsDoc =
                commentsPart == null ? null : LoadXDocument(commentsPart);

            // Style id of the default paragraph style; used for paragraphs that
            // do not reference a style explicitly.
            string defaultStyle =
                (string)styleDoc.Root
                    .Elements(w + "style")
                    .Where(style =>
                        (string)style.Attribute(w + "type") == "paragraph" &&
                        (string)style.Attribute(w + "default") == "1")
                    .First()
                    .Attribute(w + "styleId");

            var paragraphs =
                mainPartDoc.Root
                    .Element(w + "body")
                    .Descendants(w + "p")
                    .Select(p =>
                        new
                        {
                            ParagraphNode = p,
                            Style = GetParagraphStyle(p) ?? defaultStyle
                        });

            XName r = w + "r";
            XName ins = w + "ins";

            // Paragraph text is taken from w:t elements below the paragraph's
            // direct w:r and w:ins children only.
            var paragraphsWithText =
                paragraphs.Select(p =>
                    new
                    {
                        ParagraphNode = p.ParagraphNode,
                        Style = p.Style,
                        Text = p.ParagraphNode
                            .Elements()
                            .Where(z => z.Name == r || z.Name == ins)
                            .Descendants(w + "t")
                            .StringConcatenate(s => (string)s)
                    });

            var groupedCodeParagraphs =
                paragraphsWithText.GroupAdjacent(p => p.Style)
                    .Where(g => g.Key == "Code");

            var groupedCodeWithComments =
                groupedCodeParagraphs.Select(g =>
                {
                    // The first comment range starting inside the group supplies
                    // its meta data; a group without one gets an empty comment.
                    XElement rangeStart =
                        g.Select(p => p.ParagraphNode)
                            .Elements(w + "commentRangeStart")
                            .FirstOrDefault();
                    string id = rangeStart == null
                        ? null
                        : (string)rangeStart.Attribute(w + "id");
                    return new
                    {
                        ParagraphGroup = g,
                        Comment = GetCommentText(commentsDoc, id)
                    };
                });

            foreach (var group in groupedCodeWithComments)
            {
                Console.WriteLine("Code Block");
                Console.WriteLine("==========");
                foreach (var paragraph in group.ParagraphGroup)
                    Console.WriteLine(paragraph.Text);
                Console.WriteLine();
                Console.WriteLine("Meta Data");
                Console.WriteLine("=========");
                Console.WriteLine(group.Comment);
                Console.WriteLine();
            }
        }
    }
}

[Blog Map]  [Table of Contents]  [Next Topic]

ParseWordML.cs