Проекция XML в другую форму
В этом разделе показан пример проецирования XML в форму, отличную от формы исходного XML.
Многие типичные преобразования XML состоят из цепочек запросов, как в этом примере. Обычно за основу берётся XML в некоторой форме, промежуточные результаты проецируются в коллекции анонимных или именованных типов, а затем эти результаты снова проецируются в XML, форма которого полностью отличается от формы исходного XML.
В данном примере обрабатывается документ WordprocessingML, из которого извлекаются узлы абзацев. Также определяются стиль и текст каждого абзаца. Наконец, в примере проецируется XML другой формы. Этот пример основан на предыдущих примерах данного учебника. Новая инструкция, выполняющая проекцию, отмечена комментарием в приведённом ниже коде.
Инструкции по созданию исходного документа для данного примера см. в разделе Создание исходного документа Office Open XML.
В этом примере используются классы из сборки WindowsBase. Используются типы из пространства имен System.IO.Packaging.
/// <summary>
/// Extension methods that concatenate the items of a sequence into a single string.
/// </summary>
public static class LocalExtensions
{
    /// <summary>
    /// Concatenates every string in <paramref name="source"/> in iteration order.
    /// </summary>
    /// <param name="source">The strings to concatenate.</param>
    /// <returns>The concatenation; an empty string when the sequence is empty.</returns>
    public static string StringConcatenate(this IEnumerable<string> source)
    {
        StringBuilder sb = new StringBuilder();
        foreach (string s in source)
        {
            sb.Append(s);
        }
        return sb.ToString();
    }

    /// <summary>
    /// Projects every item to a string with <paramref name="func"/> and concatenates
    /// the results in iteration order.
    /// </summary>
    /// <param name="source">The items to project.</param>
    /// <param name="func">Maps an item to the text appended for it.</param>
    /// <returns>The concatenation; an empty string when the sequence is empty.</returns>
    public static string StringConcatenate<T>(this IEnumerable<T> source,
        Func<T, string> func)
    {
        StringBuilder sb = new StringBuilder();
        foreach (T item in source)
        {
            sb.Append(func(item));
        }
        return sb.ToString();
    }

    /// <summary>
    /// Concatenates every string in <paramref name="source"/>, appending
    /// <paramref name="separator"/> after each one (including the last).
    /// </summary>
    /// <param name="source">The strings to concatenate.</param>
    /// <param name="separator">Text appended after every item.</param>
    /// <returns>The concatenation; an empty string when the sequence is empty.</returns>
    public static string StringConcatenate(this IEnumerable<string> source, string separator)
    {
        StringBuilder sb = new StringBuilder();
        foreach (string s in source)
        {
            sb.Append(s).Append(separator);
        }
        return sb.ToString();
    }

    /// <summary>
    /// Projects every item to a string with <paramref name="func"/> and concatenates
    /// the results, appending <paramref name="separator"/> after each one
    /// (including the last).
    /// </summary>
    /// <param name="source">The items to project.</param>
    /// <param name="func">Maps an item to the text appended for it.</param>
    /// <param name="separator">Text appended after every projected item.</param>
    /// <returns>The concatenation; an empty string when the sequence is empty.</returns>
    public static string StringConcatenate<T>(this IEnumerable<T> source,
        Func<T, string> func, string separator)
    {
        StringBuilder sb = new StringBuilder();
        foreach (T item in source)
        {
            sb.Append(func(item)).Append(separator);
        }
        return sb.ToString();
    }
}
/// <summary>
/// Reads the paragraphs of a WordprocessingML document and projects them into
/// a new XML tree of Paragraph elements with StyleName and Text children.
/// </summary>
class Program
{
    /// <summary>
    /// Returns the text of a paragraph by concatenating the values of all
    /// t elements found under its r child elements.
    /// </summary>
    /// <param name="e">The paragraph element; its namespace is used for the child lookups.</param>
    /// <returns>The concatenated text, or an empty string when there are no such elements.</returns>
    public static string ParagraphText(XElement e)
    {
        XNamespace w = e.Name.Namespace;
        return e
               .Elements(w + "r")
               .Elements(w + "t")
               .StringConcatenate(element => (string)element);
    }

    /// <summary>
    /// Opens SampleDoc.docx, loads the main document part and its styles part,
    /// resolves the style name and text of every paragraph, and writes the
    /// projected XML to the console.
    /// </summary>
    static void Main(string[] args)
    {
        const string fileName = "SampleDoc.docx";

        // Relationship types and namespace are identifiers, not links: they must stay "http://".
        const string documentRelationshipType =
          "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
        const string stylesRelationshipType =
          "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
        const string wordmlNamespace =
          "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
        XNamespace w = wordmlNamespace;

        XDocument xDoc = null;
        XDocument styleDoc = null;

        using (Package wdPackage = Package.Open(fileName, FileMode.Open, FileAccess.Read))
        {
            PackageRelationship docPackageRelationship =
              wdPackage.GetRelationshipsByType(documentRelationshipType).FirstOrDefault();
            if (docPackageRelationship != null)
            {
                Uri documentUri = PackUriHelper.ResolvePartUri(new Uri("/", UriKind.Relative),
                  docPackageRelationship.TargetUri);
                PackagePart documentPart = wdPackage.GetPart(documentUri);

                // Load the document XML in the part into an XDocument instance.
                xDoc = XDocument.Load(XmlReader.Create(documentPart.GetStream()));

                // Find the styles part. There will only be one.
                PackageRelationship styleRelation =
                  documentPart.GetRelationshipsByType(stylesRelationshipType).FirstOrDefault();
                if (styleRelation != null)
                {
                    Uri styleUri =
                      PackUriHelper.ResolvePartUri(documentUri, styleRelation.TargetUri);
                    PackagePart stylePart = wdPackage.GetPart(styleUri);

                    // Load the style XML in the part into an XDocument instance.
                    styleDoc = XDocument.Load(XmlReader.Create(stylePart.GetStream()));
                }
            }
        }

        // The default paragraph style is used for paragraphs without an explicit pStyle.
        string defaultStyle =
            (string)(
                from style in styleDoc.Root.Elements(w + "style")
                where (string)style.Attribute(w + "type") == "paragraph" &&
                      (string)style.Attribute(w + "default") == "1"
                select style
            ).First().Attribute(w + "styleId");

        // Find all paragraphs in the document.
        var paragraphs =
            from para in xDoc
                         .Root
                         .Element(w + "body")
                         .Descendants(w + "p")
            let styleNode = para
                            .Elements(w + "pPr")
                            .Elements(w + "pStyle")
                            .FirstOrDefault()
            select new
            {
                ParagraphNode = para,
                StyleName = styleNode != null ?
                    (string)styleNode.Attribute(w + "val") :
                    defaultStyle
            };

        // Retrieve the text of each paragraph.
        var paraWithText =
            from para in paragraphs
            select new
            {
                ParagraphNode = para.ParagraphNode,
                StyleName = para.StyleName,
                Text = ParagraphText(para.ParagraphNode)
            };

        // The following is the new code that projects XML in a new shape.
        XElement root = new XElement("Root",
            from p in paraWithText
            select new XElement("Paragraph",
                new XElement("StyleName", p.StyleName),
                new XElement("Text", p.Text)
            )
        );

        Console.WriteLine(root);
    }
}
Imports <xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">

Module Module1
    ''' <summary>
    ''' Concatenates every string in the sequence in iteration order.
    ''' </summary>
    <System.Runtime.CompilerServices.Extension()> _
    Public Function StringConcatenate(ByVal source As IEnumerable(Of String)) As String
        Dim sb As StringBuilder = New StringBuilder()
        For Each s As String In source
            sb.Append(s)
        Next
        Return sb.ToString()
    End Function

    ''' <summary>
    ''' Projects every item to a string with func and concatenates the results.
    ''' </summary>
    <System.Runtime.CompilerServices.Extension()> _
    Public Function StringConcatenate(Of T)(ByVal source As IEnumerable(Of T), _
        ByVal func As Func(Of T, String)) As String
        Dim sb As StringBuilder = New StringBuilder()
        For Each item As T In source
            sb.Append(func(item))
        Next
        Return sb.ToString()
    End Function

    ''' <summary>
    ''' Concatenates every item, appending separator after each one (including the last).
    ''' </summary>
    <System.Runtime.CompilerServices.Extension()> _
    Public Function StringConcatenate(Of T)(ByVal source As IEnumerable(Of T), _
        ByVal separator As String) As String
        Dim sb As StringBuilder = New StringBuilder()
        For Each s As T In source
            sb.Append(s).Append(separator)
        Next
        Return sb.ToString()
    End Function

    ''' <summary>
    ''' Projects every item to a string with func and concatenates the results,
    ''' appending separator after each one (including the last).
    ''' </summary>
    <System.Runtime.CompilerServices.Extension()> _
    Public Function StringConcatenate(Of T)(ByVal source As IEnumerable(Of T), _
        ByVal func As Func(Of T, String), ByVal separator As String) As String
        Dim sb As StringBuilder = New StringBuilder()
        For Each item As T In source
            sb.Append(func(item)).Append(separator)
        Next
        Return sb.ToString()
    End Function

    ''' <summary>
    ''' Returns the text of a paragraph by concatenating the values of all
    ''' w:t elements found under its w:r child elements.
    ''' </summary>
    Public Function ParagraphText(ByVal e As XElement) As String
        Dim w As XNamespace = e.Name.Namespace
        Return (e.<w:r>.<w:t>).StringConcatenate(Function(element) CStr(element))
    End Function

    ' Returns the w:val of the paragraph's style node, or defaultStyle when the
    ' paragraph has no style node, so the node is dereferenced only when present.
    Private Function GetStyleOfParagraph(ByVal styleNode As XElement, _
        ByVal defaultStyle As String) As String
        If (styleNode Is Nothing) Then
            Return defaultStyle
        Else
            Return styleNode.@w:val
        End If
    End Function

    ' Opens SampleDoc.docx, loads the main document part and its styles part,
    ' resolves the style name and text of every paragraph, and writes the
    ' projected XML to the console.
    Sub Main()
        Dim fileName = "SampleDoc.docx"

        ' Relationship types and namespace are identifiers, not links: they must stay "http://".
        Dim documentRelationshipType = _
          "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"
        Dim stylesRelationshipType = _
          "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles"
        Dim wordmlNamespace = _
          "http://schemas.openxmlformats.org/wordprocessingml/2006/main"

        Dim xDoc As XDocument = Nothing
        Dim styleDoc As XDocument = Nothing
        Using wdPackage As Package = Package.Open(fileName, FileMode.Open, FileAccess.Read)
            Dim docPackageRelationship As PackageRelationship = _
              wdPackage.GetRelationshipsByType(documentRelationshipType).FirstOrDefault()
            If (docPackageRelationship IsNot Nothing) Then
                Dim documentUri As Uri = PackUriHelper.ResolvePartUri(New Uri("/", UriKind.Relative), _
                  docPackageRelationship.TargetUri)
                Dim documentPart As PackagePart = wdPackage.GetPart(documentUri)

                ' Load the document XML in the part into an XDocument instance.
                xDoc = XDocument.Load(XmlReader.Create(documentPart.GetStream()))

                ' Find the styles part. There will only be one.
                Dim styleRelation As PackageRelationship = _
                  documentPart.GetRelationshipsByType(stylesRelationshipType).FirstOrDefault()
                If (Not (styleRelation Is Nothing)) Then
                    Dim styleUri As Uri = _
                      PackUriHelper.ResolvePartUri(documentUri, styleRelation.TargetUri)
                    Dim stylePart As PackagePart = wdPackage.GetPart(styleUri)

                    ' Load the style XML in the part into an XDocument instance.
                    styleDoc = XDocument.Load(XmlReader.Create(stylePart.GetStream()))
                End If
            End If
        End Using

        ' The default paragraph style is used for paragraphs without an explicit w:pStyle.
        Dim defaultStyle As String = _
            ( _
                From style In styleDoc.Root.<w:style> _
                Where style.@w:type = "paragraph" And _
                      style.@w:default = "1" _
                Select style _
            ).First().@w:styleId

        ' Find all paragraphs in the document.
        Dim paragraphs = _
            From para In xDoc.Root.<w:body>...<w:p> _
            Let styleNode As XElement = para.<w:pPr>.<w:pStyle>.FirstOrDefault _
            Select New With { _
                .ParagraphNode = para, _
                .StyleName = GetStyleOfParagraph(styleNode, defaultStyle) _
            }

        ' Retrieve the text of each paragraph.
        Dim paraWithText = _
            From para In paragraphs _
            Select New With { _
                .ParagraphNode = para.ParagraphNode, _
                .StyleName = para.StyleName, _
                .Text = ParagraphText(para.ParagraphNode) _
            }

        ' Following is the new code that projects XML in a new shape
        Dim root As XElement = _
            <Root>
                <%= _
                    From p In paraWithText _
                    Select _
                    <Paragraph>
                        <StyleName><%= p.StyleName %></StyleName>
                        <Text><%= p.Text %></Text>
                    </Paragraph> _
                %>
            </Root>

        Console.WriteLine(root)
    End Sub
End Module
В этом примере выводятся следующие данные:
<Text>Parsing WordprocessingML with LINQ to XML</Text>
<Text>The following example prints to the console.</Text>
<Text>using System;</Text>
<Text>class Program {</Text>
<Text> public static void (string[] args) {</Text>
<Text> Console.WriteLine("Hello World");</Text>
<Text> }</Text>
<Text>This example produces the following output:</Text>
<Text>Hello World</Text>
Следующие действия
В следующем примере выполняется запрос, который находит весь текст в документе Word: