Initial population
This commit is contained in:
255
JRCookbookBusiness/Converters/CssStylesheet.cs
Normal file
255
JRCookbookBusiness/Converters/CssStylesheet.cs
Normal file
@@ -0,0 +1,255 @@
|
||||
// // Copyright (c) Microsoft. All rights reserved.
|
||||
// // Licensed under the MIT license. See LICENSE file in the project root for full license information.
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Text;
|
||||
using System.Xml;
|
||||
|
||||
namespace HtmlToXamlDemo
|
||||
{
|
||||
// Holds the style rules found in the STYLE elements of an html document and
// answers which rule applies to a given element.
//
// Known limitations:
//  - stylesheets included through LINK elements are ignored;
//  - only "tag", ".class", "tag.class", "#id" and "tag#id" simple selectors are understood;
//  - for a multi-level selector ("div p") only the LAST level is tested against the element,
//    ancestors are not checked;
//  - @-directives are skipped up to the next ';', so block directives such as @media
//    are not parsed properly.
internal class CssStylesheet
{
    // Characters that separate the individual names inside a class attribute.
    private static readonly char[] ClassSeparators = {' ', '\t', '\r', '\n', '\f'};

    // Rules in the order they were discovered. GetStyle scans backwards so later rules win.
    private readonly List<StyleDefinition> _styleDefinitions = new List<StyleDefinition>();

    // Constructor. Collects the rules of every STYLE element below htmlElement.
    // A null htmlElement produces an empty stylesheet.
    public CssStylesheet(XmlElement htmlElement)
    {
        DiscoverStyleDefinitions(htmlElement);
    }

    // Recursively traverses an html tree, discovers STYLE elements and adds their rules
    // to the style definition table used for further cascading style application.
    // A null htmlElement is ignored.
    public void DiscoverStyleDefinitions(XmlElement htmlElement)
    {
        if (htmlElement == null)
        {
            return;
        }

        // Tag names are compared ordinally: a culture-sensitive ToLower() turns "LINK"
        // into "lınk" under Turkish cultures and the comparison would silently fail.
        var tagName = htmlElement.LocalName;

        if (string.Equals(tagName, "link", StringComparison.OrdinalIgnoreCase))
        {
            // Included stylesheets are not processed, e.g.
            // <LINK href="http://sc.msn.com/global/css/ptnr/orange.css" type=text/css rel=stylesheet>
            return;
        }

        if (!string.Equals(tagName, "style", StringComparison.OrdinalIgnoreCase))
        {
            // This is not a STYLE element. Recurse into its child elements.
            for (var child = htmlElement.FirstChild; child != null; child = child.NextSibling)
            {
                var childElement = child as XmlElement;
                if (childElement != null)
                {
                    DiscoverStyleDefinitions(childElement);
                }
            }
            return;
        }

        // Collect all text of this STYLE element. Stylesheets are frequently wrapped in
        // an html comment, so comment nodes are read as well as text nodes.
        var stylesheetBuffer = new StringBuilder();
        for (var child = htmlElement.FirstChild; child != null; child = child.NextSibling)
        {
            if (child is XmlText || child is XmlComment)
            {
                stylesheetBuffer.Append(RemoveComments(child.Value));
            }
        }

        ParseStyleRules(stylesheetBuffer.ToString());
    }

    // Splits stylesheet text (already free of /*...*/ comments) into rules and registers them.
    // The text has the following syntactical structure:
    //     @import declaration;
    //     selector { definition }
    // where "selector" is one of: ".classname", "tagname"
    private void ParseStyleRules(string css)
    {
        var index = 0;
        while (index < css.Length)
        {
            // Extract selector: everything up to the opening brace
            var selectorStart = index;
            while (index < css.Length && css[index] != '{')
            {
                // Skip declaration directive starting from @ up to its terminating ';'
                if (css[index] == '@')
                {
                    while (index < css.Length && css[index] != ';')
                    {
                        index++;
                    }
                    selectorStart = index + 1;
                }
                index++;
            }

            if (index >= css.Length)
            {
                // Trailing text without a rule body
                break;
            }

            // Extract definition: everything between the braces. An unterminated body
            // runs to the end of the text.
            var braceIndex = index;
            while (index < css.Length && css[index] != '}')
            {
                index++;
            }

            var definitionStart = braceIndex + 1;
            var definitionLength = index - definitionStart;
            if (definitionLength > 0)
            {
                AddStyleDefinition(
                    css.Substring(selectorStart, braceIndex - selectorStart),
                    css.Substring(definitionStart, definitionLength));
            }

            // Skip closing brace
            if (index < css.Length)
            {
                Debug.Assert(css[index] == '}');
                index++;
            }
        }
    }

    // Returns a string with all c-style comments replaced by a single space.
    // An unterminated comment removes everything from its start to the end of the text.
    private static string RemoveComments(string text)
    {
        var commentStart = text.IndexOf("/*", StringComparison.Ordinal);
        if (commentStart < 0)
        {
            return text;
        }

        var result = new StringBuilder(text.Length);
        var position = 0;
        while (commentStart >= 0)
        {
            result.Append(text, position, commentStart - position);

            var commentEnd = text.IndexOf("*/", commentStart + 2, StringComparison.Ordinal);
            if (commentEnd < 0)
            {
                return result.ToString();
            }

            result.Append(' ');
            position = commentEnd + 2;
            commentStart = text.IndexOf("/*", position, StringComparison.Ordinal);
        }

        result.Append(text, position, text.Length - position);
        return result.ToString();
    }

    // Registers "definition" for every comma-separated simple selector in "selector".
    // Both values are trimmed and lower-cased; null or blank values are ignored.
    public void AddStyleDefinition(string selector, string definition)
    {
        if (selector == null || definition == null)
        {
            return;
        }

        // Normalize parameter values
        selector = selector.Trim().ToLowerInvariant();
        definition = definition.Trim().ToLowerInvariant();
        if (selector.Length == 0 || definition.Length == 0)
        {
            return;
        }

        foreach (var part in selector.Split(','))
        {
            var simpleSelector = part.Trim();
            if (simpleSelector.Length > 0)
            {
                _styleDefinitions.Add(new StyleDefinition(simpleSelector, definition));
            }
        }
    }

    // Returns the definition of the most recently added rule whose selector matches the
    // last element of sourceContext, or null when no rule matches.
    // elementName is expected to be the LocalName of that last element.
    public string GetStyle(string elementName, List<XmlElement> sourceContext)
    {
        Debug.Assert(sourceContext.Count > 0);
        Debug.Assert(elementName == sourceContext[sourceContext.Count - 1].LocalName);

        if (_styleDefinitions.Count == 0)
        {
            return null;
        }

        var element = sourceContext[sourceContext.Count - 1];

        for (var i = _styleDefinitions.Count - 1; i >= 0; i--)
        {
            // Only the last level of a multi-level selector is tested (see class comment)
            var selectorLevels = _styleDefinitions[i].Selector.Split(' ');
            var lastLevel = selectorLevels[selectorLevels.Length - 1].Trim();

            if (MatchSelectorLevel(lastLevel, element))
            {
                return _styleDefinitions[i].Definition;
            }
        }

        return null;
    }

    // Tests one selector level ("tag", ".class", "tag.class", "#id" or "tag#id") against
    // an element. Selectors are stored lower-cased, so all comparisons ignore case.
    private static bool MatchSelectorLevel(string selectorLevel, XmlElement xmlElement)
    {
        if (selectorLevel.Length == 0)
        {
            return false;
        }

        var indexOfDot = selectorLevel.IndexOf('.');
        var indexOfPound = selectorLevel.IndexOf('#');

        string selectorClass = null;
        string selectorId = null;
        string selectorTag = null;
        if (indexOfDot >= 0)
        {
            if (indexOfDot > 0)
            {
                selectorTag = selectorLevel.Substring(0, indexOfDot);
            }
            selectorClass = selectorLevel.Substring(indexOfDot + 1);
        }
        else if (indexOfPound >= 0)
        {
            if (indexOfPound > 0)
            {
                selectorTag = selectorLevel.Substring(0, indexOfPound);
            }
            selectorId = selectorLevel.Substring(indexOfPound + 1);
        }
        else
        {
            selectorTag = selectorLevel;
        }

        if (selectorTag != null &&
            !string.Equals(selectorTag, xmlElement.LocalName, StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        if (selectorId != null &&
            !string.Equals(HtmlToXamlConverter.GetAttribute(xmlElement, "id"), selectorId,
                StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        if (selectorClass != null &&
            !ContainsClass(HtmlToXamlConverter.GetAttribute(xmlElement, "class"), selectorClass))
        {
            return false;
        }

        return true;
    }

    // Returns true when className is one of the whitespace-separated names in classAttribute.
    private static bool ContainsClass(string classAttribute, string className)
    {
        if (classAttribute == null)
        {
            return false;
        }

        foreach (var candidate in classAttribute.Split(ClassSeparators, StringSplitOptions.RemoveEmptyEntries))
        {
            if (string.Equals(candidate, className, StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }
        }

        return false;
    }

    // One stylesheet rule: a simple selector and the declarations attached to it.
    private class StyleDefinition
    {
        public readonly string Definition;
        public readonly string Selector;

        public StyleDefinition(string selector, string definition)
        {
            Selector = selector;
            Definition = definition;
        }
    }
}
|
||||
}
|
||||
844
JRCookbookBusiness/Converters/HtmlCSSParser.cs
Normal file
844
JRCookbookBusiness/Converters/HtmlCSSParser.cs
Normal file
@@ -0,0 +1,844 @@
|
||||
// // Copyright (c) Microsoft. All rights reserved.
|
||||
// // Licensed under the MIT license. See LICENSE file in the project root for full license information.
|
||||
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Xml;
|
||||
|
||||
namespace HtmlToXamlDemo
|
||||
{
|
||||
// DependencyProperty
|
||||
|
||||
// TextElement
|
||||
|
||||
internal static class HtmlCssParser
|
||||
{
|
||||
private static readonly string[] Colors =
|
||||
{
|
||||
"aliceblue", "antiquewhite", "aqua", "aquamarine", "azure", "beige", "bisque", "black", "blanchedalmond",
|
||||
"blue", "blueviolet", "brown", "burlywood", "cadetblue", "chartreuse", "chocolate", "coral",
|
||||
"cornflowerblue", "cornsilk", "crimson", "cyan", "darkblue", "darkcyan", "darkgoldenrod", "darkgray",
|
||||
"darkgreen", "darkkhaki", "darkmagenta", "darkolivegreen", "darkorange", "darkorchid", "darkred",
|
||||
"darksalmon", "darkseagreen", "darkslateblue", "darkslategray", "darkturquoise", "darkviolet", "deeppink",
|
||||
"deepskyblue", "dimgray", "dodgerblue", "firebrick", "floralwhite", "forestgreen", "fuchsia", "gainsboro",
|
||||
"ghostwhite", "gold", "goldenrod", "gray", "green", "greenyellow", "honeydew", "hotpink", "indianred",
|
||||
"indigo", "ivory", "khaki", "lavender", "lavenderblush", "lawngreen", "lemonchiffon", "lightblue",
|
||||
"lightcoral",
|
||||
"lightcyan", "lightgoldenrodyellow", "lightgreen", "lightgrey", "lightpink", "lightsalmon", "lightseagreen",
|
||||
"lightskyblue", "lightslategray", "lightsteelblue", "lightyellow", "lime", "limegreen", "linen", "magenta",
|
||||
"maroon", "mediumaquamarine", "mediumblue", "mediumorchid", "mediumpurple", "mediumseagreen",
|
||||
"mediumslateblue",
|
||||
"mediumspringgreen", "mediumturquoise", "mediumvioletred", "midnightblue", "mintcream", "mistyrose",
|
||||
"moccasin",
|
||||
"navajowhite", "navy", "oldlace", "olive", "olivedrab", "orange", "orangered", "orchid", "palegoldenrod",
|
||||
"palegreen", "paleturquoise", "palevioletred", "papayawhip", "peachpuff", "peru", "pink", "plum",
|
||||
"powderblue",
|
||||
"purple", "red", "rosybrown", "royalblue", "saddlebrown", "salmon", "sandybrown", "seagreen", "seashell",
|
||||
"sienna", "silver", "skyblue", "slateblue", "slategray", "snow", "springgreen", "steelblue", "tan", "teal",
|
||||
"thistle", "tomato", "turquoise", "violet", "wheat", "white", "whitesmoke", "yellow", "yellowgreen"
|
||||
};
|
||||
|
||||
private static readonly string[] SystemColors =
|
||||
{
|
||||
"activeborder", "activecaption", "appworkspace", "background", "buttonface", "buttonhighlight",
|
||||
"buttonshadow",
|
||||
"buttontext", "captiontext", "graytext", "highlight", "highlighttext", "inactiveborder", "inactivecaption",
|
||||
"inactivecaptiontext", "infobackground", "infotext", "menu", "menutext", "scrollbar", "threeddarkshadow",
|
||||
"threedface", "threedhighlight", "threedlightshadow", "threedshadow", "window", "windowframe", "windowtext"
|
||||
};
|
||||
|
||||
// .................................................................
|
||||
//
|
||||
// Parsing CSS font Property
|
||||
//
|
||||
// .................................................................
|
||||
|
||||
// CSS has five font properties: font-family, font-style, font-variant, font-weight, font-size.
|
||||
// An aggregated "font" property lets you specify in one action all the five in combination
|
||||
// with additional line-height property.
|
||||
//
|
||||
// font-family: [<family-name>,]* [<family-name> | <generic-family>]
|
||||
// generic-family: serif | sans-serif | monospace | cursive | fantasy
|
||||
// The list of families sets priorities to choose fonts;
|
||||
// Quotes not allowed around generic-family names
|
||||
// font-style: normal | italic | oblique
|
||||
// font-variant: normal | small-caps
|
||||
// font-weight: normal | bold | bolder | lighter | 100 ... 900 |
|
||||
// Default is "normal", normal==400
|
||||
// font-size: <absolute-size> | <relative-size> | <length> | <percentage>
|
||||
// absolute-size: xx-small | x-small | small | medium | large | x-large | xx-large
|
||||
// relative-size: larger | smaller
|
||||
// length: <point> | <pica> | <ex> | <em> | <points> | <millimeters> | <centimeters> | <inches>
|
||||
// Default: medium
|
||||
// font: [ <font-style> || <font-variant> || <font-weight> ]? <font-size> [ / <line-height> ]? <font-family>
|
||||
|
||||
private static readonly string[] FontGenericFamilies =
|
||||
{
|
||||
"serif", "sans-serif", "monospace", "cursive", "fantasy"
|
||||
};
|
||||
|
||||
private static readonly string[] FontStyles = {"normal", "italic", "oblique"};
|
||||
private static readonly string[] FontVariants = {"normal", "small-caps"};
|
||||
|
||||
private static readonly string[] FontWeights =
|
||||
{
|
||||
"normal", "bold", "bolder", "lighter", "100", "200", "300",
|
||||
"400", "500", "600", "700", "800", "900"
|
||||
};
|
||||
|
||||
private static readonly string[] FontAbsoluteSizes =
|
||||
{
|
||||
"xx-small", "x-small", "small", "medium", "large",
|
||||
"x-large", "xx-large"
|
||||
};
|
||||
|
||||
private static readonly string[] FontRelativeSizes = {"larger", "smaller"};
|
||||
|
||||
private static readonly string[] FontSizeUnits = {"px", "mm", "cm", "in", "pt", "pc", "em", "ex", "%"};
|
||||
|
||||
// .................................................................
|
||||
//
|
||||
// Parsing CSS list-style Property
|
||||
//
|
||||
// .................................................................
|
||||
|
||||
// list-style: [ <list-style-type> || <list-style-position> || <list-style-image> ]
|
||||
|
||||
private static readonly string[] ListStyleTypes =
|
||||
{
|
||||
"disc", "circle", "square", "decimal", "lower-roman",
|
||||
"upper-roman", "lower-alpha", "upper-alpha", "none"
|
||||
};
|
||||
|
||||
private static readonly string[] ListStylePositions = {"inside", "outside"};
|
||||
// .................................................................
|
||||
//
|
||||
// Parsing CSS text-decoration Property
|
||||
//
|
||||
// .................................................................
|
||||
|
||||
private static readonly string[] TextDecorations = {"none", "underline", "overline", "line-through", "blink"};
|
||||
|
||||
// .................................................................
|
||||
//
|
||||
// Parsing CSS text-transform Property
|
||||
//
|
||||
// .................................................................
|
||||
|
||||
private static readonly string[] TextTransforms = {"none", "capitalize", "uppercase", "lowercase"};
|
||||
// .................................................................
|
||||
//
|
||||
// Parsing CSS text-align Property
|
||||
//
|
||||
// .................................................................
|
||||
|
||||
private static readonly string[] TextAligns = {"left", "right", "center", "justify"};
|
||||
// .................................................................
|
||||
//
|
||||
// Parsing CSS vertical-align Property
|
||||
//
|
||||
// .................................................................
|
||||
|
||||
private static readonly string[] VerticalAligns =
|
||||
{
|
||||
"baseline", "sub", "super", "top", "text-top", "middle",
|
||||
"bottom", "text-bottom"
|
||||
};
|
||||
|
||||
// .................................................................
|
||||
//
|
||||
// Parsing CSS float Property
|
||||
//
|
||||
// .................................................................
|
||||
|
||||
private static readonly string[] Floats = {"left", "right", "none"};
|
||||
// .................................................................
|
||||
//
|
||||
// Parsing CSS clear Property
|
||||
//
|
||||
// .................................................................
|
||||
|
||||
private static readonly string[] Clears = {"none", "left", "right", "both"};
|
||||
// .................................................................
|
||||
//
|
||||
// Parsing CSS border-style Properties
|
||||
//
|
||||
// .................................................................
|
||||
|
||||
private static readonly string[] BorderStyles =
|
||||
{
|
||||
"none", "dotted", "dashed", "solid", "double", "groove",
|
||||
"ridge", "inset", "outset"
|
||||
};
|
||||
|
||||
// .................................................................
|
||||
//
|
||||
// What are these definitions doing here:
|
||||
//
|
||||
// .................................................................
|
||||
|
||||
private static string[] _blocks = {"block", "inline", "list-item", "none"};
|
||||
// .................................................................
|
||||
//
|
||||
// Processing CSS Attributes
|
||||
//
|
||||
// .................................................................
|
||||
|
||||
// Collects the css properties that apply to htmlElement and stores the recognized
// ones in localProperties, keyed by css property name.
// The declarations come from two places: the stylesheet rule matching the element
// (stylesheet.GetStyle) and the element's inline "style" attribute. Inline declarations
// are processed last, so they override the stylesheet ones.
// Declarations that do not split into exactly one name and one value around ':' are
// skipped; this includes empty declarations and values that themselves contain ':'.
internal static void GetElementPropertiesFromCssAttributes(XmlElement htmlElement, string elementName,
    CssStylesheet stylesheet, Hashtable localProperties, List<XmlElement> sourceContext)
{
    var styleFromStylesheet = stylesheet.GetStyle(elementName, sourceContext);
    var styleInline = HtmlToXamlConverter.GetAttribute(htmlElement, "style");

    // Combine styles from stylesheet and from inline attribute.
    // The order is important - the latter styles will override the former.
    var style = styleFromStylesheet;
    if (styleInline != null)
    {
        style = style == null ? styleInline : (style + ";" + styleInline);
    }

    if (style == null)
    {
        return;
    }

    // Apply local style to current formatting properties
    foreach (var declaration in style.Split(';'))
    {
        var styleNameValue = declaration.Split(':');
        if (styleNameValue.Length != 2)
        {
            continue;
        }

        // Invariant lower-casing: css keywords are ASCII, and a culture-sensitive ToLower()
        // would turn e.g. "FONT-SIZE" into "font-sıze" under Turkish cultures.
        var styleName = styleNameValue[0].Trim().ToLowerInvariant();
        var styleValue = HtmlToXamlConverter.UnQuote(styleNameValue[1].Trim()).ToLowerInvariant();
        var nextIndex = 0;

        switch (styleName)
        {
            case "font":
                ParseCssFont(styleValue, localProperties);
                break;
            case "font-family":
                ParseCssFontFamily(styleValue, ref nextIndex, localProperties);
                break;
            case "font-style":
                ParseCssFontStyle(styleValue, ref nextIndex, localProperties);
                break;
            case "font-weight":
                ParseCssFontWeight(styleValue, ref nextIndex, localProperties);
                break;
            case "font-variant":
                ParseCssFontVariant(styleValue, ref nextIndex, localProperties);
                break;

            // Sizes stored under their own css name; negative values are replaced by "0"
            case "font-size":
            case "line-height":
            case "width":
            case "height":
            case "margin-top":
            case "margin-right":
            case "margin-bottom":
            case "margin-left":
            case "padding-top":
            case "padding-right":
            case "padding-bottom":
            case "padding-left":
                ParseCssSize(styleValue, ref nextIndex, localProperties, styleName,
                    /*mustBeNonNegative:*/true);
                break;

            case "text-indent":
                ParseCssSize(styleValue, ref nextIndex, localProperties, styleName,
                    /*mustBeNonNegative:*/false);
                break;

            case "color":
            case "background-color":
                ParseCssColor(styleValue, ref nextIndex, localProperties, styleName);
                break;
            case "background":
                // TODO: need to parse composite background property
                ParseCssBackground(styleValue, ref nextIndex, localProperties);
                break;

            case "text-decoration":
                ParseCssTextDecoration(styleValue, ref nextIndex, localProperties);
                break;
            case "text-transform":
                ParseCssTextTransform(styleValue, ref nextIndex, localProperties);
                break;
            case "text-align":
                ParseCssTextAlign(styleValue, ref nextIndex, localProperties);
                break;
            case "vertical-align":
                ParseCssVerticalAlign(styleValue, ref nextIndex, localProperties);
                break;

            // Four-sided properties: top/right/bottom/left
            case "margin":
            case "padding":
            case "border-style":
            case "border-width":
            case "border-color":
                ParseCssRectangleProperty(styleValue, ref nextIndex, localProperties, styleName);
                break;

            case "border":
                ParseCssBorder(styleValue, ref nextIndex, localProperties);
                break;

            case "float":
                ParseCssFloat(styleValue, ref nextIndex, localProperties);
                break;
            case "clear":
                ParseCssClear(styleValue, ref nextIndex, localProperties);
                break;

            // Recognized but not converted yet.
            // NOTE: css names for elementary border styles have side indications in the middle
            // (top/bottom/left/right). In our internal notation we intentionally put them at the
            // end - to unify processing in ParseCssRectangleProperty method.
            case "word-spacing":
            case "letter-spacing":
            case "display":
            case "border-top":
            case "border-right":
            case "border-left":
            case "border-bottom":
            case "border-top-style":
            case "border-right-style":
            case "border-left-style":
            case "border-bottom-style":
            case "border-top-color":
            case "border-right-color":
            case "border-left-color":
            case "border-bottom-color":
            case "border-top-width":
            case "border-right-width":
            case "border-left-width":
            case "border-bottom-width":
                break;

            default:
                break;
        }
    }
}
|
||||
|
||||
// .................................................................
|
||||
//
|
||||
// Parsing CSS - Lexical Helpers
|
||||
//
|
||||
// .................................................................
|
||||
|
||||
// Moves nextIndex forward to the first non-whitespace character of styleValue at or
// after its current position, or to the end of the string when only whitespace remains.
private static void ParseWhiteSpace(string styleValue, ref int nextIndex)
{
    for (; nextIndex < styleValue.Length; nextIndex++)
    {
        if (!char.IsWhiteSpace(styleValue[nextIndex]))
        {
            return;
        }
    }
}
|
||||
|
||||
// Tests whether "word" occurs in styleValue at nextIndex (after skipping whitespace) and
// is not immediately followed by a letter or digit.
// On success nextIndex is advanced past the word and true is returned.
// On failure false is returned and nextIndex stays where the whitespace skipping left it.
private static bool ParseWord(string word, string styleValue, ref int nextIndex)
{
    ParseWhiteSpace(styleValue, ref nextIndex);

    var offset = 0;
    while (offset < word.Length)
    {
        var position = nextIndex + offset;
        if (position >= styleValue.Length || styleValue[position] != word[offset])
        {
            return false;
        }
        offset++;
    }

    // Reject a match that is only the beginning of a longer word, e.g. "aqua" in "aquamarine"
    var following = nextIndex + word.Length;
    var continuesAsLongerWord = following < styleValue.Length && char.IsLetterOrDigit(styleValue[following]);
    if (continuesAsLongerWord)
    {
        return false;
    }

    nextIndex = following;
    return true;
}
|
||||
|
||||
// Tries each entry of "words" in order against styleValue at nextIndex.
// Returns the first entry that matches, with nextIndex advanced past it,
// or null when none of the entries matches.
private static string ParseWordEnumeration(string[] words, string styleValue, ref int nextIndex)
{
    for (var i = 0; i < words.Length; i++)
    {
        var candidate = words[i];
        if (ParseWord(candidate, styleValue, ref nextIndex))
        {
            return candidate;
        }
    }

    return null;
}
|
||||
|
||||
// Matches one of "words" at nextIndex and, when a word is found, stores it in
// localProperties under attributeName. Nothing is stored when no word matches.
private static void ParseWordEnumeration(string[] words, string styleValue, ref int nextIndex,
    Hashtable localProperties, string attributeName)
{
    var matchedWord = ParseWordEnumeration(words, styleValue, ref nextIndex);
    if (matchedWord == null)
    {
        return;
    }

    localProperties[attributeName] = matchedWord;
}
|
||||
|
||||
// Parses a css length at nextIndex: an optional minus sign, digits (dots allowed after the
// first digit) and an optional unit from FontSizeUnits ("px" is assumed when none is given).
// Returns the number followed by its unit, "0" when the value is negative and
// mustBeNonNegative is set, or null when no digit is found at the expected position.
private static string ParseCssSize(string styleValue, ref int nextIndex, bool mustBeNonNegative)
{
    ParseWhiteSpace(styleValue, ref nextIndex);

    var numberStart = nextIndex;

    // Optional minus sign
    var isNegative = nextIndex < styleValue.Length && styleValue[nextIndex] == '-';
    if (isNegative)
    {
        nextIndex++;
    }

    if (nextIndex >= styleValue.Length || !char.IsDigit(styleValue[nextIndex]))
    {
        return null;
    }

    // The first character is known to be a digit; consume the rest of the number
    do
    {
        nextIndex++;
    } while (nextIndex < styleValue.Length &&
             (char.IsDigit(styleValue[nextIndex]) || styleValue[nextIndex] == '.'));

    var number = styleValue.Substring(numberStart, nextIndex - numberStart);

    // The unit is consumed even when the value is going to be replaced by "0"
    var unit = ParseWordEnumeration(FontSizeUnits, styleValue, ref nextIndex);
    if (unit == null)
    {
        unit = "px";
    }

    return mustBeNonNegative && isNegative ? "0" : number + unit;
}
|
||||
|
||||
// Parses a css length at nextIndex and, when one is found, stores it in localValues
// under propertyName. Nothing is stored when no length can be parsed.
private static void ParseCssSize(string styleValue, ref int nextIndex, Hashtable localValues,
    string propertyName,
    bool mustBeNonNegative)
{
    var size = ParseCssSize(styleValue, ref nextIndex, mustBeNonNegative);
    if (size == null)
    {
        return;
    }

    localValues[propertyName] = size;
}
|
||||
|
||||
// Parses a css color at nextIndex (after skipping whitespace) and returns it, or null
// when no color is recognized. Supported forms:
//     #FF5B1A / #F51          returned as written
//     rgb(255,91,26)          converted to "#rrggbb"
//     rgb(100%,53.5%,10%)     converted to "#rrggbb"
//     black | silver | ...    a name from Colors, returned as written
//     a name from SystemColors, replaced by "black"
// "transparent" is not supported.
private static string ParseCssColor(string styleValue, ref int nextIndex)
{
    ParseWhiteSpace(styleValue, ref nextIndex);

    if (nextIndex >= styleValue.Length)
    {
        return null;
    }

    var startIndex = nextIndex;
    var character = styleValue[nextIndex];

    if (character == '#')
    {
        nextIndex++;
        while (nextIndex < styleValue.Length)
        {
            var digit = styleValue[nextIndex];
            var isHexDigit = '0' <= digit && digit <= '9' ||
                             'a' <= digit && digit <= 'f' ||
                             'A' <= digit && digit <= 'F';
            if (!isHexDigit)
            {
                break;
            }
            nextIndex++;
        }

        // A lone '#' without any hex digit is not a color
        return nextIndex > startIndex + 1
            ? styleValue.Substring(startIndex, nextIndex - startIndex)
            : null;
    }

    // string.Compare with a length never reads past the end of styleValue, so values
    // shorter than three characters are handled without throwing.
    if (string.Compare(styleValue, nextIndex, "rgb", 0, 3, System.StringComparison.OrdinalIgnoreCase) == 0)
    {
        return ParseCssRgbFunction(styleValue, ref nextIndex);
    }

    if (char.IsLetter(character))
    {
        var color = ParseWordEnumeration(Colors, styleValue, ref nextIndex);
        if (color == null && ParseWordEnumeration(SystemColors, styleValue, ref nextIndex) != null)
        {
            // Implement smarter system color conversions into real colors
            color = "black";
        }
        return color;
    }

    return null;
}

// Parses "rgb(r, g, b)" (an "rgba" name and a fourth component are tolerated and the
// alpha is ignored) starting at nextIndex and returns it as "#rrggbb".
// Components are comma-separated numbers in 0..255 or percentages; out-of-range values
// are clamped. nextIndex is always moved past the closing ')' (or to the end of the
// string when it is missing). Returns null when the function is malformed.
private static string ParseCssRgbFunction(string styleValue, ref int nextIndex)
{
    var functionStart = nextIndex;
    var closeIndex = styleValue.IndexOf(')', functionStart);
    nextIndex = closeIndex < 0 ? styleValue.Length : closeIndex + 1;

    var openIndex = styleValue.IndexOf('(', functionStart);
    if (openIndex < 0 || closeIndex < openIndex)
    {
        return null;
    }

    var functionName = styleValue.Substring(functionStart, openIndex - functionStart).Trim();
    if (!string.Equals(functionName, "rgb", System.StringComparison.OrdinalIgnoreCase) &&
        !string.Equals(functionName, "rgba", System.StringComparison.OrdinalIgnoreCase))
    {
        return null;
    }

    var components = styleValue.Substring(openIndex + 1, closeIndex - openIndex - 1).Split(',');
    if (components.Length < 3)
    {
        return null;
    }

    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var hex = new System.Text.StringBuilder("#", 7);
    for (var i = 0; i < 3; i++)
    {
        var component = components[i].Trim();
        var isPercentage = component.EndsWith("%", System.StringComparison.Ordinal);
        if (isPercentage)
        {
            component = component.Substring(0, component.Length - 1);
        }

        double value;
        if (!double.TryParse(component, System.Globalization.NumberStyles.Float, invariant, out value) ||
            double.IsNaN(value))
        {
            return null;
        }

        if (isPercentage)
        {
            value = value * 255 / 100;
        }

        var clamped = System.Math.Max(0.0, System.Math.Min(255.0, value));
        var channel = (int) System.Math.Round(clamped, System.MidpointRounding.AwayFromZero);
        hex.Append(channel.ToString("x2", invariant));
    }

    return hex.ToString();
}
|
||||
|
||||
// Parses a css color at nextIndex and, when one is recognized, stores it in localValues
// under propertyName. Nothing is stored when no color is recognized.
private static void ParseCssColor(string styleValue, ref int nextIndex, Hashtable localValues,
    string propertyName)
{
    var parsedColor = ParseCssColor(styleValue, ref nextIndex);
    if (parsedColor == null)
    {
        return;
    }

    localValues[propertyName] = parsedColor;
}
|
||||
|
||||
// Parses the value of the aggregated css "font" property and stores its parts in
// localProperties. The parts are read strictly in this order:
//     style, variant, weight, size, optional "/ line-height", family
private static void ParseCssFont(string styleValue, Hashtable localProperties)
{
    var position = 0;

    // Optional leading keywords
    ParseCssFontStyle(styleValue, ref position, localProperties);
    ParseCssFontVariant(styleValue, ref position, localProperties);
    ParseCssFontWeight(styleValue, ref position, localProperties);

    ParseCssSize(styleValue, ref position, localProperties, "font-size", /*mustBeNonNegative:*/true);

    // A slash after the size introduces the line height
    ParseWhiteSpace(styleValue, ref position);
    var hasLineHeight = position < styleValue.Length && styleValue[position] == '/';
    if (hasLineHeight)
    {
        position++;
        ParseCssSize(styleValue, ref position, localProperties, "line-height", /*mustBeNonNegative:*/true);
    }

    // Whatever remains is the family list
    ParseCssFontFamily(styleValue, ref position, localProperties);
}
|
||||
|
||||
// Reads one of the FontStyles keywords at nextIndex and stores it as "font-style".
private static void ParseCssFontStyle(string styleValue, ref int nextIndex, Hashtable localProperties) =>
    ParseWordEnumeration(FontStyles, styleValue, ref nextIndex, localProperties, "font-style");
|
||||
|
||||
// Reads one of the FontVariants keywords at nextIndex and stores it as "font-variant".
private static void ParseCssFontVariant(string styleValue, ref int nextIndex, Hashtable localProperties) =>
    ParseWordEnumeration(FontVariants, styleValue, ref nextIndex, localProperties, "font-variant");
|
||||
|
||||
// Reads one of the FontWeights keywords at nextIndex and stores it as "font-weight".
private static void ParseCssFontWeight(string styleValue, ref int nextIndex, Hashtable localProperties) =>
    ParseWordEnumeration(FontWeights, styleValue, ref nextIndex, localProperties, "font-weight");
|
||||
|
||||
// Parses a css font-family list starting at nextIndex and stores the FIRST family name,
// without its quotes, in localProperties["font-family"].
// A css font-family can contain a list of names; the remaining names are consumed but
// ignored. Need a decision what to do with them.
// Each name is tried as a generic family keyword, then as a quoted name, then as an
// unquoted name running up to the next ',' or ';'. Parsing stops at the first empty name.
private static void ParseCssFontFamily(string styleValue, ref int nextIndex, Hashtable localProperties)
{
    string firstFontFamily = null;

    while (nextIndex < styleValue.Length)
    {
        // Try generic-family
        var fontFamily = ParseWordEnumeration(FontGenericFamilies, styleValue, ref nextIndex);

        if (fontFamily == null && nextIndex < styleValue.Length &&
            (styleValue[nextIndex] == '"' || styleValue[nextIndex] == '\''))
        {
            // Quoted font family name
            var quote = styleValue[nextIndex];
            nextIndex++;

            var nameStart = nextIndex;
            while (nextIndex < styleValue.Length && styleValue[nextIndex] != quote)
            {
                nextIndex++;
            }

            fontFamily = styleValue.Substring(nameStart, nextIndex - nameStart);

            // Consume the closing quote; otherwise the next iteration would take it for
            // an opening quote and swallow the rest of the list as one name.
            if (nextIndex < styleValue.Length)
            {
                nextIndex++;
            }
        }
        else if (fontFamily == null)
        {
            // Unquoted font family name
            var nameStart = nextIndex;
            while (nextIndex < styleValue.Length && styleValue[nextIndex] != ',' &&
                   styleValue[nextIndex] != ';')
            {
                nextIndex++;
            }

            if (nextIndex > nameStart)
            {
                fontFamily = styleValue.Substring(nameStart, nextIndex - nameStart).Trim();
                if (fontFamily.Length == 0)
                {
                    fontFamily = null;
                }
            }
        }

        // Skip the separator before the next name
        ParseWhiteSpace(styleValue, ref nextIndex);
        if (nextIndex < styleValue.Length && styleValue[nextIndex] == ',')
        {
            nextIndex++;
        }

        if (fontFamily == null)
        {
            break;
        }

        if (firstFontFamily == null)
        {
            firstFontFamily = fontFamily;
        }
    }

    if (firstFontFamily != null)
    {
        localProperties["font-family"] = firstFontFamily;
    }
}
|
||||
|
||||
/// <summary>
/// Parses the CSS <c>list-style</c> shorthand. At each position it tries, in order, a list-style-type,
/// a list-style-position and a list-style-image, storing whichever matches in
/// <paramref name="localProperties"/>. Parsing stops at the first value none of the three recognizes.
/// </summary>
/// <param name="styleValue">CSS value text being parsed (always read from index 0).</param>
/// <param name="localProperties">Receives "list-style-type" / "list-style-position" / "list-style-image".</param>
private static void ParseCssListStyle(string styleValue, Hashtable localProperties)
{
    var position = 0;

    while (position < styleValue.Length)
    {
        var type = ParseCssListStyleType(styleValue, ref position);
        if (type != null)
        {
            localProperties["list-style-type"] = type;
            continue;
        }

        var placement = ParseCssListStylePosition(styleValue, ref position);
        if (placement != null)
        {
            localProperties["list-style-position"] = placement;
            continue;
        }

        var image = ParseCssListStyleImage(styleValue, ref position);
        if (image == null)
        {
            // TODO: Process unrecognized list style value
            break;
        }

        localProperties["list-style-image"] = image;
    }
}
|
||||
|
||||
/// <summary>
/// Tries to read a list-style-type keyword at <paramref name="nextIndex"/> using the ListStyleTypes table.
/// </summary>
/// <returns>The value returned by ParseWordEnumeration (compared against null by the caller).</returns>
private static string ParseCssListStyleType(string styleValue, ref int nextIndex)
{
    return ParseWordEnumeration(ListStyleTypes, styleValue, ref nextIndex);
}
|
||||
|
||||
/// <summary>
/// Tries to read a list-style-position keyword at <paramref name="nextIndex"/> using the ListStylePositions table.
/// </summary>
/// <returns>The value returned by ParseWordEnumeration (compared against null by the caller).</returns>
private static string ParseCssListStylePosition(string styleValue, ref int nextIndex)
{
    return ParseWordEnumeration(ListStylePositions, styleValue, ref nextIndex);
}
|
||||
|
||||
/// <summary>
/// Placeholder for list-style-image parsing. Not implemented: it consumes nothing and always
/// returns null, so callers treat every value as "not an image".
/// </summary>
private static string ParseCssListStyleImage(string styleValue, ref int nextIndex)
{
    return null;
}
|
||||
|
||||
/// <summary>
/// Parses a CSS <c>text-decoration</c> value. Every decoration listed in the TextDecorations table
/// (from index 1 on) is first reset to "false"; each decoration keyword found in the value is then
/// set to "true" under the key "text-decoration-" + keyword.
/// </summary>
/// <param name="styleValue">CSS value text being parsed.</param>
/// <param name="nextIndex">Current parse position; advanced by the shared word parser.</param>
/// <param name="localProperties">Receives the per-decoration "true"/"false" entries.</param>
private static void ParseCssTextDecoration(string styleValue, ref int nextIndex, Hashtable localProperties)
{
    // Default is text-decoration:none. Entry 0 of the table is skipped -
    // presumably it is the "none" keyword itself; confirm against the TextDecorations table.
    var index = 1;
    while (index < TextDecorations.Length)
    {
        localProperties["text-decoration-" + TextDecorations[index]] = "false";
        index++;
    }

    // Switch on each decoration named in the value; stop at "none" or at the first unknown word.
    while (nextIndex < styleValue.Length)
    {
        var keyword = ParseWordEnumeration(TextDecorations, styleValue, ref nextIndex);
        if (keyword == null || keyword == "none")
        {
            return;
        }

        localProperties["text-decoration-" + keyword] = "true";
    }
}
|
||||
|
||||
/// <summary>
/// Parses a CSS <c>text-transform</c> keyword by delegating to ParseWordEnumeration with the
/// TextTransforms table and the "text-transform" property name.
/// </summary>
private static void ParseCssTextTransform(string styleValue, ref int nextIndex, Hashtable localProperties)
    => ParseWordEnumeration(TextTransforms, styleValue, ref nextIndex, localProperties, "text-transform");
|
||||
|
||||
/// <summary>
/// Parses a CSS <c>text-align</c> keyword by delegating to ParseWordEnumeration with the
/// TextAligns table and the "text-align" property name.
/// </summary>
private static void ParseCssTextAlign(string styleValue, ref int nextIndex, Hashtable localProperties)
    => ParseWordEnumeration(TextAligns, styleValue, ref nextIndex, localProperties, "text-align");
|
||||
|
||||
/// <summary>
/// Parses a CSS <c>vertical-align</c> keyword by delegating to ParseWordEnumeration with the
/// VerticalAligns table and the "vertical-align" property name.
/// </summary>
/// <remarks>
/// TODO: percentage values for vertical-align are not parsed here; only keywords are attempted.
/// </remarks>
private static void ParseCssVerticalAlign(string styleValue, ref int nextIndex, Hashtable localProperties)
    => ParseWordEnumeration(VerticalAligns, styleValue, ref nextIndex, localProperties, "vertical-align");
|
||||
|
||||
/// <summary>
/// Parses a CSS <c>float</c> keyword by delegating to ParseWordEnumeration with the
/// Floats table and the "float" property name.
/// </summary>
private static void ParseCssFloat(string styleValue, ref int nextIndex, Hashtable localProperties)
    => ParseWordEnumeration(Floats, styleValue, ref nextIndex, localProperties, "float");
|
||||
|
||||
/// <summary>
/// Parses a CSS <c>clear</c> keyword by delegating to ParseWordEnumeration with the
/// Clears table and the "clear" property name.
/// </summary>
private static void ParseCssClear(string styleValue, ref int nextIndex, Hashtable localProperties)
    => ParseWordEnumeration(Clears, styleValue, ref nextIndex, localProperties, "clear");
|
||||
|
||||
// .................................................................
|
||||
//
|
||||
// Parsing CSS margin and padding Properties
|
||||
//
|
||||
// .................................................................
|
||||
|
||||
// Generic method for parsing any of four-values properties, such as margin, padding, border-width, border-style, border-color
|
||||
/// <summary>
/// Parses one to four side values of a box property and stores them under
/// propertyName + "-top" / "-right" / "-bottom" / "-left".
/// </summary>
/// <param name="styleValue">CSS value text being parsed.</param>
/// <param name="nextIndex">Current parse position; advanced by the value parsers.</param>
/// <param name="localProperties">Receives the four per-side entries.</param>
/// <param name="propertyName">
/// One of "margin", "padding", "border-width", "border-style", "border-color" (asserted in debug builds).
/// </param>
/// <returns>True when at least one value was parsed; false when the first value is missing.</returns>
private static bool ParseCssRectangleProperty(string styleValue, ref int nextIndex, Hashtable localProperties,
    string propertyName)
{
    // CSS Spec:
    // If only one value is set, then the value applies to all four sides;
    // If two or three values are set, then missing value(s) are taken from the opposite side(s).
    // The order they are applied is: top/right/bottom/left

    Debug.Assert(propertyName == "margin" || propertyName == "padding" || propertyName == "border-width" ||
                 propertyName == "border-style" || propertyName == "border-color");

    // Reads the next side value with the parser that matches the property kind.
    // The position is passed explicitly because a ref parameter cannot be captured by a local function.
    string ReadSide(ref int index)
    {
        if (propertyName == "border-color")
        {
            return ParseCssColor(styleValue, ref index);
        }

        if (propertyName == "border-style")
        {
            return ParseCssBorderStyle(styleValue, ref index);
        }

        return ParseCssSize(styleValue, ref index, /*mustBeNonNegative:*/true);
    }

    // 1st value: applies to all four sides
    var first = ReadSide(ref nextIndex);
    if (first == null)
    {
        return false;
    }

    localProperties[propertyName + "-top"] = first;
    localProperties[propertyName + "-bottom"] = first;
    localProperties[propertyName + "-right"] = first;
    localProperties[propertyName + "-left"] = first;

    // 2nd value: overrides right and left
    var second = ReadSide(ref nextIndex);
    if (second == null)
    {
        return true;
    }

    localProperties[propertyName + "-right"] = second;
    localProperties[propertyName + "-left"] = second;

    // 3rd value: overrides bottom
    var third = ReadSide(ref nextIndex);
    if (third == null)
    {
        return true;
    }

    localProperties[propertyName + "-bottom"] = third;

    // 4th value: overrides left
    var fourth = ReadSide(ref nextIndex);
    if (fourth != null)
    {
        localProperties[propertyName + "-left"] = fourth;
    }

    return true;
}
|
||||
|
||||
// .................................................................
|
||||
//
|
||||
// Parsing CSS border Properties
|
||||
//
|
||||
// .................................................................
|
||||
|
||||
// border: [ <border-width> || <border-style> || <border-color> ]
|
||||
|
||||
/// <summary>
/// Parses the CSS <c>border</c> shorthand: repeatedly tries border-width, then border-style, then
/// border-color at the current position, until none of the three parsers accepts a value.
/// </summary>
/// <param name="styleValue">CSS value text being parsed.</param>
/// <param name="nextIndex">Current parse position; advanced by ParseCssRectangleProperty.</param>
/// <param name="localProperties">Receives the per-side border entries.</param>
private static void ParseCssBorder(string styleValue, ref int nextIndex, Hashtable localProperties)
{
    bool consumed;
    do
    {
        // Short-circuit order matters: width is tried first, then style, then color.
        consumed = ParseCssRectangleProperty(styleValue, ref nextIndex, localProperties, "border-width")
                   || ParseCssRectangleProperty(styleValue, ref nextIndex, localProperties, "border-style")
                   || ParseCssRectangleProperty(styleValue, ref nextIndex, localProperties, "border-color");
    } while (consumed);
}
|
||||
|
||||
/// <summary>
/// Tries to read a border-style keyword at <paramref name="nextIndex"/> using the BorderStyles table.
/// </summary>
/// <returns>The value returned by ParseWordEnumeration (compared against null by the caller).</returns>
private static string ParseCssBorderStyle(string styleValue, ref int nextIndex)
{
    return ParseWordEnumeration(BorderStyles, styleValue, ref nextIndex);
}
|
||||
|
||||
// .................................................................
|
||||
//
|
||||
// Parsing CSS Background Properties
|
||||
//
|
||||
// .................................................................
|
||||
|
||||
/// <summary>
/// Placeholder for parsing the CSS <c>background</c> shorthand. Not implemented: the method
/// reads nothing, leaves <paramref name="nextIndex"/> untouched and stores nothing in
/// <paramref name="localValues"/>.
/// </summary>
private static void ParseCssBackground(string styleValue, ref int nextIndex, Hashtable localValues)
{
    // TODO: Implement parsing background attribute
}
|
||||
}
|
||||
}
|
||||
26
JRCookbookBusiness/Converters/HtmlEncodedTextWriter.cs
Normal file
26
JRCookbookBusiness/Converters/HtmlEncodedTextWriter.cs
Normal file
@@ -0,0 +1,26 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Net;
|
||||
using System.Text;
|
||||
using System.Xml;
|
||||
|
||||
namespace HtmlToXamlDemo
|
||||
{
|
||||
/// <summary>
/// An <see cref="XmlTextWriter"/> whose <see cref="WriteString"/> HTML-encodes its argument with
/// <see cref="WebUtility.HtmlEncode(string)"/> and emits the result through <c>WriteRaw</c>,
/// so the text is written exactly as HTML-encoded rather than with the base writer's own escaping.
/// </summary>
public class HtmlEncodedTextWriter : XmlTextWriter
{
    /// <summary>Creates a writer that sends its output to <paramref name="w"/>.</summary>
    public HtmlEncodedTextWriter(TextWriter w)
        : base(w)
    {
    }

    #region Overrides of XmlTextWriter

    /// <inheritdoc />
    public override void WriteString(string text) => WriteRaw(WebUtility.HtmlEncode(text));

    #endregion
}
|
||||
}
|
||||
577
JRCookbookBusiness/Converters/HtmlFromXamlConverter.cs
Normal file
577
JRCookbookBusiness/Converters/HtmlFromXamlConverter.cs
Normal file
@@ -0,0 +1,577 @@
|
||||
// // Copyright (c) Microsoft. All rights reserved.
|
||||
// // Licensed under the MIT license. See LICENSE file in the project root for full license information.
|
||||
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
using System.Xml;
|
||||
|
||||
namespace HtmlToXamlDemo
|
||||
{
|
||||
/// <summary>
|
||||
/// HtmlFromXamlConverter is a static class that takes a XAML string
/// and converts it into HTML
|
||||
/// </summary>
|
||||
internal static class HtmlFromXamlConverter
|
||||
{
|
||||
// ---------------------------------------------------------------------
|
||||
//
|
||||
// Internal Methods
|
||||
//
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
#region Internal Methods
|
||||
|
||||
/// <summary>
|
||||
/// Main entry point for Xaml-to-Html converter.
|
||||
/// Converts a xaml string into html string.
|
||||
/// </summary>
|
||||
/// <param name="xamlString">
|
||||
/// Xaml string to convert.
|
||||
/// </param>
|
||||
/// <returns>
|
||||
/// Html string produced from a source xaml.
|
||||
/// </returns>
|
||||
internal static string ConvertXamlToHtml(string xamlString)
{
    // Nothing to convert. Previously null input threw from StringReader and blank input threw
    // from the xml reader; both now produce the same empty result as a non-FlowDocument input.
    if (string.IsNullOrWhiteSpace(xamlString))
    {
        return "";
    }

    var htmlStringBuilder = new StringBuilder(100);

    // Both the reader and the writer own disposable resources; release them deterministically.
    using (var xamlReader = new XmlTextReader(new StringReader(xamlString)))
    using (var htmlWriter = new HtmlEncodedTextWriter(new StringWriter(htmlStringBuilder)))
    {
        if (!WriteFlowDocument(xamlReader, htmlWriter))
        {
            // Empty xaml or missing FlowDocument root
            return "";
        }

        // Make sure everything reached the StringBuilder before it is read
        htmlWriter.Flush();

        return htmlStringBuilder.ToString();
    }
}
|
||||
|
||||
#endregion Internal Methods
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
//
|
||||
// Private Methods
|
||||
//
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
#region Private Methods
|
||||
|
||||
/// <summary>
|
||||
/// Processes a root level element of XAML (normally it's FlowDocument element).
|
||||
/// </summary>
|
||||
/// <param name="xamlReader">
|
||||
/// XmlTextReader for a source xaml.
|
||||
/// </param>
|
||||
/// <param name="htmlWriter">
|
||||
/// XmlTextWriter producing resulting html
|
||||
/// </param>
|
||||
private static bool WriteFlowDocument(XmlTextReader xamlReader, XmlTextWriter htmlWriter)
{
    // Xaml content is empty - nothing to convert
    var hasToken = ReadNextToken(xamlReader);
    if (!hasToken)
    {
        return false;
    }

    // The first significant token must be the root FlowDocument element
    var isFlowDocumentRoot = xamlReader.NodeType == XmlNodeType.Element && xamlReader.Name == "FlowDocument";
    if (!isFlowDocumentRoot)
    {
        return false;
    }

    // Buffer for the css properties of the inline STYLE attribute.
    // The same instance is passed down and re-initialized on every element level.
    var styleBuffer = new StringBuilder();

    htmlWriter.WriteStartElement("html");
    htmlWriter.WriteStartElement("body");

    // FlowDocument attributes become the style of <body>; its children become the body content
    WriteFormattingProperties(xamlReader, htmlWriter, styleBuffer);
    WriteElementContent(xamlReader, htmlWriter, styleBuffer);

    htmlWriter.WriteEndElement(); // body
    htmlWriter.WriteEndElement(); // html

    return true;
}
|
||||
|
||||
/// <summary>
|
||||
/// Reads attributes of the current xaml element and converts
|
||||
/// them into appropriate html attributes or css styles.
|
||||
/// </summary>
|
||||
/// <param name="xamlReader">
|
||||
/// XmlTextReader which is expected to be at XmlNodeType.Element
|
||||
/// (opening element tag) position.
|
||||
/// The reader will remain at the same level after function complete.
|
||||
/// </param>
|
||||
/// <param name="htmlWriter">
|
||||
/// XmlTextWriter for output html, which is expected to be in
|
||||
/// after WriteStartElement state.
|
||||
/// </param>
|
||||
/// <param name="inlineStyle">
|
||||
/// String builder for collecting css properties for inline STYLE attribute.
|
||||
/// </param>
|
||||
private static void WriteFormattingProperties(XmlTextReader xamlReader, XmlTextWriter htmlWriter,
    StringBuilder inlineStyle)
{
    Debug.Assert(xamlReader.NodeType == XmlNodeType.Element);

    // Clear string builder for the inline style
    inlineStyle.Clear();

    if (!xamlReader.HasAttributes)
    {
        return;
    }

    var borderSet = false;

    while (xamlReader.MoveToNextAttribute())
    {
        string css = null;

        switch (xamlReader.Name)
        {
            // Character formatting properties
            // -------------------------------
            case "Background":
                css = "background-color:" + ParseXamlColor(xamlReader.Value) + ";";
                break;
            case "FontFamily":
                css = "font-family:" + xamlReader.Value + ";";
                break;
            case "FontStyle":
                // Invariant casing: culture-sensitive ToLower() turns "Italic" into "ıtalic" under tr-TR
                css = "font-style:" + xamlReader.Value.ToLowerInvariant() + ";";
                break;
            case "FontWeight":
                css = "font-weight:" + xamlReader.Value.ToLowerInvariant() + ";";
                break;
            case "FontSize":
                css = "font-size:" + xamlReader.Value + ";";
                break;
            case "Foreground":
                css = "color:" + ParseXamlColor(xamlReader.Value) + ";";
                break;
            case "TextDecorations":
                // Translate the actual decoration instead of always emitting underline
                css = "text-decoration:" + ToCssTextDecoration(xamlReader.Value) + ";";
                break;

            // Paragraph formatting properties
            // -------------------------------
            case "Padding":
                css = "padding:" + ParseXamlThickness(xamlReader.Value) + ";";
                break;
            case "Margin":
                css = "margin:" + ParseXamlThickness(xamlReader.Value) + ";";
                break;
            case "BorderThickness":
                css = "border-width:" + ParseXamlThickness(xamlReader.Value) + ";";
                borderSet = true;
                break;
            case "BorderBrush":
                css = "border-color:" + ParseXamlColor(xamlReader.Value) + ";";
                borderSet = true;
                break;
            case "TextIndent":
                css = "text-indent:" + xamlReader.Value + ";";
                break;
            case "TextAlignment":
                css = "text-align:" + xamlReader.Value + ";";
                break;

            // Table attributes
            // ----------------
            case "Width":
                css = "width:" + xamlReader.Value + ";";
                break;
            case "ColumnSpan":
                // Span attributes are written directly as html attributes, not as css
                htmlWriter.WriteAttributeString("colspan", xamlReader.Value);
                break;
            case "RowSpan":
                htmlWriter.WriteAttributeString("rowspan", xamlReader.Value);
                break;

            // Recognized xaml properties that are intentionally not converted
            // ---------------------------------------------------------------
            case "FontStretch":
            case "TextEffects":
            case "Emphasis":
            case "StandardLigatures":
            case "Variants":
            case "Capitals":
            case "Fraction":
            case "LineHeight":
            case "IsKeptTogether":
            case "IsKeptWithNext":
            case "ColumnBreakBefore":
            case "PageBreakBefore":
            case "FlowDirection":
                break;
        }

        if (css != null)
        {
            inlineStyle.Append(css);
        }
    }

    if (borderSet)
    {
        // A width or color alone does not draw a border; force a solid style
        inlineStyle.Append("border-style:solid;mso-element:para-border-div;");
    }

    // Return the xamlReader back to element level
    xamlReader.MoveToElement();
    Debug.Assert(xamlReader.NodeType == XmlNodeType.Element);

    // Maps a xaml TextDecorations attribute value (one or more names separated by commas or spaces)
    // to the value of the css text-decoration property. Names without a css counterpart are ignored.
    // When nothing is recognized the previous behavior (underline) is kept, unless the value
    // explicitly says "None".
    string ToCssTextDecoration(string xamlValue)
    {
        var decorations = new StringBuilder();
        var explicitNone = false;

        foreach (var token in xamlValue.Split(new[] { ',', ' ' }, StringSplitOptions.RemoveEmptyEntries))
        {
            string keyword = null;
            switch (token.ToLowerInvariant())
            {
                case "underline":
                    keyword = "underline";
                    break;
                case "overline":
                    keyword = "overline";
                    break;
                case "strikethrough":
                    keyword = "line-through";
                    break;
                case "none":
                    explicitNone = true;
                    break;
            }

            if (keyword == null)
            {
                continue;
            }

            if (decorations.Length > 0)
            {
                decorations.Append(' ');
            }

            decorations.Append(keyword);
        }

        if (decorations.Length > 0)
        {
            return decorations.ToString();
        }

        return explicitNone ? "none" : "underline";
    }
}
|
||||
|
||||
/// <summary>
/// Converts a xaml color value into a css color value by dropping the alpha (transparency)
/// component of hex colors. Values that carry no alpha component are returned unchanged.
/// </summary>
/// <param name="color">Xaml color: a hex value starting with '#', or any other color text.</param>
/// <returns>
/// "#RRGGBB" for "#AARRGGBB", "#RGB" for "#ARGB"; every other input is returned as is.
/// </returns>
private static string ParseXamlColor(string color)
{
    if (color.StartsWith("#", StringComparison.Ordinal))
    {
        // Only strip when an alpha component is actually present. Stripping unconditionally
        // corrupts "#RRGGBB" (it became "#GGBB") and throws on strings shorter than 3 characters.
        switch (color.Length)
        {
            case 9: // #AARRGGBB - remove the two alpha digits
                return "#" + color.Substring(3);
            case 5: // #ARGB - remove the single alpha digit
                return "#" + color.Substring(2);
        }
    }

    return color;
}
|
||||
|
||||
/// <summary>
/// Converts a xaml thickness value into the value of a css box property (margin, padding, border-width).
/// Each component is rounded up to a whole number; a component that is not a number becomes "1".
/// </summary>
/// <param name="thickness">One, two or four numbers separated by commas and/or spaces.</param>
/// <returns>
/// One value: the normalized value. Two values "h,v": "v h". Four values, read as
/// left,top,right,bottom: "top right bottom left" (css order). Any other count: the first value only.
/// </returns>
private static string ParseXamlThickness(string thickness)
{
    // Accept both comma- and space-delimited lists; previously a space-delimited list was
    // treated as a single value and passed through in xaml order.
    var values = thickness.Split(new[] { ',', ' ' }, StringSplitOptions.RemoveEmptyEntries);

    if (values.Length == 0)
    {
        // Blank input: nothing to normalize, hand it back untouched
        return thickness;
    }

    for (var i = 0; i < values.Length; i++)
    {
        if (double.TryParse(values[i], NumberStyles.Any, CultureInfo.InvariantCulture, out double value))
        {
            values[i] = Math.Ceiling(value).ToString(CultureInfo.InvariantCulture);
        }
        else
        {
            values[i] = "1";
        }
    }

    switch (values.Length)
    {
        case 2:
            // xaml: horizontal,vertical -> css: vertical horizontal
            return values[1] + " " + values[0];
        case 4:
            // xaml: left,top,right,bottom -> css: top right bottom left
            return values[1] + " " + values[2] + " " + values[3] + " " + values[0];
        default:
            // A single value, or an unsupported count: use the first normalized value.
            // (The single-value case used to return the raw input, skipping normalization.)
            return values[0];
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Reads the content of the current xaml element and writes the converted html to htmlWriter.
|
||||
/// </summary>
|
||||
/// <param name="xamlReader">
|
||||
/// XmlTextReader which is expected to be at XmlNodeType.Element
|
||||
/// (opening element tag) position.
|
||||
/// </param>
|
||||
/// <param name="htmlWriter">
|
||||
/// May be null, in which case we are skipping the xaml element;
|
||||
/// without producing any output to html.
|
||||
/// </param>
|
||||
/// <param name="inlineStyle">
|
||||
/// StringBuilder used for collecting css properties for inline STYLE attribute.
|
||||
/// </param>
|
||||
private static void WriteElementContent(XmlTextReader xamlReader, XmlTextWriter htmlWriter,
    StringBuilder inlineStyle)
{
    Debug.Assert(xamlReader.NodeType == XmlNodeType.Element);

    var elementContentStarted = false;

    // Marks the start of the element's content. The first call writes the collected inline style
    // as a "style" attribute (attributes can only be written before any content) and clears the
    // buffer. In skipping mode (htmlWriter == null) nothing is written and inlineStyle,
    // which may be null then, is never touched.
    void BeginContent()
    {
        if (htmlWriter != null && !elementContentStarted && inlineStyle.Length > 0)
        {
            htmlWriter.WriteAttributeString("style", inlineStyle.ToString());
            inlineStyle.Clear();
        }

        elementContentStarted = true;
    }

    if (xamlReader.IsEmptyElement)
    {
        // No children to read; only the pending style attribute has to be emitted
        BeginContent();
        return;
    }

    while (ReadNextToken(xamlReader) && xamlReader.NodeType != XmlNodeType.EndElement)
    {
        switch (xamlReader.NodeType)
        {
            case XmlNodeType.Element:
                if (xamlReader.Name.Contains("."))
                {
                    // Property element (e.g. Paragraph.TextDecorations): contributes to the style only
                    AddComplexProperty(xamlReader, inlineStyle);
                }
                else
                {
                    BeginContent();
                    WriteElement(xamlReader, htmlWriter, inlineStyle);
                }

                Debug.Assert(xamlReader.NodeType == XmlNodeType.EndElement ||
                             xamlReader.NodeType == XmlNodeType.Element && xamlReader.IsEmptyElement);
                break;

            case XmlNodeType.Comment:
                BeginContent();
                if (htmlWriter != null)
                {
                    htmlWriter.WriteComment(xamlReader.Value);
                }

                break;

            case XmlNodeType.CDATA:
            case XmlNodeType.Text:
            case XmlNodeType.SignificantWhitespace:
                BeginContent();
                if (htmlWriter != null)
                {
                    htmlWriter.WriteString(xamlReader.Value);
                }

                break;
        }
    }

    Debug.Assert(xamlReader.NodeType == XmlNodeType.EndElement);
}
|
||||
|
||||
/// <summary>
|
||||
/// Converts an element notation of a complex property into a css entry
/// appended to the inline style; the property element itself is skipped.
|
||||
/// </summary>
|
||||
/// <param name="xamlReader">
|
||||
/// On entry this XmlTextReader must be on Element start tag;
|
||||
/// on exit - on EndElement tag.
|
||||
/// </param>
|
||||
/// <param name="inlineStyle">
|
||||
/// StringBuilder containing a value for STYLE attribute.
|
||||
/// </param>
|
||||
private static void AddComplexProperty(XmlTextReader xamlReader, StringBuilder inlineStyle)
{
    Debug.Assert(xamlReader.NodeType == XmlNodeType.Element);

    // The only property element that is translated is *.TextDecorations, and it is always
    // rendered as underline. inlineStyle is null when the caller is in skipping mode.
    if (xamlReader.Name.EndsWith(".TextDecorations"))
    {
        inlineStyle?.Append("text-decoration:underline;");
    }

    // Skip the element representing the complex property: read through its content with
    // no writer and no style buffer so that nothing is emitted.
    WriteElementContent(xamlReader, /*htmlWriter:*/null, /*inlineStyle:*/null);
}
|
||||
|
||||
/// <summary>
|
||||
/// Converts a xaml element into an appropriate html element.
|
||||
/// </summary>
|
||||
/// <param name="xamlReader">
|
||||
/// On entry this XmlTextReader must be on Element start tag;
|
||||
/// on exit - on EndElement tag.
|
||||
/// </param>
|
||||
/// <param name="htmlWriter">
|
||||
/// May be null, in which case we are skipping xaml content
|
||||
/// without producing any html output
|
||||
/// </param>
|
||||
/// <param name="inlineStyle">
|
||||
/// StringBuilder used for collecting css properties for inline STYLE attributes on every level.
|
||||
/// </param>
|
||||
private static void WriteElement(XmlTextReader xamlReader, XmlTextWriter htmlWriter, StringBuilder inlineStyle)
{
    Debug.Assert(xamlReader.NodeType == XmlNodeType.Element);

    // Html tag for the current xaml element; stays null in skipping mode (no writer)
    // and for xaml elements that have no html counterpart.
    string htmlTag = null;

    if (htmlWriter != null)
    {
        switch (xamlReader.Name)
        {
            case "Run":
            case "Span":
            case "InlineUIContainer":
                htmlTag = "span";
                break;
            case "Bold":
                htmlTag = "b";
                break;
            case "Italic":
                htmlTag = "i";
                break;
            case "Paragraph":
                htmlTag = "p";
                break;
            case "BlockUIContainer":
            case "Section":
                htmlTag = "div";
                break;
            case "Table":
                htmlTag = "table";
                break;
            case "TableColumn":
                htmlTag = "col";
                break;
            case "TableRowGroup":
                htmlTag = "tbody";
                break;
            case "TableRow":
                htmlTag = "tr";
                break;
            case "TableCell":
                htmlTag = "td";
                break;
            case "ListItem":
                htmlTag = "li";
                break;
            case "List":
                // Bullet-like (or absent) marker styles map to an unordered list, all others to an ordered one
                switch (xamlReader.GetAttribute("MarkerStyle"))
                {
                    case null:
                    case "None":
                    case "Disc":
                    case "Circle":
                    case "Square":
                    case "Box":
                        htmlTag = "ul";
                        break;
                    default:
                        htmlTag = "ol";
                        break;
                }

                break;
        }
    }

    if (htmlTag == null)
    {
        // Skipping mode or unrecognized xaml element: recurse into it without any output
        WriteElementContent(xamlReader, /*htmlWriter:*/null, null);
        return;
    }

    htmlWriter.WriteStartElement(htmlTag);

    WriteFormattingProperties(xamlReader, htmlWriter, inlineStyle);
    WriteElementContent(xamlReader, htmlWriter, inlineStyle);

    htmlWriter.WriteEndElement();
}
|
||||
|
||||
// Reader advance helpers
|
||||
// ----------------------
|
||||
|
||||
/// <summary>
|
||||
/// Reads several items from xamlReader skipping all non-significant stuff.
|
||||
/// </summary>
|
||||
/// <param name="xamlReader">
|
||||
/// XmlReader from which tokens are being read.
|
||||
/// </param>
|
||||
/// <returns>
|
||||
/// True if new token is available; false if end of stream reached.
|
||||
/// </returns>
|
||||
private static bool ReadNextToken(XmlReader xamlReader)
{
    while (xamlReader.Read())
    {
        Debug.Assert(xamlReader.ReadState == ReadState.Interactive,
            "Reader is expected to be in Interactive state (" + xamlReader.ReadState + ")");

        var nodeType = xamlReader.NodeType;

        if (nodeType == XmlNodeType.Whitespace)
        {
            // Plain whitespace only counts inside an xml:space="preserve" scope
            if (xamlReader.XmlSpace == XmlSpace.Preserve)
            {
                return true;
            }

            continue;
        }

        switch (nodeType)
        {
            case XmlNodeType.Element:
            case XmlNodeType.EndElement:
            case XmlNodeType.None:
            case XmlNodeType.CDATA:
            case XmlNodeType.Text:
            case XmlNodeType.SignificantWhitespace:
            case XmlNodeType.Comment:
                return true;
        }

        // Everything else is ignorable and skipped: processing instructions, document type,
        // xml declaration, and entity references / end-entity nodes (entity reading is not implemented).
    }

    // End of stream reached
    return false;
}
|
||||
|
||||
#endregion Private Methods
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
//
|
||||
// Private Fields
|
||||
//
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
#region Private Fields
|
||||
|
||||
#endregion Private Fields
|
||||
}
|
||||
}
|
||||
745
JRCookbookBusiness/Converters/HtmlLexicalAnalyzer.cs
Normal file
745
JRCookbookBusiness/Converters/HtmlLexicalAnalyzer.cs
Normal file
@@ -0,0 +1,745 @@
|
||||
// // Copyright (c) Microsoft. All rights reserved.
|
||||
// // Licensed under the MIT license. See LICENSE file in the project root for full license information.
|
||||
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
|
||||
namespace HtmlToXamlDemo
|
||||
{
|
||||
/// <summary>
|
||||
/// lexical analyzer class
|
||||
/// recognizes tokens as groups of characters separated by arbitrary amounts of whitespace
|
||||
/// also classifies tokens according to type
|
||||
/// </summary>
|
||||
internal class HtmlLexicalAnalyzer
|
||||
{
|
||||
// ---------------------------------------------------------------------
|
||||
//
|
||||
// Constructors
|
||||
//
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
#region Constructors
|
||||
|
||||
/// <summary>
|
||||
/// initializes the _inputStringReader member with the string to be read
|
||||
/// also sets initial values for _nextCharacterCode and _nextTokenType
|
||||
/// </summary>
|
||||
/// <param name="inputTextString">
|
||||
/// text string to be parsed for xml content
|
||||
/// </param>
|
||||
internal HtmlLexicalAnalyzer(string inputTextString)
{
    // Token state: empty buffer, whitespace ignored until the first significant character
    _nextToken = new StringBuilder(100);
    NextTokenType = HtmlTokenType.Text;
    _ignoreNextWhitespace = true;

    // Character state before anything has been consumed
    _previousCharacter = ' ';
    NextCharacter = ' ';
    _nextCharacterCode = 0;

    // Prime the one-character look-ahead from the input
    _inputStringReader = new StringReader(inputTextString);
    var firstCharacterCode = _inputStringReader.Read();
    _lookAheadCharacterCode = firstCharacterCode;
    _lookAheadCharacter = (char) firstCharacterCode;

    // read the first character so we have some value for the NextCharacter property
    GetNextCharacter();
}
|
||||
|
||||
#endregion Constructors
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
//
|
||||
// Internal methods
|
||||
//
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
#region Internal Methods
|
||||
|
||||
/// <summary>
|
||||
/// retrieves next recognizable token from input string
|
||||
/// and identifies its type
|
||||
/// if no valid token is found, the output parameters are set to null
|
||||
/// if end of stream is reached without matching any token, token type
|
||||
/// parameter is set to EOF
|
||||
/// </summary>
|
||||
internal void GetNextContentToken()
{
    Debug.Assert(NextTokenType != HtmlTokenType.Eof);
    _nextToken.Clear();

    if (IsAtEndOfStream)
    {
        NextTokenType = HtmlTokenType.Eof;
        return;
    }

    if (IsAtTagStart)
    {
        GetNextCharacter();

        var isClosingTag = NextCharacter == '/';
        if (!isClosingTag)
        {
            NextTokenType = HtmlTokenType.OpeningTagStart;
            _nextToken.Append("<");
            _ignoreNextWhitespace = true; // Whitespaces after opening tags are insignificant
            return;
        }

        _nextToken.Append("</");
        NextTokenType = HtmlTokenType.ClosingTagStart;

        // advance past the '/'
        GetNextCharacter();
        _ignoreNextWhitespace = false; // Whitespaces after closing tags are significant
        return;
    }

    if (IsAtDirectiveStart)
    {
        // either a comment or CDATA; the look-ahead character tells which
        GetNextCharacter();
        switch (_lookAheadCharacter)
        {
            case '[':
                // cdata
                ReadDynamicContent();
                break;
            case '-':
                ReadComment();
                break;
            default:
                // neither a comment nor cdata, should be something like DOCTYPE
                // skip till the next tag ender
                ReadUnknownDirective();
                break;
        }

        return;
    }

    // read text content, unless you encounter a tag
    NextTokenType = HtmlTokenType.Text;
    while (!IsAtTagStart && !IsAtEndOfStream && !IsAtDirectiveStart)
    {
        if (NextCharacter == '<' && !IsNextCharacterEntity && _lookAheadCharacter == '?')
        {
            // ignore processing directive
            SkipProcessingDirective();
            continue;
        }

        var isWhitespaceOrControl = NextCharacter <= ' ';
        if (!isWhitespaceOrControl)
        {
            _nextToken.Append(NextCharacter);
            _ignoreNextWhitespace = false;
        }
        else
        {
            // Respect xml:preserve or its equivalents for whitespace processing.
            // A run of whitespace/control characters collapses into a single space;
            // it is dropped entirely when whitespace is currently being ignored.
            if (!_ignoreNextWhitespace)
            {
                _nextToken.Append(' ');
            }

            _ignoreNextWhitespace = true; // and keep ignoring the following whitespaces
        }

        GetNextCharacter();
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Unconditionally returns a token which is one of: TagEnd, EmptyTagEnd, Name, Atom or EndOfStream
|
||||
/// Does not guarantee token reader advancing.
|
||||
/// </summary>
|
||||
internal void GetNextTagToken()
{
    _nextToken.Clear();

    if (IsAtEndOfStream)
    {
        NextTokenType = HtmlTokenType.Eof;
        return;
    }

    SkipWhiteSpace();

    // A literal '>' ends the tag; an entity-encoded one does not
    if (NextCharacter == '>' && !IsNextCharacterEntity)
    {
        NextTokenType = HtmlTokenType.TagEnd;
        _nextToken.Append('>');
        GetNextCharacter();
        // Note: _ignoreNextWhitespace must be set appropriately on tag start processing
        return;
    }

    // "/>" closes an empty (no-scope) tag
    if (NextCharacter == '/' && _lookAheadCharacter == '>')
    {
        NextTokenType = HtmlTokenType.EmptyTagEnd;
        _nextToken.Append("/>");
        GetNextCharacter();
        GetNextCharacter();
        _ignoreNextWhitespace = false; // Whitespace after no-scope tags is significant
        return;
    }

    if (!IsGoodForNameStart(NextCharacter))
    {
        // Unexpected type of token for a tag. Report one character as Atom,
        // expecting that HtmlParser will ignore it.
        NextTokenType = HtmlTokenType.Atom;
        _nextToken.Append(NextCharacter);
        GetNextCharacter();
        return;
    }

    // Starts a name. Character entities are allowed here.
    // No exception is thrown if end of stream is encountered: we just stop and return
    // whatever is in the token. If the parser is not expecting end of file after this,
    // it will call the get-next-token function and throw an exception.
    NextTokenType = HtmlTokenType.Name;
    while (IsGoodForName(NextCharacter) && !IsAtEndOfStream)
    {
        _nextToken.Append(NextCharacter);
        GetNextCharacter();
    }
}
|
||||
|
||||
/// <summary>
/// Always produces an EqualSign token whose text is "=", whether or not an
/// equal sign is actually present in the input.
/// Skips whitespace first and consumes the '=' character only when it is there,
/// so the reader is not guaranteed to advance.
/// </summary>
internal void GetNextEqualSignToken()
{
    Debug.Assert(NextTokenType != HtmlTokenType.Eof);

    // The reported token is fixed, independent of what the input contains.
    NextTokenType = HtmlTokenType.EqualSign;
    _nextToken.Length = 0;
    _nextToken.Append('=');

    SkipWhiteSpace();

    // '=' is not in the list of entities, so there is no entity check here.
    var equalSignPresent = NextCharacter == '=';
    if (!equalSignPresent)
    {
        return;
    }

    GetNextCharacter();
}
|
||||
|
||||
/// <summary>
/// Always produces an Atom token holding an attribute value.
/// A value opened by a single or double quote runs to the matching quote (or the end of input);
/// an unquoted value runs to the next whitespace, '>' or the end of input.
/// The token may be empty, so the reader is not guaranteed to advance.
/// </summary>
internal void GetNextAtomToken()
{
    Debug.Assert(NextTokenType != HtmlTokenType.Eof);
    _nextToken.Length = 0;

    SkipWhiteSpace();

    NextTokenType = HtmlTokenType.Atom;

    // A quote that came from a character entity does not open a quoted value.
    var opensWithQuote = !IsNextCharacterEntity && (NextCharacter == '\'' || NextCharacter == '"');

    if (!opensWithQuote)
    {
        // Unquoted value: take everything up to whitespace, a tag end or the end of input.
        while (!(IsAtEndOfStream || char.IsWhiteSpace(NextCharacter) || NextCharacter == '>'))
        {
            _nextToken.Append(NextCharacter);
            GetNextCharacter();
        }

        return;
    }

    var quote = NextCharacter;
    GetNextCharacter();

    // Collect the characters between the quotes; an entity-encoded quote is part of the value.
    while (!IsAtEndOfStream && (NextCharacter != quote || IsNextCharacterEntity))
    {
        _nextToken.Append(NextCharacter);
        GetNextCharacter();
    }

    // Step over the closing quote when there is one.
    // NOTE: this recovery differs from IE's. IE keeps reading until it finds a closing quote
    // or the end of file; at end of file it treats the current value as text, and if it finds
    // a closing quote anywhere in the text it eats everything between the quotes.
    // TODO (suggestion): stop at end of file or at an angle bracket of any kind and assume a
    // quote was there, so the value may be meaningless but is never treated as text.
    if (NextCharacter == quote)
    {
        GetNextCharacter();
    }
}
|
||||
|
||||
#endregion Internal Methods
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
//
|
||||
// Internal Properties
|
||||
//
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
#region Internal Properties
|
||||
|
||||
/// <summary>
/// Type of the token produced by the most recent token-reading call.
/// </summary>
internal HtmlTokenType NextTokenType { get; private set; }

/// <summary>
/// Text currently held in the token buffer, as a string.
/// </summary>
internal string NextToken
{
    get { return _nextToken.ToString(); }
}
|
||||
|
||||
#endregion Internal Properties
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
//
|
||||
// Private methods
|
||||
//
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
#region Private Methods
|
||||
|
||||
/// <summary>
/// Advances the reading position by one character and makes that character available
/// through the NextCharacter property. If the new character is an ampersand that starts
/// a character entity ("&amp;#DDD;" or "&amp;name;"), the entity is decoded and
/// IsNextCharacterEntity is set.
/// </summary>
/// <remarks>
/// Throws InvalidOperationException if called when the end of the stream has already been reached.
/// When an ampersand is followed by '#' or a letter but no well-formed, known entity results,
/// the ampersand and the characters examined after it are dropped and reading resumes with the
/// current lookahead character.
/// </remarks>
private void GetNextCharacter()
{
    if (_nextCharacterCode == -1)
    {
        throw new InvalidOperationException("GetNextCharacter method called at the end of a stream");
    }

    _previousCharacter = NextCharacter;

    NextCharacter = _lookAheadCharacter;
    _nextCharacterCode = _lookAheadCharacterCode;
    // next character is not an entity as of now
    IsNextCharacterEntity = false;

    ReadLookAheadCharacter();

    if (NextCharacter != '&')
    {
        return;
    }

    if (_lookAheadCharacter == '#')
    {
        // numeric entity - parse digits - &#DDDDD;
        var entityCode = 0;
        ReadLookAheadCharacter();

        // Largest numeric entity is 7 characters.
        // Only ASCII digits are accepted: char.IsDigit is also true for other Unicode decimal
        // digits, for which the "- '0'" arithmetic below would produce a meaningless value.
        for (var i = 0; i < 7 && _lookAheadCharacter >= '0' && _lookAheadCharacter <= '9'; i++)
        {
            entityCode = 10*entityCode + (_lookAheadCharacterCode - '0');
            ReadLookAheadCharacter();
        }

        if (_lookAheadCharacter == ';')
        {
            // correct format - advance
            ReadLookAheadCharacter();
            _nextCharacterCode = entityCode;

            // NOTE: a code above 0xFFFF does not fit in a char; in the default unchecked
            // context the cast keeps only the low 16 bits.
            NextCharacter = (char) _nextCharacterCode;

            // as far as we are concerned, this is an entity
            IsNextCharacterEntity = true;
        }
        else
        {
            // Not an entity: continue with the current lookahead character.
            // Some digits may have been consumed on the way.
            NextCharacter = _lookAheadCharacter;
            _nextCharacterCode = _lookAheadCharacterCode;
            ReadLookAheadCharacter();
            IsNextCharacterEntity = false;
        }
    }
    else if (char.IsLetter(_lookAheadCharacter))
    {
        // entity is written as a string
        var entity = "";

        // maximum length of string entities is 10 characters
        for (var i = 0;
            i < 10 && (char.IsLetter(_lookAheadCharacter) || char.IsDigit(_lookAheadCharacter));
            i++)
        {
            entity += _lookAheadCharacter;
            ReadLookAheadCharacter();
        }

        if (_lookAheadCharacter == ';')
        {
            // advance past the terminator
            ReadLookAheadCharacter();

            if (HtmlSchema.IsEntity(entity))
            {
                NextCharacter = HtmlSchema.EntityCharacterValue(entity);
                _nextCharacterCode = NextCharacter;
                IsNextCharacterEntity = true;
            }
            else
            {
                // Invalid entity: skip the whole thing and move on to the next character.
                NextCharacter = _lookAheadCharacter;
                _nextCharacterCode = _lookAheadCharacterCode;
                ReadLookAheadCharacter();

                // not an entity
                IsNextCharacterEntity = false;
            }
        }
        else
        {
            // No terminating ';': skip whatever was read after the ampersand and move on.
            NextCharacter = _lookAheadCharacter;
            // Keep the character code in step with the character, as the other recovery
            // branches do. Without this, hitting the end of input here (e.g. text ending in
            // "AT&T") left IsAtEndOfStream false while NextCharacter was already the
            // end-of-input value, which then got treated as ordinary text.
            _nextCharacterCode = _lookAheadCharacterCode;
            ReadLookAheadCharacter();
            IsNextCharacterEntity = false;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Reads one more character from the input reader into the lookahead slot.
/// Does nothing once the lookahead has already reported the end of input (-1).
/// </summary>
private void ReadLookAheadCharacter()
{
    if (_lookAheadCharacterCode == -1)
    {
        return;
    }

    var code = _inputStringReader.Read();
    _lookAheadCharacterCode = code;
    _lookAheadCharacter = (char) code;
}
|
||||
|
||||
/// <summary>
/// Skips whitespace in the input, together with any "&lt;?...&gt;" / "&lt;!...&gt;" constructs
/// (processing instructions, comments, CDATA-like "&lt;![ ... ]]&gt;" blocks) found along the way.
/// Leaves the first remaining character available in the NextCharacter property;
/// this may be the end-of-input character, no checking is performed.
/// </summary>
private void SkipWhiteSpace()
{
    // TODO: handle character entities while processing comments, cdata, and directives
    // TODO: SUGGESTION: we could check if lookahead and previous characters are entities also
    while (true)
    {
        if (NextCharacter == '<' && (_lookAheadCharacter == '?' || _lookAheadCharacter == '!'))
        {
            GetNextCharacter();

            if (_lookAheadCharacter == '[')
            {
                // Skip CDATA block and DTDs(?)
                while (!IsAtEndOfStream &&
                       !(_previousCharacter == ']' && NextCharacter == ']' && _lookAheadCharacter == '>'))
                {
                    GetNextCharacter();
                }

                // The loop stops on the second ']' of "]]>", with '>' still in the lookahead.
                // Step onto the '>' first; otherwise the check below never matches and the
                // "]>" terminator is left in the stream to be tokenized as part of the tag.
                if (!IsAtEndOfStream)
                {
                    GetNextCharacter();
                }
                if (NextCharacter == '>')
                {
                    GetNextCharacter();
                }
            }
            else
            {
                // Skip processing instruction, comments
                while (!IsAtEndOfStream && NextCharacter != '>')
                {
                    GetNextCharacter();
                }
                if (NextCharacter == '>')
                {
                    GetNextCharacter();
                }
            }
        }

        if (!char.IsWhiteSpace(NextCharacter))
        {
            break;
        }

        GetNextCharacter();
    }
}
|
||||
|
||||
/// <summary>
/// Tells whether a character may begin a name: an underscore or any letter.
/// </summary>
/// <param name="character">
/// character value to be checked
/// </param>
/// <returns>
/// true if the character can be the first character in a name, false otherwise
/// </returns>
private bool IsGoodForNameStart(char character)
{
    if (character == '_')
    {
        return true;
    }

    return char.IsLetter(character);
}
|
||||
|
||||
/// <summary>
/// Tells whether a character may appear inside a name after its first character:
/// anything accepted by IsGoodForNameStart, the characters '.', '-' and ':', a digit,
/// or a character accepted by the IsCombiningCharacter / IsExtender predicates.
/// </summary>
/// <param name="character">
/// character to be checked for validity in a name
/// </param>
/// <returns>
/// true if the character can be a valid part of a name
/// </returns>
private bool IsGoodForName(char character)
{
    switch (character)
    {
        case '.':
        case '-':
        case ':':
            return true;
    }

    if (IsGoodForNameStart(character) || char.IsDigit(character))
    {
        return true;
    }

    return IsCombiningCharacter(character) || IsExtender(character);
}
|
||||
|
||||
/// <summary>
/// Identifies a character as a combining character permitted in a name.
/// TODO: placeholder that currently rejects every character; to be replaced with comparisons
/// against the list of combining characters in the XML documentation.
/// </summary>
/// <param name="character">
/// character to be checked
/// </param>
/// <returns>
/// true if the character is a combining character, false otherwise (currently always false)
/// </returns>
private bool IsCombiningCharacter(char character)
{
    return false;
}
|
||||
|
||||
/// <summary>
/// Identifies a character as an extender permitted in a name.
/// TODO: placeholder that currently rejects every character; to be replaced with comparisons
/// against the list of extenders in the XML documentation.
/// </summary>
/// <param name="character">
/// character to be checked
/// </param>
/// <returns>
/// true if the character is an extender, false otherwise (currently always false)
/// </returns>
private bool IsExtender(char character)
{
    return false;
}
|
||||
|
||||
/// <summary>
/// Skips dynamic content that starts with '&lt;![' and ends with ']&gt;' (this includes CDATA),
/// reporting it as an empty Text token.
/// </summary>
private void ReadDynamicContent()
{
    // The caller must have positioned the reader on the '!' of "<![".
    Debug.Assert(_previousCharacter == '<' && NextCharacter == '!' && _lookAheadCharacter == '[');

    // The whole construct is reported as empty text.
    _nextToken.Length = 0;
    NextTokenType = HtmlTokenType.Text;

    // Two steps: onto the '[' and then onto the first character of the content.
    GetNextCharacter();
    GetNextCharacter();

    // NOTE (10/12/2004): scanning stops at the sequence "]>" rather than "]]>", because some
    // directives start with "<![", carry some data and simply end with "]>". Consequently CDATA,
    // or anything else written in its own [] inside the <![...]> directive, cannot contain "]>".
    while (!IsAtEndOfStream && (NextCharacter != ']' || _lookAheadCharacter != '>'))
    {
        GetNextCharacter();
    }

    if (IsAtEndOfStream)
    {
        return;
    }

    // Step onto the closing '>' and then past it.
    GetNextCharacter();
    GetNextCharacter();
}
|
||||
|
||||
/// <summary>
/// Reads a comment that starts with '&lt;!-' and ends with '--&gt;' into a Comment token.
/// NOTE: 10/06/2004: processing changed, will now skip anything starting with
/// the "&lt;!-" sequence and ending in "!&gt;" or "-&gt;", because in practice many html pages do not
/// use the full comment specifying conventions.
/// </summary>
/// <remarks>
/// A comment that is not terminated before the end of the input is accepted as is: the text
/// collected so far becomes the token and reading stops at the end of the stream.
/// </remarks>
private void ReadComment()
{
    // verify that we are at a comment
    Debug.Assert(_previousCharacter == '<' && NextCharacter == '!' && _lookAheadCharacter == '-');

    // Initialize a token
    NextTokenType = HtmlTokenType.Comment;
    _nextToken.Length = 0;

    // advance to the start of the comment value
    GetNextCharacter(); // get first '-'
    GetNextCharacter(); // get second '-'
    if (!IsAtEndOfStream)
    {
        // Guarded: for input that ends right after "<!-" the previous step already reached
        // the end of the stream, and GetNextCharacter throws when called there.
        GetNextCharacter(); // get first character of comment content
    }

    while (true)
    {
        // Read text until a possible end of comment.
        // Note that in many actual html pages comments end with "!>" (while xml standard is "-->")
        while (!IsAtEndOfStream &&
               !(NextCharacter == '-' && _lookAheadCharacter == '-' ||
                 NextCharacter == '!' && _lookAheadCharacter == '>'))
        {
            _nextToken.Append(NextCharacter);
            GetNextCharacter();
        }

        if (IsAtEndOfStream)
        {
            // Unterminated comment. Stop here instead of advancing past the end of the
            // stream, which would throw InvalidOperationException.
            break;
        }

        // Finish comment reading
        GetNextCharacter();
        if (_previousCharacter == '-' && NextCharacter == '-' && _lookAheadCharacter == '>')
        {
            // Standard comment end. Eat it and exit the loop
            GetNextCharacter(); // get '>'
            break;
        }
        if (_previousCharacter == '!' && NextCharacter == '>')
        {
            // Nonstandard but possible comment end - '!>'. Exit the loop
            break;
        }

        // Not an end. Save the character and continue reading
        _nextToken.Append(_previousCharacter);
    }

    // Read end of comment combination
    if (NextCharacter == '>')
    {
        GetNextCharacter();
    }
}
|
||||
|
||||
/// <summary>
/// Skips a directive that starts with "&lt;!" but is neither a comment nor CDATA
/// (for example DOCTYPE, which is not supported), ignoring its content up to and
/// including the next "&gt;" character. The directive is reported as an empty Text token.
/// </summary>
private void ReadUnknownDirective()
{
    // The caller must have positioned the reader on the '!' of an unknown directive.
    Debug.Assert(_previousCharacter == '<' && NextCharacter == '!' &&
                 !(_lookAheadCharacter == '-' || _lookAheadCharacter == '['));

    // The directive is reported as empty text.
    _nextToken.Length = 0;
    NextTokenType = HtmlTokenType.Text;

    // Move off the '!'.
    GetNextCharacter();

    // Run forward to the first real (non-entity) tag end.
    while (!IsAtEndOfStream && (NextCharacter != '>' || IsNextCharacterEntity))
    {
        GetNextCharacter();
    }

    if (IsAtEndOfStream)
    {
        return;
    }

    // Step past the tag end.
    GetNextCharacter();
}
|
||||
|
||||
/// <summary>
/// Skips a processing directive that starts with '&lt;?' and ends with '?&gt;'.
/// NOTE: 10/14/2004: IE also ends processing directives with '/&gt;', so that
/// terminator is recognized as well.
/// </summary>
private void SkipProcessingDirective()
{
    // The caller must have positioned the reader on the '<' of "<?".
    Debug.Assert(NextCharacter == '<' && _lookAheadCharacter == '?');

    // Two steps: onto the '?' and then onto the first character of the directive.
    GetNextCharacter();
    GetNextCharacter();

    while (!IsAtEndOfStream)
    {
        // No entity check is needed: '?' is not an entity, and the lookahead character
        // is read without entity processing.
        var atTerminator = (NextCharacter == '?' || NextCharacter == '/') && _lookAheadCharacter == '>';
        if (atTerminator)
        {
            break;
        }

        GetNextCharacter();
    }

    if (IsAtEndOfStream)
    {
        return;
    }

    // Step onto the closing '>' and then past it.
    GetNextCharacter();
    GetNextCharacter();
}
|
||||
|
||||
#endregion Private Methods
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
//
|
||||
// Private Properties
|
||||
//
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
#region Private Properties
|
||||
|
||||
// Current character of the input (after entity decoding).
private char NextCharacter { get; set; }

// True once the current character code is -1, i.e. the input is exhausted.
private bool IsAtEndOfStream
{
    get { return _nextCharacterCode == -1; }
}

// True when the reader is on a literal '<' that is followed by '/' or a name-start character.
private bool IsAtTagStart
{
    get
    {
        if (NextCharacter != '<')
        {
            return false;
        }

        var opensTag = _lookAheadCharacter == '/' || IsGoodForNameStart(_lookAheadCharacter);
        return opensTag && !IsNextCharacterEntity;
    }
}

// True when the reader is on a literal '>' or on the '/' of "/>".
private bool IsAtTagEnd
{
    get
    {
        var closesTag = NextCharacter == '>' || (NextCharacter == '/' && _lookAheadCharacter == '>');
        return closesTag && !IsNextCharacterEntity;
    }
}

// True when the reader is on a literal '<' that is followed by '!'.
private bool IsAtDirectiveStart
{
    get { return NextCharacter == '<' && _lookAheadCharacter == '!' && !IsNextCharacterEntity; }
}

// Tells whether the current character was produced by decoding a character entity.
private bool IsNextCharacterEntity { get; set; }
|
||||
|
||||
#endregion Private Properties
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
//
|
||||
// Private Fields
|
||||
//
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
#region Private Fields
|
||||
|
||||
// Reader that walks over the input text one character at a time.
private readonly StringReader _inputStringReader;

// Buffer in which the text of the current token is accumulated.
private readonly StringBuilder _nextToken;

// Code of the current character, not yet part of any token; -1 marks the end of the input.
private int _nextCharacterCode;

// One character of lookahead: its code as returned by the reader, and the same value as a char.
private int _lookAheadCharacterCode;
private char _lookAheadCharacter;

// Character that was current before the most recent advance.
private char _previousCharacter;

// When set, whitespace met while reading text content is dropped rather than emitted as a space.
private bool _ignoreNextWhitespace;
|
||||
|
||||
#endregion Private Fields
|
||||
}
|
||||
}
|
||||
539
JRCookbookBusiness/Converters/HtmlParser.cs
Normal file
539
JRCookbookBusiness/Converters/HtmlParser.cs
Normal file
@@ -0,0 +1,539 @@
|
||||
// // Copyright (c) Microsoft. All rights reserved.
|
||||
// // Licensed under the MIT license. See LICENSE file in the project root for full license information.
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Xml;
|
||||
|
||||
// StringBuilder
|
||||
|
||||
// important TODOS:
|
||||
// TODO 1. Start tags: The ParseXmlElement function has been modified to be called after both the
|
||||
// angle bracket < and element name have been read, instead of just the < bracket and some valid name character,
|
||||
// previously the case. This change was made so that elements with optional closing tags could read a new
|
||||
// element's start tag and decide whether they were required to close. However, there is a question of whether to
|
||||
// handle this in the parser or lexical analyzer. It is currently handled in the parser - the lexical analyzer still
|
||||
// recognizes a start tag opener as a '<' + valid name start char; it is the parser that reads the actual name.
|
||||
// this is correct behavior assuming that the name is a valid html name, because the lexical analyzer should not know anything
|
||||
// about optional closing tags, etc. UPDATED: 10/13/2004: I am updating this to read the whole start tag of something
|
||||
// that is not an HTML, treat it as empty, and add it to the tree. That way the converter will know it's there, but
|
||||
// it will have no content. We could also partially recover by trying to look up and match names if they are similar
|
||||
// TODO 2. Invalid element names: However, it might make sense to give the lexical analyzer the ability to identify
|
||||
// a valid html element name and not return something as a start tag otherwise. For example, if we type <good>, should
|
||||
// the lexical analyzer return that it has found the start of an element when this is not the case in HTML? But this will
|
||||
// require implementing a lookahead token in the lexical analyzer so that it can treat an invalid element name as text. One
|
||||
// character of lookahead will not be enough.
|
||||
// TODO 3. Attributes: The attribute recovery is poor when reading attribute values in quotes - if no closing quotes are found,
|
||||
// the lexical analyzer just keeps reading and if it eventually reaches the end of file, it would have just skipped everything.
|
||||
// There are a couple of ways to deal with this: 1) stop reading attributes when we encounter a '>' character - this doesn't allow
|
||||
// the '>' character to be used in attribute values, but it can still be used as an entity. 2) Maintain a HTML-specific list
|
||||
// of attributes and their values that each html element can take, and if we find correct attribute names and values for an
|
||||
// element we use them regardless of the quotes, this way we could just ignore something invalid. One more option: 3) Read ahead
|
||||
// in the quoted value and if we find an end of file, we can return to where we were and process as text. However this requires
|
||||
// a lot of lookahead and a resettable reader.
|
||||
// TODO 4: elements with optional closing tags: For elements with optional closing tags, we always close the element if we find
|
||||
// that one of its ancestors has closed. This condition may be too broad and we should develop a better heuristic. We should also
|
||||
// improve the heuristics for closing certain elements when the next element starts
|
||||
// TODO 5. Nesting: Support for unbalanced nesting, e.g. <b> <i> </b> </i>: this is not presently supported. To support it we may need
|
||||
// to maintain two xml elements, one the element that represents what has already been read and another represents what we are presently reading.
|
||||
// Then if we encounter an unbalanced nesting tag we could close the element that was supposed to close, save the current element
|
||||
// and store it in the list of already-read content, and then open a new element to which all tags that are currently open
|
||||
// can be applied. Is there a better way to do this? Should we do it at all?
|
||||
// TODO 6. Elements with optional starting tags: there are 4 such elements in the HTML 4 specification - html, tbody, body and head.
|
||||
// The current recovery doesn't do anything for any of these elements except the html element, because it's not critical - head
|
||||
// and body elements can be contained within html element, and tbody is contained within table. To extend this for XHTML
|
||||
// extensions, and to recover in case other elements are missing start tags, we would need to insert an extra recursive call
|
||||
// to ParseXmlElement for the missing start tag. It is suggested to do this by giving ParseXmlElement an argument that specifies
|
||||
// a name to use. If this argument is null, it assumes its name is the next token from the lexical analyzer and continues
|
||||
// exactly as it does now. However, if the argument contains a valid html element name then it takes that value as its name
|
||||
// and continues as before. This way, if the next token is the element that should actually be its child, it will see
|
||||
// the name in the next step and initiate a recursive call. We would also need to add some logic in the loop for when a start tag
|
||||
// is found - if the start tag is not compatible with current context and indicates that a start tag has been missed, then we
|
||||
// can initiate the extra recursive call and give it the name of the missed start tag. The issues are when to insert this logic,
|
||||
// and if we want to support it over multiple missing start tags. If we insert it at the time a start tag is read in element
|
||||
// text, then we can support only one missing start tag, since the extra call will read the next start tag and make a recursive
|
||||
// call without checking the context. This is a conceptual problem, and the check should be made just before a recursive call,
|
||||
// with the choice being whether we should supply an element name as argument, or leave it as NULL and read from the input
|
||||
// TODO 7: Context: Is it appropriate to keep track of context here? For example, should we only expect td, tr elements when
|
||||
// reading a table and ignore them otherwise? This may be too much of a load on the parser, I think it's better if the converter
|
||||
// deals with it
|
||||
|
||||
namespace HtmlToXamlDemo
|
||||
{
|
||||
/// <summary>
|
||||
/// HtmlParser class accepts a string of possibly badly formed Html, parses it and returns a string
|
||||
/// of well-formed Html that is as close to the original string in content as possible
|
||||
/// </summary>
|
||||
internal class HtmlParser
|
||||
{
|
||||
// ---------------------------------------------------------------------
|
||||
//
|
||||
// Constructors
|
||||
//
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
#region Constructors
|
||||
|
||||
/// <summary>
/// Constructor. Creates the output document and the element stacks, builds the
/// lexical analyzer over the given input string and reads the first content token.
/// </summary>
/// <param name="inputString">
/// string to be parsed into well-formed Html
/// </param>
private HtmlParser(string inputString)
{
    // Output xml document that receives the parsed elements.
    _document = new XmlDocument();

    // Bookkeeping stacks: pending inline elements and currently opened elements.
    _pendingInlineElements = new Stack<XmlElement>();
    _openedElements = new Stack<XmlElement>();

    // Lexical analyzer over the input, primed with its first token (text is expected).
    _htmlLexicalAnalyzer = new HtmlLexicalAnalyzer(inputString);
    _htmlLexicalAnalyzer.GetNextContentToken();
}
|
||||
|
||||
#endregion Constructors
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
//
|
||||
// Internal Methods
|
||||
//
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
#region Internal Methods
|
||||
|
||||
/// <summary>
/// Creates an HtmlParser for the given input string and runs the parsing function on it.
/// </summary>
/// <param name="htmlString">
/// Input string of possibly badly-formed Html to be parsed into well-formed Html
/// </param>
/// <returns>
/// The XmlElement returned by ParseHtmlContent for the parsed input
/// </returns>
internal static XmlElement ParseHtml(string htmlString)
{
    return new HtmlParser(htmlString).ParseHtmlContent();
}
|
||||
|
||||
// .....................................................................
|
||||
//
|
||||
// Html Header on Clipboard
|
||||
//
|
||||
// .....................................................................
|
||||
|
||||
// Layout of the html clipboard header, one "Name:number" entry per line:
//      Version:1.0
//      StartHTML:000000000
//      EndHTML:000000000
//      StartFragment:000000000
//      EndFragment:000000000
//      StartSelection:000000000
//      EndSelection:000000000
// Format string for that header; each {n:D10} placeholder is formatted with at least 10 digits.
internal const string HtmlHeader =
    "Version:1.0\r\n" +
    "StartHTML:{0:D10}\r\n" +
    "EndHTML:{1:D10}\r\n" +
    "StartFragment:{2:D10}\r\n" +
    "EndFragment:{3:D10}\r\n" +
    "StartSelection:{4:D10}\r\n" +
    "EndSelection:{5:D10}\r\n";

// Comment markers that delimit the fragment inside the html text.
internal const string HtmlStartFragmentComment = "<!--StartFragment-->";
internal const string HtmlEndFragmentComment = "<!--EndFragment-->";
|
||||
|
||||
/// <summary>
/// Extracts the Html string from clipboard data by reading the StartHTML and EndHTML
/// offsets from the header in htmlDataString.
/// </summary>
/// <param name="htmlDataString">
/// String representing Html clipboard data. This includes the Html header
/// </param>
/// <returns>
/// String containing only the Html data part of htmlDataString, without the header.
/// If the header is missing, truncated, non-numeric or inconsistent (EndHTML before StartHTML,
/// StartHTML negative or beyond the end of the data), the text "ERROR: Urecognized html header"
/// is returned instead; no exception is thrown for malformed input.
/// An EndHTML offset beyond the end of the data is clamped to the end of the data.
/// </returns>
internal static string ExtractHtmlFromClipboardData(string htmlDataString)
{
    // Kept byte-for-byte (including its spelling) in case callers compare against it.
    const string unrecognizedHeader = "ERROR: Urecognized html header";

    if (htmlDataString == null)
    {
        return unrecognizedHeader;
    }

    int startHtmlIndex;
    if (!TryReadClipboardOffset(htmlDataString, "StartHTML:", out startHtmlIndex) ||
        startHtmlIndex < 0 || startHtmlIndex > htmlDataString.Length)
    {
        return unrecognizedHeader;
    }

    int endHtmlIndex;
    if (!TryReadClipboardOffset(htmlDataString, "EndHTML:", out endHtmlIndex))
    {
        return unrecognizedHeader;
    }
    if (endHtmlIndex > htmlDataString.Length)
    {
        endHtmlIndex = htmlDataString.Length;
    }
    if (endHtmlIndex < startHtmlIndex)
    {
        // Substring would throw on a negative length.
        return unrecognizedHeader;
    }

    // NOTE(review): the offsets are used as character indices into the string; confirm that
    // this matches how the clipboard data was decoded by the caller.
    return htmlDataString.Substring(startHtmlIndex, endHtmlIndex - startHtmlIndex);
}

/// <summary>
/// Finds "label" in the clipboard data and parses the 10 characters that follow it as an integer.
/// Returns false when the label is absent, fewer than 10 characters follow it, or they are not a number.
/// </summary>
private static bool TryReadClipboardOffset(string data, string label, out int offset)
{
    // TODO: offsets are assumed to occupy exactly 10 characters ("0123456789".Length),
    // which could be a wrong assumption. More flexible parsing may be needed here.
    var digitCount = "0123456789".Length;

    offset = 0;
    var labelIndex = data.IndexOf(label, StringComparison.Ordinal);
    if (labelIndex < 0)
    {
        return false;
    }

    var valueIndex = labelIndex + label.Length;
    if (valueIndex + digitCount > data.Length)
    {
        return false;
    }

    // The header is machine-readable data, so it is parsed with the invariant culture.
    return int.TryParse(
        data.Substring(valueIndex, digitCount),
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture,
        out offset);
}
|
||||
|
||||
/// <summary>
/// Prefixes an Html data string with the clipboard header (HtmlHeader) so that it can be
/// placed on the clipboard.
/// </summary>
/// <param name="htmlString">
/// Html string to be placed on clipboard with appropriate header
/// </param>
/// <returns>
/// The formatted header followed by htmlString
/// </returns>
internal static string AddHtmlClipboardHeader(string htmlString)
{
    // Each of the 6 numbers appears in the format string as "{0:D10}" (7 characters) but
    // occupies 10 digit positions ("0123456789") once formatted, so the finished header is
    // longer than the format string by that difference, six times over.
    var growthPerNumber = "0123456789".Length - "{0:D10}".Length;
    var startHtml = HtmlHeader.Length + 6*growthPerNumber;
    var endHtml = startHtml + htmlString.Length;

    // The fragment starts right after the start marker, or at the start of the html if there is none.
    var startMarkerPosition = htmlString.IndexOf(HtmlStartFragmentComment, StringComparison.Ordinal);
    var startFragment = startMarkerPosition < 0
        ? startHtml
        : startHtml + startMarkerPosition + HtmlStartFragmentComment.Length;

    // The fragment ends where the end marker begins, or at the end of the html if there is none.
    var endMarkerPosition = htmlString.IndexOf(HtmlEndFragmentComment, StringComparison.Ordinal);
    var endFragment = endMarkerPosition < 0
        ? endHtml
        : startHtml + endMarkerPosition;

    // The selection offsets repeat the fragment offsets.
    var header = string.Format(HtmlHeader, startHtml, endHtml, startFragment, endFragment, startFragment,
        endFragment);

    return header + htmlString;
}
|
||||
|
||||
#endregion Internal Methods
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
//
|
||||
// Private methods
|
||||
//
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
#region Private Methods
|
||||
|
||||
// Throws an Exception carrying the given message when the condition does not hold.
private void InvariantAssert(bool condition, string message)
{
    if (condition)
    {
        return;
    }

    throw new Exception("Assertion error: " + message);
}
|
||||
|
||||
/// <summary>
/// Parses the stream of html tokens starting
/// from the name of the top-level element.
/// </summary>
/// <returns>
/// XmlElement representing the top-level html element
/// </returns>
private XmlElement ParseHtmlContent()
{
    // Create an artificial root element so that multiple top-level elements can be grouped.
    // This "html" element may duplicate the real HTML element; it is unwrapped again below.
    var htmlRootElement = _document.CreateElement("html", XhtmlNamespace);
    OpenStructuringElement(htmlRootElement);

    while (_htmlLexicalAnalyzer.NextTokenType != HtmlTokenType.Eof)
    {
        switch (_htmlLexicalAnalyzer.NextTokenType)
        {
            case HtmlTokenType.OpeningTagStart:
                _htmlLexicalAnalyzer.GetNextTagToken();
                if (_htmlLexicalAnalyzer.NextTokenType == HtmlTokenType.Name)
                {
                    // Element names are normalized with the invariant culture so that
                    // schema lookups do not depend on the current thread culture.
                    var openedElementName = _htmlLexicalAnalyzer.NextToken.ToLowerInvariant();
                    _htmlLexicalAnalyzer.GetNextTagToken();

                    // Create an element and read its attributes
                    var htmlElement = _document.CreateElement(openedElementName, XhtmlNamespace);
                    ParseAttributes(htmlElement);

                    if (_htmlLexicalAnalyzer.NextTokenType == HtmlTokenType.EmptyTagEnd ||
                        HtmlSchema.IsEmptyElement(openedElementName))
                    {
                        // Element without content (explicit slash or implicit knowledge about html)
                        AddEmptyElement(htmlElement);
                    }
                    else if (HtmlSchema.IsInlineElement(openedElementName))
                    {
                        // Formatting elements go to the pending stack, which lets them be
                        // transferred over block tags - this turns overlapping tags into a
                        // normal hierarchical element structure.
                        OpenInlineElement(htmlElement);
                    }
                    else if (HtmlSchema.IsBlockElement(openedElementName) ||
                             HtmlSchema.IsKnownOpenableElement(openedElementName))
                    {
                        // This includes no-scope elements
                        OpenStructuringElement(htmlElement);
                    }
                    // Any other element name is not added to the tree.
                }
                break;

            case HtmlTokenType.ClosingTagStart:
                _htmlLexicalAnalyzer.GetNextTagToken();
                if (_htmlLexicalAnalyzer.NextTokenType == HtmlTokenType.Name)
                {
                    var closedElementName = _htmlLexicalAnalyzer.NextToken.ToLowerInvariant();

                    // Skip the name token. The following token is assumed to be the end of tag,
                    // but this is not checked; if it is not, one token is simply ignored
                    // - this is the recovery from bad xml in this case.
                    _htmlLexicalAnalyzer.GetNextTagToken();

                    CloseElement(closedElementName);
                }
                break;

            case HtmlTokenType.Text:
                AddTextContent(_htmlLexicalAnalyzer.NextToken);
                break;

            case HtmlTokenType.Comment:
                AddComment(_htmlLexicalAnalyzer.NextToken);
                break;
        }

        _htmlLexicalAnalyzer.GetNextContentToken();
    }

    // Get rid of the artificial root element when its only child is a real "html" element
    var onlyChild = htmlRootElement.FirstChild as XmlElement;
    if (onlyChild != null &&
        htmlRootElement.FirstChild == htmlRootElement.LastChild &&
        string.Equals(onlyChild.LocalName, "html", StringComparison.OrdinalIgnoreCase))
    {
        htmlRootElement = onlyChild;
    }

    return htmlRootElement;
}
|
||||
|
||||
// Creates a new element in the xhtml namespace with the same local name as htmlElement
// and copies every attribute (name and value) onto it. Child nodes are not copied.
private XmlElement CreateElementCopy(XmlElement htmlElement)
{
    var clone = _document.CreateElement(htmlElement.LocalName, XhtmlNamespace);

    foreach (XmlAttribute sourceAttribute in htmlElement.Attributes)
    {
        clone.SetAttribute(sourceAttribute.Name, sourceAttribute.Value);
    }

    return clone;
}
|
||||
|
||||
// Appends an element without content to the element currently on top of the opened stack.
// The empty element itself is not pushed onto the stack.
private void AddEmptyElement(XmlElement htmlEmptyElement)
{
    InvariantAssert(_openedElements.Count > 0,
        "AddEmptyElement: Stack of opened elements cannot be empty, as we have at least one artificial root element");

    _openedElements.Peek().AppendChild(htmlEmptyElement);
}
|
||||
|
||||
// Defers an inline element: it is only pushed onto the pending stack here
// and is not yet attached to the document.
private void OpenInlineElement(XmlElement htmlInlineElement) =>
    _pendingInlineElements.Push(htmlInlineElement);
|
||||
|
||||
// Opens a structuring element such as Div or Table etc.
private void OpenStructuringElement(XmlElement htmlElement)
{
    var elementName = htmlElement.LocalName;

    // A block element delimits inline elements: every inline element currently open is
    // closed here and a copy of it is queued as pending, so that it gets re-opened inside
    // the following structural element (if any). This keeps inline elements inside the
    // most nested blocks only.
    if (HtmlSchema.IsBlockElement(elementName))
    {
        while (_openedElements.Count > 0 && HtmlSchema.IsInlineElement(_openedElements.Peek().LocalName))
        {
            var closedInline = _openedElements.Pop();
            InvariantAssert(_openedElements.Count > 0,
                "OpenStructuringElement: stack of opened elements cannot become empty here");

            _pendingInlineElements.Push(CreateElementCopy(closedInline));
        }
    }

    // Attach the element to its parent
    if (_openedElements.Count > 0)
    {
        // Some known block elements (LI and P) auto-close when the next element starts
        if (HtmlSchema.ClosesOnNextElementStart(_openedElements.Peek().LocalName, elementName))
        {
            _openedElements.Pop();
        }

        // NOTE:
        // An empty stack is not expected here - it would mean two top-level P or LI
        // (without a parent). In that case the element is not attached to any parent.
        if (_openedElements.Count > 0)
        {
            _openedElements.Peek().AppendChild(htmlElement);
        }
    }

    // The element becomes the current open element
    _openedElements.Push(htmlElement);
}
|
||||
|
||||
// Returns true when any element on the opened stack has the given local name.
private bool IsElementOpened(string htmlElementName)
{
    foreach (var openedElement in _openedElements)
    {
        if (openedElement.LocalName == htmlElementName)
        {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
// Handles a closing tag with the given (lower-cased) element name.
private void CloseElement(string htmlElementName)
{
    InvariantAssert(_openedElements.Count > 0,
        "CloseElement: Stack of opened elements cannot be empty, as we have at least one artificial root element");

    // Case 1: the element is still pending, i.e. it is an inline element that was opened
    // but never received content. Attach it (empty) to the current parent.
    // HtmlConverter will skip empty inlines, but they are kept on parser level for completeness.
    var closesPendingInline = _pendingInlineElements.Count > 0 &&
                              _pendingInlineElements.Peek().LocalName == htmlElementName;
    if (closesPendingInline)
    {
        var emptyInline = _pendingInlineElements.Pop();
        InvariantAssert(_openedElements.Count > 0,
            "CloseElement: Stack of opened elements cannot be empty, as we have at least one artificial root element");
        _openedElements.Peek().AppendChild(emptyInline);
        return;
    }

    // Case 2: the element was never opened - the unbalanced closing tag is ignored.
    if (!IsElementOpened(htmlElementName))
    {
        return;
    }

    // Case 3: pop until the matching element is found, closing all unbalanced elements
    // on the way. The last element (the artificial root) is never popped.
    while (_openedElements.Count > 1)
    {
        var popped = _openedElements.Pop();
        if (popped.LocalName == htmlElementName)
        {
            return;
        }

        if (HtmlSchema.IsInlineElement(popped.LocalName))
        {
            // Unbalanced inlines are transferred to the next element content
            _pendingInlineElements.Push(CreateElementCopy(popped));
        }
    }
}
|
||||
|
||||
// Opens all pending inline elements, then appends a text node with the given content
// to the element on top of the opened stack.
private void AddTextContent(string textContent)
{
    OpenPendingInlineElements();

    InvariantAssert(_openedElements.Count > 0,
        "AddTextContent: Stack of opened elements cannot be empty, as we have at least one artificial root element");

    var currentElement = _openedElements.Peek();
    currentElement.AppendChild(_document.CreateTextNode(textContent));
}
|
||||
|
||||
// Opens all pending inline elements, then appends an xml comment node with the given text
// to the element on top of the opened stack.
private void AddComment(string comment)
{
    OpenPendingInlineElements();

    InvariantAssert(_openedElements.Count > 0,
        "AddComment: Stack of opened elements cannot be empty, as we have at least one artificial root element");

    var currentElement = _openedElements.Peek();
    currentElement.AppendChild(_document.CreateComment(comment));
}
|
||||
|
||||
// Moves all inline elements pending for opening to the actual document
// and adds them to the current open stack.
// The element pushed to the pending stack first is opened first (outermost),
// the one pushed last is opened last (innermost).
private void OpenPendingInlineElements()
{
    if (_pendingInlineElements.Count == 0)
    {
        return;
    }

    // ToArray returns the stack top-first; the pending stack is emptied before anything is attached.
    var pendingTopFirst = _pendingInlineElements.ToArray();
    _pendingInlineElements.Clear();

    for (var index = pendingTopFirst.Length - 1; index >= 0; index--)
    {
        var inlineElement = pendingTopFirst[index];

        InvariantAssert(_openedElements.Count > 0,
            "OpenPendingInlineElements: Stack of opened elements cannot be empty, as we have at least one artificial root element");

        _openedElements.Peek().AppendChild(inlineElement);
        _openedElements.Push(inlineElement);
    }
}
|
||||
|
||||
// Reads tag tokens until the end of the tag (or end of input) and sets every
// name=value pair found as an attribute on xmlElement.
private void ParseAttributes(XmlElement xmlElement)
{
    while (true)
    {
        var tokenType = _htmlLexicalAnalyzer.NextTokenType;
        if (tokenType == HtmlTokenType.Eof ||
            tokenType == HtmlTokenType.TagEnd ||
            tokenType == HtmlTokenType.EmptyTagEnd)
        {
            break;
        }

        // read next attribute (name=value)
        if (tokenType == HtmlTokenType.Name)
        {
            var name = _htmlLexicalAnalyzer.NextToken;

            // advance over the equal sign, then onto the value atom
            _htmlLexicalAnalyzer.GetNextEqualSignToken();
            _htmlLexicalAnalyzer.GetNextAtomToken();

            xmlElement.SetAttribute(name, _htmlLexicalAnalyzer.NextToken);
        }

        _htmlLexicalAnalyzer.GetNextTagToken();
    }
}
|
||||
|
||||
#endregion Private Methods
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
//
|
||||
// Private Fields
|
||||
//
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
#region Private Fields
|
||||
|
||||
internal const string XhtmlNamespace = "http://www.w3.org/1999/xhtml";
|
||||
|
||||
private readonly HtmlLexicalAnalyzer _htmlLexicalAnalyzer;
|
||||
|
||||
// document from which all elements are created
|
||||
private readonly XmlDocument _document;
|
||||
|
||||
// stack for open elements
|
||||
private readonly Stack<XmlElement> _openedElements;
|
||||
private readonly Stack<XmlElement> _pendingInlineElements;
|
||||
|
||||
#endregion Private Fields
|
||||
}
|
||||
}
|
||||
733
JRCookbookBusiness/Converters/HtmlSchema.cs
Normal file
733
JRCookbookBusiness/Converters/HtmlSchema.cs
Normal file
@@ -0,0 +1,733 @@
|
||||
// // Copyright (c) Microsoft. All rights reserved.
|
||||
// // Licensed under the MIT license. See LICENSE file in the project root for full license information.
|
||||
|
||||
using System.Collections;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace HtmlToXamlDemo
|
||||
{
|
||||
/// <summary>
|
||||
/// HtmlSchema class
|
||||
/// maintains static information about HTML structure
|
||||
/// can be used by HtmlParser to check conditions under which an element starts or ends, etc.
|
||||
/// </summary>
|
||||
internal class HtmlSchema
|
||||
{
|
||||
// ---------------------------------------------------------------------
|
||||
//
|
||||
// Constructors
|
||||
//
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
#region Constructors
|
||||
|
||||
/// <summary>
/// Static constructor: fills every lookup table of the schema
/// (element name lists, auto-closing rules and character entities).
/// </summary>
static HtmlSchema()
{
    // element name lists: inline, block and other openable elements
    InitializeInlineElements();
    InitializeBlockElements();
    InitializeOtherOpenableElements();

    // elements that never have content
    InitializeEmptyElements();

    // elements closing when the outer element ends
    InitializeElementsClosingOnParentElementEnd();

    // elements closing when a new element starts
    InitializeElementsClosingOnNewElementStart();

    // entity name to character table
    InitializeHtmlCharacterEntities();
}
|
||||
|
||||
#endregion Constructors;
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
//
|
||||
// Internal Methods
|
||||
//
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
#region Internal Methods
|
||||
|
||||
/// <summary>
/// returns true when xmlElementName corresponds to an empty element
/// </summary>
/// <param name="xmlElementName">
/// string representing name to test; compared case-insensitively
/// </param>
/// <returns>true when the lower-cased name is in the empty elements list</returns>
internal static bool IsEmptyElement(string xmlElementName)
{
    // Invariant lower-casing: the list holds ASCII names, and a culture-sensitive ToLower()
    // can map e.g. "I" to a dotless i under some cultures, which would miss "img"/"input".
    return _htmlEmptyElements.Contains(xmlElementName.ToLowerInvariant());
}
|
||||
|
||||
/// <summary>
/// returns true if xmlElementName represents a block formatting element.
/// It is used by the algorithm in HtmlParser that transfers inline elements
/// over block elements.
/// </summary>
/// <param name="xmlElementName">element name to test; compared as given (no case conversion)</param>
/// <returns>true when the name is in the block elements list</returns>
internal static bool IsBlockElement(string xmlElementName)
{
    return _htmlBlockElements.Contains(xmlElementName);
}
|
||||
|
||||
/// <summary>
/// returns true if xmlElementName represents an inline formatting element
/// </summary>
/// <param name="xmlElementName">element name to test; compared as given (no case conversion)</param>
/// <returns>true when the name is in the inline elements list</returns>
internal static bool IsInlineElement(string xmlElementName)
{
    return _htmlInlineElements.Contains(xmlElementName);
}
|
||||
|
||||
/// <summary>
/// returns true if xmlElementName is in the list of known html elements
/// that the HTML parser is allowed to produce, but that should not act
/// as inline, block or no-scope elements.
/// Presence in that list allows such elements to be opened during html parsing
/// and added to the tree produced by the html parser.
/// </summary>
/// <param name="xmlElementName">element name to test; compared as given (no case conversion)</param>
/// <returns>true when the name is in the other openable elements list</returns>
internal static bool IsKnownOpenableElement(string xmlElementName)
{
    return _htmlOtherOpenableElements.Contains(xmlElementName);
}
|
||||
|
||||
/// <summary>
/// returns true when xmlElementName closes when the outer element closes;
/// this is true of elements with optional closing tags
/// </summary>
/// <param name="xmlElementName">
/// string representing name to test; compared case-insensitively
/// </param>
/// <returns>true when the lower-cased name is in the closing-on-parent-end list</returns>
internal static bool ClosesOnParentElementEnd(string xmlElementName)
{
    // Invariant lower-casing keeps the lookup independent of the current culture
    // (a culture-sensitive ToLower() can change "I" into a dotless i, missing "li").
    return _htmlElementsClosingOnParentElementEnd.Contains(xmlElementName.ToLowerInvariant());
}
|
||||
|
||||
/// <summary>
/// returns true if the current element closes when the new element, whose name has just been read, starts
/// </summary>
/// <param name="currentElementName">
/// string representing current element name; expected to be lower case
/// </param>
/// <param name="nextElementName">
/// string representing name of the next element that will start
/// </param>
/// <returns>true when the start of nextElementName closes currentElementName</returns>
internal static bool ClosesOnNextElementStart(string currentElementName, string nextElementName)
{
    Debug.Assert(currentElementName == currentElementName.ToLower());

    ArrayList closingElements;
    // colgroup, dd and dt additionally require the next element to be a block element
    var nextMustBeBlock = false;

    switch (currentElementName)
    {
        case "colgroup":
            closingElements = _htmlElementsClosingColgroup;
            nextMustBeBlock = true;
            break;
        case "dd":
            closingElements = _htmlElementsClosingDd;
            nextMustBeBlock = true;
            break;
        case "dt":
            closingElements = _htmlElementsClosingDt;
            nextMustBeBlock = true;
            break;
        case "li":
            closingElements = _htmlElementsClosingLi;
            break;
        case "p":
            // a paragraph closes on any block element
            return IsBlockElement(nextElementName);
        case "tbody":
            closingElements = _htmlElementsClosingTbody;
            break;
        case "tfoot":
            closingElements = _htmlElementsClosingTfoot;
            break;
        case "thead":
            closingElements = _htmlElementsClosingThead;
            break;
        case "tr":
            closingElements = _htmlElementsClosingTr;
            break;
        case "td":
            closingElements = _htmlElementsClosingTd;
            break;
        case "th":
            closingElements = _htmlElementsClosingTh;
            break;
        default:
            return false;
    }

    if (!closingElements.Contains(nextElementName))
    {
        return false;
    }

    return !nextMustBeBlock || IsBlockElement(nextElementName);
}
|
||||
|
||||
/// <summary>
/// returns true if the string passed as argument is an Html entity name
/// </summary>
/// <param name="entityName">
/// string to be tested for Html entity name
/// </param>
/// <returns>true when entityName is a key of the character entities table</returns>
internal static bool IsEntity(string entityName)
{
    // entity names are case-sensitive, so the name is looked up exactly as given
    return _htmlCharacterEntities.Contains(entityName);
}
|
||||
|
||||
/// <summary>
/// returns the character represented by the entity name string passed as an argument,
/// if the string is an entity name as specified in _htmlCharacterEntities;
/// returns the character value of 0 otherwise
/// </summary>
/// <param name="entityName">
/// string representing entity name whose character value is desired
/// </param>
/// <returns>the mapped character, or (char) 0 for an unknown name</returns>
internal static char EntityCharacterValue(string entityName)
{
    // The Hashtable indexer yields null for a missing key.
    var mappedValue = _htmlCharacterEntities[entityName];
    return mappedValue == null ? (char) 0 : (char) mappedValue;
}
|
||||
|
||||
#endregion Internal Methods
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
//
|
||||
// Internal Properties
|
||||
//
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
#region Internal Properties
|
||||
|
||||
#endregion Internal Properties
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
//
|
||||
// Private Methods
|
||||
//
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
#region Private Methods
|
||||
|
||||
// Fills _htmlInlineElements with the names of elements treated as inline formatting elements.
private static void InitializeInlineElements()
{
    string[] inlineElementNames =
    {
        "a", "abbr", "acronym", "address", "b", "bdo", "big", "button",
        "code",
        "del", // deleted text
        "dfn", "em", "font", "i",
        "ins", // inserted text
        "kbd", // text to be entered by a user
        "label", "legend",
        "q", // short inline quotation
        "s", // strike-through text style
        "samp", // specifies a code sample
        "small", "span", "strike", "strong", "sub", "sup", "u",
        "var" // indicates an instance of a program variable
    };

    _htmlInlineElements = new ArrayList(inlineElementNames);
}
|
||||
|
||||
// Fills _htmlBlockElements with the names of elements treated as block elements.
private static void InitializeBlockElements()
{
    string[] blockElementNames =
    {
        "blockquote", "body", "caption", "center", "cite", "dd",
        "dir", // treat as UL element
        "div", "dl", "dt",
        "form", // NOTE(review): presumably the entry meant by "Not a block according to XHTML spec" - confirm
        "h1", "h2", "h3", "h4", "h5", "h6",
        "html", "li",
        "menu", // treat as UL element
        "ol", "p", "pre",
        "table", "tbody", "td", "textarea", "tfoot", "th", "thead", "tr",
        "tt", // renders text in a fixed-width font
        "ul"
    };

    _htmlBlockElements = new ArrayList(blockElementNames);
}
|
||||
|
||||
/// <summary>
/// initializes _htmlEmptyElements with the empty elements of the HTML 4 spec at
/// http://www.w3.org/TR/REC-html40/index/elements.html
/// </summary>
private static void InitializeEmptyElements()
{
    // Empty (no-scope) elements: they require no closing tag and accept no content
    string[] emptyElementNames =
    {
        "area", "base", "basefont", "br", "col", "frame", "hr",
        "img", "input", "isindex", "link", "meta", "param"
    };

    _htmlEmptyElements = new ArrayList(emptyElementNames);
}
|
||||
|
||||
// Fills _htmlOtherOpenableElements: known html elements that the HTML parser may produce
// but that should not act as inline, block or no-scope elements. Presence in this list
// allows an element to be opened during html parsing and added to the resulting tree.
private static void InitializeOtherOpenableElements()
{
    // "form" is intentionally absent here - it is treated as a block element
    string[] openableElementNames =
    {
        "applet", "base", "basefont", "colgroup", "fieldset", "frameset",
        "head", "iframe", "map", "noframes", "noscript", "object",
        "optgroup", "option", "script", "select", "style", "title"
    };

    _htmlOtherOpenableElements = new ArrayList(openableElementNames);
}
|
||||
|
||||
/// <summary>
/// initializes _htmlElementsClosingOnParentElementEnd with the list of HTML 4 elements
/// for which closing tags are optional.
/// Any element with an optional closing tag is assumed to close when its outer element
/// (in which it is nested) closes.
/// </summary>
private static void InitializeElementsClosingOnParentElementEnd()
{
    string[] optionalEndTagElementNames =
    {
        "body", "colgroup", "dd", "dt", "head", "html", "li",
        "p", "tbody", "td", "tfoot", "thead", "th", "tr"
    };

    _htmlElementsClosingOnParentElementEnd = new ArrayList(optionalEndTagElementNames);
}
|
||||
|
||||
// Fills the per-element lists naming which starting elements close an element
// whose closing tag is optional. These lists are consulted by ClosesOnNextElementStart.
private static void InitializeElementsClosingOnNewElementStart()
{
    _htmlElementsClosingColgroup = new ArrayList {"colgroup", "tr", "thead", "tfoot", "tbody"};

    _htmlElementsClosingDd = new ArrayList {"dd", "dt"};
    // TODO: dd may end in other cases as well - if a new "p" starts, etc.
    // TODO: these are the basic "legal" cases but there may be more recovery

    // The dt list previously stayed empty because its entries were added to the dd list by mistake.
    _htmlElementsClosingDt = new ArrayList {"dd", "dt"};
    // TODO: dt may end in other cases as well - if a new "p" starts, etc.
    // TODO: these are the basic "legal" cases but there may be more recovery

    _htmlElementsClosingLi = new ArrayList {"li"};
    // TODO: more complex recovery

    _htmlElementsClosingTbody = new ArrayList {"tbody", "thead", "tfoot"};
    // TODO: more complex recovery

    _htmlElementsClosingTr = new ArrayList {"thead", "tfoot", "tbody", "tr"};
    // NOTE: tr should not really close on a new thead
    // because if there are rows before a thead, it is assumed to be in tbody, whose start tag is optional
    // and thead can't come after tbody
    // however, if we do encounter this, it's probably best to end the row and ignore the thead or treat
    // it as part of the table
    // TODO: more complex recovery

    _htmlElementsClosingTd = new ArrayList {"td", "th", "tr", "tbody", "tfoot", "thead"};
    // TODO: more complex recovery

    _htmlElementsClosingTh = new ArrayList {"td", "th", "tr", "tbody", "tfoot", "thead"};
    // TODO: more complex recovery

    _htmlElementsClosingThead = new ArrayList {"tbody", "tfoot"};
    // TODO: more complex recovery

    _htmlElementsClosingTfoot = new ArrayList {"tbody", "thead"};
    // although thead comes before tfoot, we add it because if it is found the tfoot should close
    // and some recovery processing be done on the thead
    // TODO: more complex recovery
}
|
||||
|
||||
/// <summary>
/// initializes the _htmlCharacterEntities hashtable, mapping each entity name
/// to the character it represents. Entity names are case-sensitive.
/// Includes "cedil", "Ocirc", "oacute" and "Pi", which were previously missing
/// from the HTML 4 entity set.
/// </summary>
private static void InitializeHtmlCharacterEntities()
{
    _htmlCharacterEntities = new Hashtable
    {
        ["Aacute"] = (char) 193, ["aacute"] = (char) 225, ["Acirc"] = (char) 194, ["acirc"] = (char) 226,
        ["acute"] = (char) 180, ["AElig"] = (char) 198, ["aelig"] = (char) 230, ["Agrave"] = (char) 192,
        ["agrave"] = (char) 224, ["alefsym"] = (char) 8501, ["Alpha"] = (char) 913, ["alpha"] = (char) 945,
        ["amp"] = (char) 38, ["and"] = (char) 8743, ["ang"] = (char) 8736, ["Aring"] = (char) 197,
        ["aring"] = (char) 229, ["asymp"] = (char) 8776, ["Atilde"] = (char) 195, ["atilde"] = (char) 227,
        ["Auml"] = (char) 196, ["auml"] = (char) 228,

        ["bdquo"] = (char) 8222, ["Beta"] = (char) 914, ["beta"] = (char) 946, ["brvbar"] = (char) 166,
        ["bull"] = (char) 8226,

        ["cap"] = (char) 8745, ["Ccedil"] = (char) 199, ["ccedil"] = (char) 231, ["cedil"] = (char) 184,
        ["cent"] = (char) 162, ["Chi"] = (char) 935, ["chi"] = (char) 967, ["circ"] = (char) 710,
        ["clubs"] = (char) 9827, ["cong"] = (char) 8773, ["copy"] = (char) 169, ["crarr"] = (char) 8629,
        ["cup"] = (char) 8746, ["curren"] = (char) 164,

        ["dagger"] = (char) 8224, ["Dagger"] = (char) 8225, ["darr"] = (char) 8595, ["dArr"] = (char) 8659,
        ["deg"] = (char) 176, ["Delta"] = (char) 916, ["delta"] = (char) 948, ["diams"] = (char) 9830,
        ["divide"] = (char) 247,

        ["Eacute"] = (char) 201, ["eacute"] = (char) 233, ["Ecirc"] = (char) 202, ["ecirc"] = (char) 234,
        ["Egrave"] = (char) 200, ["egrave"] = (char) 232, ["empty"] = (char) 8709, ["emsp"] = (char) 8195,
        ["ensp"] = (char) 8194, ["Epsilon"] = (char) 917, ["epsilon"] = (char) 949, ["equiv"] = (char) 8801,
        ["Eta"] = (char) 919, ["eta"] = (char) 951, ["ETH"] = (char) 208, ["eth"] = (char) 240,
        ["Euml"] = (char) 203, ["euml"] = (char) 235, ["euro"] = (char) 8364, ["exist"] = (char) 8707,

        ["fnof"] = (char) 402, ["forall"] = (char) 8704, ["frac12"] = (char) 189, ["frac14"] = (char) 188,
        ["frac34"] = (char) 190, ["frasl"] = (char) 8260,

        ["Gamma"] = (char) 915, ["gamma"] = (char) 947, ["ge"] = (char) 8805, ["gt"] = (char) 62,

        ["harr"] = (char) 8596, ["hArr"] = (char) 8660, ["hearts"] = (char) 9829, ["hellip"] = (char) 8230,

        ["Iacute"] = (char) 205, ["iacute"] = (char) 237, ["Icirc"] = (char) 206, ["icirc"] = (char) 238,
        ["iexcl"] = (char) 161, ["Igrave"] = (char) 204, ["igrave"] = (char) 236, ["image"] = (char) 8465,
        ["infin"] = (char) 8734, ["int"] = (char) 8747, ["Iota"] = (char) 921, ["iota"] = (char) 953,
        ["iquest"] = (char) 191, ["isin"] = (char) 8712, ["Iuml"] = (char) 207, ["iuml"] = (char) 239,

        ["Kappa"] = (char) 922, ["kappa"] = (char) 954,

        ["Lambda"] = (char) 923, ["lambda"] = (char) 955, ["lang"] = (char) 9001, ["laquo"] = (char) 171,
        ["larr"] = (char) 8592, ["lArr"] = (char) 8656, ["lceil"] = (char) 8968, ["ldquo"] = (char) 8220,
        ["le"] = (char) 8804, ["lfloor"] = (char) 8970, ["lowast"] = (char) 8727, ["loz"] = (char) 9674,
        ["lrm"] = (char) 8206, ["lsaquo"] = (char) 8249, ["lsquo"] = (char) 8216, ["lt"] = (char) 60,

        ["macr"] = (char) 175, ["mdash"] = (char) 8212, ["micro"] = (char) 181, ["middot"] = (char) 183,
        ["minus"] = (char) 8722, ["Mu"] = (char) 924, ["mu"] = (char) 956,

        ["nabla"] = (char) 8711, ["nbsp"] = (char) 160, ["ndash"] = (char) 8211, ["ne"] = (char) 8800,
        ["ni"] = (char) 8715, ["not"] = (char) 172, ["notin"] = (char) 8713, ["nsub"] = (char) 8836,
        ["Ntilde"] = (char) 209, ["ntilde"] = (char) 241, ["Nu"] = (char) 925, ["nu"] = (char) 957,

        ["Oacute"] = (char) 211, ["oacute"] = (char) 243, ["Ocirc"] = (char) 212, ["ocirc"] = (char) 244,
        ["OElig"] = (char) 338, ["oelig"] = (char) 339, ["Ograve"] = (char) 210, ["ograve"] = (char) 242,
        ["oline"] = (char) 8254, ["Omega"] = (char) 937, ["omega"] = (char) 969, ["Omicron"] = (char) 927,
        ["omicron"] = (char) 959, ["oplus"] = (char) 8853, ["or"] = (char) 8744, ["ordf"] = (char) 170,
        ["ordm"] = (char) 186, ["Oslash"] = (char) 216, ["oslash"] = (char) 248, ["Otilde"] = (char) 213,
        ["otilde"] = (char) 245, ["otimes"] = (char) 8855, ["Ouml"] = (char) 214, ["ouml"] = (char) 246,

        ["para"] = (char) 182, ["part"] = (char) 8706, ["permil"] = (char) 8240, ["perp"] = (char) 8869,
        ["Phi"] = (char) 934, ["phi"] = (char) 966, ["Pi"] = (char) 928, ["pi"] = (char) 960,
        ["piv"] = (char) 982, ["plusmn"] = (char) 177, ["pound"] = (char) 163, ["prime"] = (char) 8242,
        ["Prime"] = (char) 8243, ["prod"] = (char) 8719, ["prop"] = (char) 8733, ["Psi"] = (char) 936,
        ["psi"] = (char) 968,

        ["quot"] = (char) 34,

        ["radic"] = (char) 8730, ["rang"] = (char) 9002, ["raquo"] = (char) 187, ["rarr"] = (char) 8594,
        ["rArr"] = (char) 8658, ["rceil"] = (char) 8969, ["rdquo"] = (char) 8221, ["real"] = (char) 8476,
        ["reg"] = (char) 174, ["rfloor"] = (char) 8971, ["Rho"] = (char) 929, ["rho"] = (char) 961,
        ["rlm"] = (char) 8207, ["rsaquo"] = (char) 8250, ["rsquo"] = (char) 8217,

        ["sbquo"] = (char) 8218, ["Scaron"] = (char) 352, ["scaron"] = (char) 353, ["sdot"] = (char) 8901,
        ["sect"] = (char) 167, ["shy"] = (char) 173, ["Sigma"] = (char) 931, ["sigma"] = (char) 963,
        ["sigmaf"] = (char) 962, ["sim"] = (char) 8764, ["spades"] = (char) 9824, ["sub"] = (char) 8834,
        ["sube"] = (char) 8838, ["sum"] = (char) 8721, ["sup"] = (char) 8835, ["sup1"] = (char) 185,
        ["sup2"] = (char) 178, ["sup3"] = (char) 179, ["supe"] = (char) 8839, ["szlig"] = (char) 223,

        ["Tau"] = (char) 932, ["tau"] = (char) 964, ["there4"] = (char) 8756, ["Theta"] = (char) 920,
        ["theta"] = (char) 952, ["thetasym"] = (char) 977, ["thinsp"] = (char) 8201, ["THORN"] = (char) 222,
        ["thorn"] = (char) 254, ["tilde"] = (char) 732, ["times"] = (char) 215, ["trade"] = (char) 8482,

        ["Uacute"] = (char) 218, ["uacute"] = (char) 250, ["uarr"] = (char) 8593, ["uArr"] = (char) 8657,
        ["Ucirc"] = (char) 219, ["ucirc"] = (char) 251, ["Ugrave"] = (char) 217, ["ugrave"] = (char) 249,
        ["uml"] = (char) 168, ["upsih"] = (char) 978, ["Upsilon"] = (char) 933, ["upsilon"] = (char) 965,
        ["Uuml"] = (char) 220, ["uuml"] = (char) 252,

        ["weierp"] = (char) 8472,

        ["Xi"] = (char) 926, ["xi"] = (char) 958,

        ["Yacute"] = (char) 221, ["yacute"] = (char) 253, ["yen"] = (char) 165, ["Yuml"] = (char) 376,
        ["yuml"] = (char) 255,

        ["Zeta"] = (char) 918, ["zeta"] = (char) 950, ["zwj"] = (char) 8205, ["zwnj"] = (char) 8204
    };
}
|
||||
|
||||
#endregion Private Methods
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
//
|
||||
// Private Fields
|
||||
//
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
#region Private Fields
|
||||
|
||||
// Lists of html element names. The three lists directly below are named for
// inline, block and "other openable" elements respectively.
// NOTE(review): the code that fills and queries these lists is outside this section - confirm the exact membership there.
|
||||
// These are ArrayLists for now; a hashtable may be a better choice later for lookup performance.
|
||||
private static ArrayList _htmlInlineElements;
|
||||
|
||||
private static ArrayList _htmlBlockElements;
|
||||
|
||||
private static ArrayList _htmlOtherOpenableElements;
|
||||
|
||||
// List of html empty element names.
|
||||
private static ArrayList _htmlEmptyElements;
|
||||
|
||||
// Names of html elements whose closing tags are optional and which close when the outer (enclosing) element closes.
|
||||
private static ArrayList _htmlElementsClosingOnParentElementEnd;
|
||||
|
||||
// The remaining lists hold names of elements that, when they start, close certain elements whose closing tag is optional.
// There is one list per such element.
|
||||
|
||||
// Names of elements closing the colgroup element.
|
||||
private static ArrayList _htmlElementsClosingColgroup;
|
||||
|
||||
// Names of elements closing the dd element.
|
||||
private static ArrayList _htmlElementsClosingDd;
|
||||
|
||||
// Names of elements closing the dt element.
|
||||
private static ArrayList _htmlElementsClosingDt;
|
||||
|
||||
// Names of elements closing the li element.
|
||||
private static ArrayList _htmlElementsClosingLi;
|
||||
|
||||
// Names of elements closing the tbody element.
|
||||
private static ArrayList _htmlElementsClosingTbody;
|
||||
|
||||
// Names of elements closing the td element.
|
||||
private static ArrayList _htmlElementsClosingTd;
|
||||
|
||||
// Names of elements closing the tfoot element.
|
||||
private static ArrayList _htmlElementsClosingTfoot;
|
||||
|
||||
// Names of elements closing the thead element.
|
||||
private static ArrayList _htmlElementsClosingThead;
|
||||
|
||||
// Names of elements closing the th element.
|
||||
private static ArrayList _htmlElementsClosingTh;
|
||||
|
||||
// Names of elements closing the tr element.
|
||||
private static ArrayList _htmlElementsClosingTr;
|
||||
|
||||
// Hashtable of html character entities.
// NOTE(review): presumably filled by the entity-name -> char initializer that precedes this region - confirm, the assignment itself is not visible here.
|
||||
private static Hashtable _htmlCharacterEntities;
|
||||
|
||||
#endregion Private Fields
|
||||
}
|
||||
}
|
||||
2765
JRCookbookBusiness/Converters/HtmlToXamlConverter.cs
Normal file
2765
JRCookbookBusiness/Converters/HtmlToXamlConverter.cs
Normal file
File diff suppressed because it is too large
Load Diff
22
JRCookbookBusiness/Converters/HtmlTokenType.cs
Normal file
22
JRCookbookBusiness/Converters/HtmlTokenType.cs
Normal file
@@ -0,0 +1,22 @@
|
||||
// // Copyright (c) Microsoft. All rights reserved.
|
||||
// // Licensed under the MIT license. See LICENSE file in the project root for full license information.
|
||||
|
||||
namespace HtmlToXamlDemo
{
    /// <summary>
    ///     Kinds of lexical tokens used by the html-to-xaml converter.
    /// </summary>
    /// <remarks>
    ///     The numeric values are written out explicitly; they are the same ordinals
    ///     the members receive implicitly from their declaration order.
    /// </remarks>
    internal enum HtmlTokenType
    {
        // Tag delimiters.
        // NOTE(review): which characters each of these stands for is decided by the
        // lexer, which is not part of this file - confirm there.
        OpeningTagStart = 0,
        ClosingTagStart = 1,
        TagEnd = 2,
        EmptyTagEnd = 3,

        EqualSign = 4,
        Name = 5,

        /// <summary>Any attribute value not in quotes.</summary>
        Atom = 6,

        /// <summary>Text content when accepting text.</summary>
        Text = 7,

        Comment = 8,

        /// <summary>Last member of the enumeration.</summary>
        Eof = 9
    }
}
|
||||
Reference in New Issue
Block a user