1 /**
2 * Copyright © DiamondMVC 2019
3 * License: MIT (https://github.com/DiamondMVC/Diamond/blob/master/LICENSE)
4 * Author: Jacob Jensen (bausshf)
5 */
6 module diamond.html.htmldocument;
7 
8 import diamond.html.htmlexception;
9 import diamond.dom.domdocument;
10 import diamond.dom.domnode;
11 import diamond.dom.domparsersettings;
12 import diamond.html.htmlnode;
13 
/**
* An HTML document.
*
* Keeps the top-level nodes of the parsed document together with direct
* references to its doctype, head and body nodes.
*/
final class HtmlDocument : DomDocument
{
  private:
  /// The root nodes. The doctype node is never appended here by parseElements().
  HtmlNode[] _rootNodes;
  /// The doctype node.
  HtmlNode _doctype;
  /// The head node.
  HtmlNode _head;
  /// The body node.
  HtmlNode _body;

  /**
  * Remembers a top-level node as the doctype, head or body of the document
  * when its lower-cased name is "doctype", "head" or "body".
  * For a node named "html" its direct children are searched for the head and body instead.
  * Params:
  *   node = The top-level node to inspect.
  * Returns:
  *   True if the node is the doctype node, false otherwise.
  */
  bool trackRootNode(DomNode node) @safe
  {
    import std.string : toLower;

    const nodeName = node.name.toLower();

    if (nodeName == "doctype")
    {
      _doctype = node;
      return true;
    }

    if (nodeName == "head")
    {
      _head = node;
    }
    else if (nodeName == "body")
    {
      _body = node;
    }
    else if (nodeName == "html")
    {
      // Iterating an empty/null children array is a no-op, so no guard is required.
      foreach (child; node.children)
      {
        const childName = child.name.toLower();

        if (childName == "head")
        {
          _head = child;
        }
        else if (childName == "body")
        {
          _body = child;
        }
      }
    }

    return false;
  }

  public:
  final:
  /**
  * Creates a new html document.
  * Params:
  *   parserSettings = The settings used for parsing the document.
  */
  this(DomParserSettings parserSettings) @safe
  {
    super(parserSettings);
  }

  /**
  * Parses the elements from the dom to the document.
  * Every element except the doctype is appended to the root nodes;
  * the doctype, head and body references are updated along the way.
  * Params:
  *   elements = The parsed dom elements.
  */
  override void parseElements(DomNode[] elements) @safe
  {
    foreach (element; elements)
    {
      // The doctype is rendered separately by toString(), so it is kept out of the root nodes.
      if (!trackRootNode(element))
      {
        _rootNodes ~= element;
      }
    }
  }

  @property
  {
    /// Gets the root nodes of the html document.
    HtmlNode[] rootNodes() @safe { return _rootNodes; }

    /**
    * Sets the root nodes of the html document and refreshes the head and body references from them.
    * The doctype reference is only replaced when the new nodes contain a doctype node,
    * because parseElements() keeps the doctype out of the root nodes and
    * re-assigning rootNodes would otherwise lose it.
    */
    void root(HtmlNode[] nodes) @safe
    {
      _rootNodes = nodes;

      // Drop references into the previous tree so head/body never point at nodes that are no longer part of the document.
      _head = null;
      _body = null;

      foreach (node; _rootNodes)
      {
        trackRootNode(node);
      }
    }

    /// Gets the head node.
    HtmlNode head() @safe { return _head; }

    /// Gets the body node.
    HtmlNode body() @safe { return _body; }
  }

  /**
  * Queries all dom nodes based on a css3 selector.
  * Params:
  *   selector = The css3 selector.
  * Returns:
  *   An array of all matching nodes, sorted and without duplicates.
  */
  HtmlNode[] querySelectorAll(string selector)
  {
    import std.array : array;
    import std.algorithm : map, sort, group;

    HtmlNode[] elements;

    // NOTE(review): addChild() may change the parent of the root nodes - confirm against DomNode.
    auto dummyNode = new HtmlNode(null);

    foreach (rootNode; _rootNodes)
    {
      dummyNode.addChild(rootNode);

      // The dummy node accumulates root nodes, so every pass also returns the matches
      // of the previous passes; the duplicates are removed below.
      elements ~= dummyNode.querySelectorAll(selector);
    }

    return elements ? elements.sort.group.map!(g => g[0]).array : [];
  }

  /**
  * Queries the first dom node based on a css3 selector.
  * Params:
  *   selector = The css3 selector.
  * Returns:
  *   The node if found, null otherwise.
  */
  HtmlNode querySelector(string selector)
  {
    auto result = querySelectorAll(selector);

    // A null array has a length of zero, so a single check covers both cases.
    return result.length ? result[0] : null;
  }

  /**
  * Gets a dom node by an attribute named "id" matching the given value.
  * Each root node is checked itself before its descendants are searched.
  * Params:
  *   id = The id of the node to retrieve.
  * Returns:
  *   The dom node if found, null otherwise.
  */
  HtmlNode getElementById(string id) @safe
  {
    foreach (rootNode; _rootNodes)
    {
      if (rootNode.hasAttribute("id", id))
      {
        return rootNode;
      }

      auto element = rootNode.getElementById(id);

      if (element)
      {
        return element;
      }
    }

    return null;
  }

  /**
  * Repairs the html document if possible.
  *
  * Afterwards the document has a single root node named "html" that contains the head and body nodes.
  * Missing html, head and body nodes are created. The remaining root nodes and direct html children
  * are added to the head or body node according to the parser settings, or to the html node otherwise.
  */
  override void repairDocument() @safe
  {
    import std.string : toLower, stripLeft, stripRight, strip;

    // Use the first existing html root node, or create a named one.
    HtmlNode htmlNode;

    foreach (rootNode; _rootNodes)
    {
      if (rootNode.name.toLower() == "html")
      {
        htmlNode = rootNode;
        break;
      }
    }

    if (!htmlNode)
    {
      htmlNode = new HtmlNode(null);
      htmlNode.name = "html";
    }

    if (!htmlNode.parserSettings)
    {
      htmlNode.parserSettings = super.parserSettings;
    }

    // Identity check used to avoid adding the same node to the html node twice.
    bool isHtmlChild(HtmlNode node) @safe
    {
      foreach (child; htmlNode.children)
      {
        if (child is node)
        {
          return true;
        }
      }

      return false;
    }

    // Finds the first root node or direct html child with the given lower-case name, or creates a named node.
    HtmlNode findOrCreate(string tagName) @safe
    {
      foreach (candidate; _rootNodes ~ htmlNode.children)
      {
        if (candidate.name.toLower() == tagName)
        {
          return candidate;
        }
      }

      auto created = new HtmlNode(htmlNode);
      created.name = tagName;
      return created;
    }

    if (!_head)
    {
      _head = findOrCreate("head");
      _head.parserSettings = super.parserSettings;
    }

    auto headNode = _head;

    // A head node found at root level has to be moved below the html node,
    // since the html node becomes the only root node.
    if (!isHtmlChild(headNode))
    {
      htmlNode.addChild(headNode);
    }

    if (!_body)
    {
      _body = findOrCreate("body");
      _body.parserSettings = super.parserSettings;
    }

    auto bodyNode = _body;

    if (!isHtmlChild(bodyNode))
    {
      htmlNode.addChild(bodyNode);
    }

    // The concatenation creates a copy, so adding children below does not affect the iteration.
    foreach (rootNode; _rootNodes ~ htmlNode.children)
    {
      // Repair broken names.
      rootNode.name = rootNode.name.stripLeft("/").stripRight("/").strip();

      const nodeName = rootNode.name.toLower();

      if
      (
        nodeName == "doctype" ||
        nodeName == "html" ||
        nodeName == "head" ||
        nodeName == "body"
      )
      {
        continue;
      }

      // TODO: Repair the order of the elements
      // Use DomNode._nodeId to order all elements correctly.

      // NOTE(review): a direct html child that is moved to the head or body node is not
      // removed from the html node here - confirm whether addChild() detaches it from its old parent.
      if (super.parserSettings.isHeadTag(rootNode.name))
      {
        headNode.addChild(rootNode);
      }
      else if (super.parserSettings.isBodyTag(rootNode.name))
      {
        bodyNode.addChild(rootNode);
      }
      else if (!isHtmlChild(rootNode))
      {
        htmlNode.addChild(rootNode);
      }
    }

    _rootNodes = [htmlNode];
  }

  /**
  * Converts the html document to a properly formatted html document-string.
  * The doctype, when present, is written first, followed by the root nodes separated by "\r\n".
  * Returns:
  *   A string equivalent to the properly formatted html document-string.
  */
  override string toString() @safe
  {
    import std.array : join, array;
    import std.algorithm : map;
    import std.string : format;

    string doctypeText = "";

    if (_doctype)
    {
      // Attributes without a value are written by name only.
      auto attributeText = _doctype.getAttributes()
        .map!(a => a.value ? "%s=\"%s\"".format(a.name, a.value) : a.name)
        .array
        .join(" ");

      doctypeText = "<!%s %s>\r\n".format(_doctype.name, attributeText);
    }

    return doctypeText ~ (_rootNodes ? join(_rootNodes.map!(n => n.toString).array, "\r\n") : "");
  }
}