Traversing the DOM & maintaining node hierarchy

M

matth

I've been working on something that deals with handling a user's
selection within the DOM and I'm tripping up on one last, but crucial,
detail.
Forgive me for the length of the code, but my question is pretty
straightforward and my brain hasn't been working.

Problem: The way I'm iterating through the nodes doesn't allow me to
preserve the node hierarchy.

Instead of:
<div></div><b></b>text

I want:
<div><b>text</b></div>



/**
 * Walks the nodes covered by a DOM Range, starting at the range's start
 * container and stopping once the end container has been visited, and
 * reports each visited node through `this.onNode`.
 */
function RangeIterator() {
  /**
   * Per-node callback. Appends `<NAME>str</NAME>` to
   * `commonplaceLogMain.cplNodes` (a global defined outside this block).
   * Replace this property to collect the output some other way.
   *
   * @param {string} str  Text selected inside the node ('' for non-text nodes).
   * @param {Node} node   The node being visited.
   */
  this.onNode = function (str, node) {
    commonplaceLogMain.cplNodes += '<' + node.nodeName + '>' + str + '</' +
      node.nodeName + '>';
  };

  /**
   * Visits every node from the start of range `r` to its end.
   *
   * @param {Range} r  Object exposing startContainer/startOffset and
   *                   endContainer/endOffset.
   * @returns {boolean} true when the end container was reached; false when
   *                    the walk ran out of ancestors first.
   */
  this.iterate = function (r) {
    var me = this,
      node = r.startContainer,
      offset = r.startOffset,
      finalNode = r.endContainer,
      finalOffset = r.endOffset;

    // Visits `node` (and, for containers, its children from `offset` on).
    // Returns false as soon as the end container has been handled.
    function visitNode(node, offset) {
      var isFinal = (node === finalNode), lastChildIndex, i, c, str = '';

      switch (node.nodeType) {
        case 3: // text
        case 4: // CDATA section
        case 7: // processing instruction
        case 8: // comment
          str = node.nodeValue;
          // Trim the tail first so `offset` still indexes the original string.
          if (isFinal) {
            str = str.substring(0, finalOffset);
          }
          if (offset) {
            str = str.substring(offset);
          }
          me.onNode(str, node);
          break;
        default:
          me.onNode(str, node);
          lastChildIndex = isFinal ? finalOffset : node.childNodes.length;
          for (i = offset, c = node.childNodes.item(i); i < lastChildIndex;
              c = c.nextSibling, i++) {
            if (!visitNode(c, 0)) {
              return false;
            }
          }
      }
      return !isFinal;
    }

    while (visitNode(node, offset)) {
      if (node.nextSibling) {
        node = node.nextSibling;
        offset = 0;
      } else {
        node = node.parentNode;
        if (!node) {
          // Ran past the root without meeting the end container; stop
          // instead of dereferencing a null parent.
          return false;
        }
        // All of the parent's children are already behind us, so revisit it
        // with an offset that skips them.
        offset = node.childNodes.length;
      }
    }
    return true;
  };
}
 
D

Doug Gunnoe

I've been working on something that deals with handling a user's
selection within the DOM and I'm tripping up on one last, but crucial,
detail.
Forgive me for the length of the code, but my question is pretty
straightforward and my brain hasn't been working.

Problem: The way I'm iterating through the nodes doesn't allow me to
preserve the node hierarchy.

Instead of:
<div></div><b></b>text

I want:
<div><b>text</b></div>

/**
 * Walks the nodes covered by a DOM Range, starting at the range's start
 * container and stopping once the end container has been visited, and
 * reports each visited node through `this.onNode`.
 */
function RangeIterator() {
  /**
   * Per-node callback. Appends `<NAME>str</NAME>` to
   * `commonplaceLogMain.cplNodes` (a global defined outside this block).
   *
   * @param {string} str  Text selected inside the node ('' for non-text nodes).
   * @param {Node} node   The node being visited.
   */
  this.onNode = function (str, node) {
    commonplaceLogMain.cplNodes += '<' + node.nodeName + '>' + str + '</' +
      node.nodeName + '>';
  };

  /**
   * Visits every node from the start of range `r` to its end.
   *
   * @param {Range} r  Object exposing startContainer/startOffset and
   *                   endContainer/endOffset.
   * @returns {boolean} true when the end container was reached; false when
   *                    the walk ran out of ancestors first.
   */
  this.iterate = function (r) {
    var me = this,
      node = r.startContainer,
      offset = r.startOffset,
      finalNode = r.endContainer,
      finalOffset = r.endOffset;

    // Returns false as soon as the end container has been handled.
    function visitNode(node, offset) {
      var isFinal = (node === finalNode), lastChildIndex, i, c, str = '';

      switch (node.nodeType) {
        case 3: // text
        case 4: // CDATA section
        case 7: // processing instruction
        case 8: // comment
          str = node.nodeValue;
          // Trim the tail first so `offset` still indexes the original string.
          if (isFinal) {
            str = str.substring(0, finalOffset);
          }
          if (offset) {
            str = str.substring(offset);
          }
          me.onNode(str, node);
          break;
        default:
          me.onNode(str, node);
          lastChildIndex = isFinal ? finalOffset : node.childNodes.length;
          for (i = offset, c = node.childNodes.item(i); i < lastChildIndex;
              c = c.nextSibling, i++) {
            if (!visitNode(c, 0)) {
              return false;
            }
          }
      }
      return !isFinal;
    }

    while (visitNode(node, offset)) {
      if (node.nextSibling) {
        node = node.nextSibling;
        offset = 0;
      } else {
        node = node.parentNode;
        if (!node) {
          // Ran past the root without meeting the end container; stop
          // instead of dereferencing a null parent.
          return false;
        }
        // The parent's children are already behind us; skip them on revisit.
        offset = node.childNodes.length;
      }
    }
    return true;
  };
}

- Show quoted text -

Here is an example I did a couple of months ago. It prints in the
correct order but does not print the closing tag.

http://polisick.com/domTraverseExample.html

Good luck.
 
D

Doug Gunnoe

Bummer, the closing tag is the important part! But thanks though, I'll
take a look, it'll be cool to see how you traverse the DOM anyway. I
might learn a thing or two.- Hide quoted text -

Let's say you keep up with your 'opening tags' in a string called
startTags

You could also create a string to keep up with the closing tags called
endTags

endTags = '';

You could have a little function like:

// Prepends `tag` to the global `endTags` string, so closing tags accumulate
// in the reverse of the order in which they were registered.
function makeEndTags(tag) {
  var closingSoFar = endTags;
  endTags = tag + closingSoFar;
}

Then when you come to a tag, like "<body>", you could do

makeEndTags('</body>')

Then say the next 'tag' is '<div>' do makeEndTags('</div>')

then if the next is '<p>' do makeEndTags('</p>')

so that you construct a string in endTags that looks like this

</p></div></body>

So at the end you print

startTags + endTags and you get

<body><div><p></p></div></body>

And just insert whatever formatting you like in the two strings as you
go.
 
M

matth

Let's say you keep up with your 'opening tags' in a string called
startTags

You could also create a string to keep up with the closing tags called
endTags

endTags = '';

You could have a little function like:

// Prepends `tag` to the global `endTags` string, so closing tags accumulate
// in the reverse of the order in which they were registered.
function makeEndTags(tag) {
  var closingSoFar = endTags;
  endTags = tag + closingSoFar;
}

Then when you come to a tag, like "<body>", you could do

makeEndTags('</body>')

Then say the next 'tag' is '<div>' do makeEndTags('</div>')

then if the next is '<p>' do makeEndTags('</p>')

so that you construct a string in endTags that looks like this

</p></div></body>

So at the end you print

startTags + endTags and you get

<body><div><p></p></div></body>

And just insert whatever formatting you like in the two strings as you
go.

I started out with something very similar to what you pointed out,
but that solution breaks down once you get to something like this:
<div><b></b></div><div><b></b></div>

When is it the right time to add those end tags? Here's a copy of an
approach that I tried but never finished up:
// TESTING

// Walks up from `node` (defined outside this snippet) and, at the nearest
// ancestor whose tag is in the allow-list, appends one tag to
// commonplaceLogMain.cplNodes and stops.
var allowed_html = ['b', 'u', 'i', 'h1', 'h2',
  'h3', 'h4', 'code', 'pre', 'a', 'li', 'p', 'br'];
var parentNode = node.parentNode;
// NOTE(review): nothing in this snippet ever adds to open_tags, so the
// "open new tag" branch below cannot run as written — confirm the intent.
var open_tags = [];
var parentTag;
while (parentNode) {
  // The allow-list holds lower-case tag names, so compare the ancestor's
  // tag name rather than the node object itself (which can never match).
  parentTag = parentNode.nodeName.toLowerCase();
  if (in_array(allowed_html, parentTag)) {
    if (!in_array(open_tags, parentNode)) {
      //close old tag
      commonplaceLogMain.cplNodes += '</' + parentTag + '> ';
    } else {
      //open new tag
      commonplaceLogMain.cplNodes += '<' + parentTag + '> ';
    }
    break;
  }
  parentNode = parentNode.parentNode;
}
/**
 * Reports whether `value` occurs in `array`.
 *
 * @param {Array} array  List to search.
 * @param {*} value      Value to look for (compared with ===).
 * @returns {boolean} true if some element equals `value`, otherwise false.
 */
function in_array(array, value) {
  var i;
  // Bound the loop by the array argument (not `this`) and compare each
  // element, not the array object as a whole.
  for (i = 0; i < array.length; i++) {
    if (array[i] === value) {
      return true;
    }
  }
  return false;
}
 
D

Doug Gunnoe

I started out with something very similar to what you pointed out,
but that solution breaks down once you get to something like this:
<div><b></b></div><div><b></b></div>

You're right.
When is it the right time to add those end tags? Here's a copy of an
approach that I tried but never finished up:
// TESTING

// Walks up from `node` (defined outside this snippet) and, at the nearest
// ancestor whose tag is in the allow-list, appends one tag to
// commonplaceLogMain.cplNodes and stops.
var allowed_html = ['b', 'u', 'i', 'h1', 'h2',
  'h3', 'h4', 'code', 'pre', 'a', 'li', 'p', 'br'];
var parentNode = node.parentNode;
// NOTE(review): nothing in this snippet ever adds to open_tags, so the
// "open new tag" branch below cannot run as written — confirm the intent.
var open_tags = [];
var parentTag;
while (parentNode) {
  // The allow-list holds lower-case tag names, so compare the ancestor's
  // tag name rather than the node object itself (which can never match).
  parentTag = parentNode.nodeName.toLowerCase();
  if (in_array(allowed_html, parentTag)) {
    if (!in_array(open_tags, parentNode)) {
      //close old tag
      commonplaceLogMain.cplNodes += '</' + parentTag + '> ';
    } else {
      //open new tag
      commonplaceLogMain.cplNodes += '<' + parentTag + '> ';
    }
    break;
  }
  parentNode = parentNode.parentNode;
}
/**
 * Reports whether `value` occurs in `array`.
 *
 * @param {Array} array  List to search.
 * @param {*} value      Value to look for (compared with ===).
 * @returns {boolean} true if some element equals `value`, otherwise false.
 */
function in_array(array, value) {
  var i;
  // Bound the loop by the array argument (not `this`) and compare each
  // element, not the array object as a whole.
  for (i = 0; i < array.length; i++) {
    if (array[i] === value) {
      return true;
    }
  }
  return false;
}


When looking at my example I found a logic error. It does not traverse
back up the tree like it should. Anyway, I thought of the solution but
have not had time to try it.

Create an array for the end tags 'stack = []'. When going down the
tree, push the end tags onto the array.

stack.push('</' + node.nodeName + '>');

anytime you move to node.nextSibling or node.parentNode do stack.pop()
and add that to the string.

This way it should only add the end tags at the right time.
 
D

Doug Gunnoe

When looking at my example I found a logic error. It does not traverse
back up the tree like it should. Anyway, I thought of the solution but
have not had time to try it.

Create an array for the end tags 'stack = []'. When going down the
tree, push the end tags onto the array.

stack.push('</' + node.nodeName + '>');

anytime you move to node.nextSibling or node.parentNode do stack.pop()
and add that to the string.

This way it should only add the end tags at the right time.

matth,

I don't know if you ever found a solution to this, but it was a pretty
good problem.

No doubt one that was already solved a thousand times, but hey, what
is time for anyway, right? :)

So I finally got a chance to try my use of a stack to track the end
tags and insert them at the right time and I think it works.

http://polisick.com/domTraverseExample.html

Doug
 
D

David Mark

When looking at my example I found a logic error. It does not traverse
back up the tree like it should. Anyway, I thought of the solution but
have not had time to try it.
Create an array for the end tags 'stack = []'. When going down the
tree, push the end tags onto the array.
stack.push('</' + node.nodeName + '>');
anytime you move to node.nextSibling or node.parentNode do stack.pop()
and add that to the string.
This way it should only add the end tags at the right time.

matth,

I don't know if you ever found a solution to this, but it was a pretty
good problem.

No doubt one that was already solved a thousand times, but hey, what
is time for anyway, right? :)

So I finally got a chance to try my use of a stack to track the end
tags and insert them at the right time and I think it works.

http://polisick.com/domTraverseExample.html
There is another example at the bottom of this page:

http://cljs.michaux.ca/trac/ticket/13

Basically, there are two (relatively slow) substitutes for (reading)
innerHTML and outerHTML. I used them to test the getAttribute wrapper.
 
R

RobG

I started out with something very similar to what you pointed out,
but that solution breaks down once you get to something like this:
<div><b></b></div><div><b></b></div>
[...]
When looking at my example I found a logic error. It does not traverse
back up the tree like it should. Anyway, I thought of the solution but
have not had time to try it.

Below is a quick 'n dirty I did some time ago, it can be extended if
you want but does the job for me. You just feed it a root element,
the returned string should be set as the value of a text node inside a
pre element:

/**
 * Builds a string of `n` space characters.
 *
 * @param {number} n  Number of spaces wanted.
 * @returns {string}
 */
function spaces(n) {
  return (new Array(n + 1)).join(' ');
}

/**
 * Renders the subtree rooted at `el` as an indented text outline: elements
 * appear as <NAME> ... </NAME>, every other node as its bare nodeName, with
 * two spaces of indentation per nesting level.
 *
 * @param {Node} el         Root of the subtree to render.
 * @param {number} [depth]  Nesting level of the caller; omit for the root.
 * @returns {string} The outline; each line is prefixed with '\n\r'.
 */
function nodesToText(el, depth) {
  depth = (depth === undefined) ? 0 : depth + 1;
  var s, e0 = '', e1 = '';
  var pad = spaces(depth * 2);
  var nodeName = el.nodeName;

  if (el.nodeType === 1) {
    s = '\n\r' + pad + '<' + nodeName + '>';
    e0 = '\n\r' + pad;
    e1 = '</' + nodeName + '>';
  } else {
    s = '\n\r' + pad + nodeName;
  }

  var i;
  var leni = (el.childNodes) ? el.childNodes.length : 0;

  for (i = 0; i < leni; i++) {
    // Recurse into the i-th child; passing the whole childNodes list made
    // every child render as "undefined".
    s = s + nodesToText(el.childNodes[i], depth);
  }
  // Only put the closing tag on its own line when there were children.
  return s + (i ? e0 : '') + e1;
}

Call it with something like:

// On load, render the outline of #foo and append it, as a text node, to #dump.
window.onload = function () {
  var source = document.getElementById('foo');
  var target = document.getElementById('dump');
  var outline = document.createTextNode(nodesToText(source));
  target.appendChild(outline);
};

Where foo is the element to get the content of and dump is a pre
element to write the string to. You can add element attributes like
id, class, etc. if you want, but usually I just want to look at a
small fragment in IE to see what it's doing, given tools like FireBug
and DOM inspectors in other browsers there isn't much call for such a
beast otherwise.
 
D

Doug Gunnoe

Below is a quick 'n dirty I did some time ago, it can be extended if
you want but does the job for me. You just feed it a root element,
the returned string should be set as the value of a text node inside a
pre element:

/**
 * Builds a string of `n` space characters.
 *
 * @param {number} n  Number of spaces wanted.
 * @returns {string}
 */
function spaces(n) {
  return (new Array(n + 1)).join(' ');
}

/**
 * Renders the subtree rooted at `el` as an indented text outline: elements
 * appear as <NAME> ... </NAME>, every other node as its bare nodeName, with
 * two spaces of indentation per nesting level.
 *
 * @param {Node} el         Root of the subtree to render.
 * @param {number} [depth]  Nesting level of the caller; omit for the root.
 * @returns {string} The outline; each line is prefixed with '\n\r'.
 */
function nodesToText(el, depth) {
  depth = (depth === undefined) ? 0 : depth + 1;
  var s, e0 = '', e1 = '';
  var pad = spaces(depth * 2);
  var nodeName = el.nodeName;

  if (el.nodeType === 1) {
    s = '\n\r' + pad + '<' + nodeName + '>';
    e0 = '\n\r' + pad;
    e1 = '</' + nodeName + '>';
  } else {
    s = '\n\r' + pad + nodeName;
  }

  var i;
  var leni = (el.childNodes) ? el.childNodes.length : 0;

  for (i = 0; i < leni; i++) {
    // Recurse into the i-th child; passing the whole childNodes list made
    // every child render as "undefined".
    s = s + nodesToText(el.childNodes[i], depth);
  }
  // Only put the closing tag on its own line when there were children.
  return s + (i ? e0 : '') + e1;
}

Call it with something like:

// On load, render the outline of #foo and append it, as a text node, to #dump.
window.onload = function () {
  var source = document.getElementById('foo');
  var target = document.getElementById('dump');
  var outline = document.createTextNode(nodesToText(source));
  target.appendChild(outline);
};

Where foo is the element to get the content of and dump is a pre
element to write the string to. You can add element attributes like
id, class, etc. if you want, but usually I just want to look at a
small fragment in IE to see what it's doing, given tools like FireBug
and DOM inspectors in other browsers there isn't much call for such a
beast otherwise.


Thanks Rob and David. Informative and interesting.
 
E

Evertjan.

RobG wrote on 13 feb 2008 in comp.lang.javascript:
/**
 * Builds a string of `n` space characters.
 *
 * @param {number} n  Number of spaces wanted.
 * @returns {string}
 */
function spaces(n) {
  return (new Array(n + 1)).join(' ');
}

/**
 * Renders the subtree rooted at `el` as an indented text outline: elements
 * appear as <NAME> ... </NAME>, every other node as its bare nodeName, with
 * two spaces of indentation per nesting level.
 *
 * @param {Node} el         Root of the subtree to render.
 * @param {number} [depth]  Nesting level of the caller; omit for the root.
 * @returns {string} The outline; each line is prefixed with '\n\r'.
 */
function nodesToText(el, depth) {
  depth = (depth === undefined) ? 0 : depth + 1;
  var s, e0 = '', e1 = '';
  var pad = spaces(depth * 2);
  var nodeName = el.nodeName;

  if (el.nodeType === 1) {
    s = '\n\r' + pad + '<' + nodeName + '>';
    e0 = '\n\r' + pad;
    e1 = '</' + nodeName + '>';
  } else {
    s = '\n\r' + pad + nodeName;
  }

  var i;
  var leni = (el.childNodes) ? el.childNodes.length : 0;

  for (i = 0; i < leni; i++) {
    // Recurse into the i-th child; passing the whole childNodes list made
    // every child render as "undefined".
    s = s + nodesToText(el.childNodes[i], depth);
  }
  // Only put the closing tag on its own line when there were children.
  return s + (i ? e0 : '') + e1;
}

Call it with something like:

// On load, render the outline of #foo and append it, as a text node, to #dump.
window.onload = function () {
  var source = document.getElementById('foo');
  var target = document.getElementById('dump');
  var outline = document.createTextNode(nodesToText(source));
  target.appendChild(outline);
};



nice.

Some simplifications:

=====================

/**
 * Renders the subtree rooted at `el` as a text outline, one node per line,
 * indented with '--' per nesting level. Elements are shown as
 * <NAME> ... </NAME>; other nodes as "NAME [nodeValue]".
 *
 * @param {Node} el         Root of the subtree to render.
 * @param {number} [depth]  Nesting level of the caller; omit for the root.
 * @returns {string} The outline; each line starts with '\n'.
 */
function nodesToText(el, depth) {
  var level = (depth || 0) + 1;
  var indent = '\n' + new Array(level).join('--');
  var name = el.nodeName;
  var opening;
  var closing = '';
  var child;

  if (el.nodeType == 1) {
    opening = indent + '<' + name + '>';
    closing = '</' + name + '>';
  } else {
    opening = indent + name + ' [' + el.nodeValue + ']';
  }

  child = el.firstChild;
  if (child) {
    // With children present, the closing part goes on its own indented line.
    closing = indent + closing;
  }

  for (; child; child = child.nextSibling) {
    opening += nodesToText(child, level);
  }

  return opening + closing;
}

// On load, show the outline of the whole document body in an alert box.
window.onload = function () {
  var outline = nodesToText(document.body);
  alert(outline);
};

=====================
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

Forum statistics

Threads
473,770
Messages
2,569,584
Members
45,075
Latest member
MakersCBDBloodSupport

Latest Threads

Top