<?php

//
// parseXMLtoArray( ) -
//
// Takes a string containing XML and parses it into a nested array representation.
// Every node is an array holding the following elements:
//
//    a) "Children"   - an array with the names of all child nodes of this node.
//    b) "Parameters" - an array with the attribute (parameter) names of this node.
//    c) "Contents"   - the trimmed plain text found directly before the node's close tag
//                      ("" when there is none).
//    d) "Nodename"   - the name (type) of the node.  Note the exact spelling of the key.
//    e) One element per attribute (a string, keyed by the attribute name) and one element
//       per child node (an array with all of the above, keyed by the child's name).
//
// When several children of one parent share a name, the first one is stored as
// $parent[name]; every later one is stored INSIDE that first child as
// $parent[name][name_1], $parent[name][name_2], ... and the parent's "Children" list
// receives "name_1", "name_2", ...
//
// Return value: the node stack as an array.  Element 0 is a dummy root node (it only has
// a "Children" element plus the top level nodes of the document).  Nodes that were opened
// but never closed are left behind as additional elements.
//
// Limitations:  1) A child node can not have the same name (type) as a parameter of its parent;
//                  such a child is dropped.
//               2) Can not have a parameter or child node with the names (types):
//                  "Contents", "Children", "Nodename", or "Parameters".
//               3) A ">" inside an attribute value ends the tag early.
//               4) Comments, <!DOCTYPE ...> declarations and CDATA sections are skipped; the
//                  text of a CDATA section is discarded.
//               5) Only the text directly preceding a close tag is kept as "Contents".
//
// To Do:        1) Improperly closed nodes: backtrack to the last open node that has the same
//                  name, and roll-up.  If not found, ignore the close tag.
//               2) Report malformed input instead of silently ignoring it.
//               3) Numeric character references (&#...;) are not decoded.
//               4) Verify character set handling.
//

//
// Replaces the five predefined XML entities by their characters.  strtr() works in a single
// pass, so "&amp;lt;" correctly becomes "&lt;" and not "<".
//
function _parseXMLDecodeEntities( $text )
{
    return strtr( $text, array(
        "&lt;"   => "<",
        "&gt;"   => ">",
        "&apos;" => "'",
        "&quot;" => "\"",
        "&amp;"  => "&"
    ) );
}

//
// Tells whether the single character $char counts as whitespace inside a tag.
//
function _parseXMLIsWhitespace( $char )
{
    return $char !== "" && FALSE !== strpos( " \t\n\r\x0B", $char );
}

//
// Stores $childNode in $parentNode under $tagName and returns the updated parent.
// Further children with an already used name go to $parentNode[$tagName][$tagName."_N"].
//
function _parseXMLAttachChild( $parentNode, $tagName, $childNode )
{
    if( !isset( $parentNode[$tagName] ) )
    {
        $parentNode[$tagName] = $childNode;

        array_push( $parentNode['Children'], $tagName );
    }
    elseif( is_array( $parentNode[$tagName] ) )
    {
        //
        // Find the first free slot.  The lookup has to happen in the same place the
        // siblings are stored, otherwise every sibling would land in slot 1.
        //
        $siblingNo = 1;
        while( isset( $parentNode[$tagName][$tagName."_".$siblingNo] ) )
            $siblingNo++;

        $parentNode[$tagName][$tagName."_".$siblingNo] = $childNode;

        array_push( $parentNode['Children'], $tagName."_".$siblingNo );
    }

    //
    // Otherwise the name is taken by a parameter of the parent (see the limitations):
    // the child can not be stored without destroying the parameter, so it is dropped.
    //

    return $parentNode;
}

//
// Parses the attributes found in $inputString from offset $fromPos up to (not including)
// $toPos and returns them as array( name => raw value ).  Values may be enclosed in double
// quotes, single quotes, or not be quoted at all; a backslash escapes the next character.
//
function _parseXMLAttributes( $inputString, $fromPos, $toPos )
{
    $parameters = array();
    $lastws     = $fromPos;     // Where the current attribute name starts.
    $lasteq     = $fromPos;     // Position right before the first character of the value.
    $attrName   = "";
    $attrf      = false;        // true while we are inside the text of an attribute value.
    $eqf        = false;        // true once the equals sign of the attribute was seen.
    $escaped    = false;        // true when the previous character was an unescaped backslash.
    $openQuote  = "";           // The quote character that is currently open ("" if none).

    for( $indx = $fromPos; $indx < $toPos; $indx++ )
    {
        $tmp = substr( $inputString, $indx, 1 );

        if( $tmp === "\\" && !$escaped )
        {
            $escaped = true;
        }
        elseif( ( $tmp === '"' || $tmp === "'" ) && !$escaped &&
                ( $openQuote === "" || $openQuote === $tmp ) )
        {
            $isClosing = ( $openQuote === $tmp );
            $openQuote = $isClosing ? "" : $tmp;

            if( $attrf && $isClosing )
            {
                //
                // Closing quote of the value: everything after the opening quote is the value.
                //
                $parameters[$attrName] = substr( $inputString, $lasteq + 1, $indx - $lasteq - 1 );

                $attrf    = false;
                $eqf      = false;
                $attrName = "";
                $lastws   = $indx + 1;
            }
            elseif( $attrf )
            {
                // A quote opening in the middle of an unquoted value - malformed, ignored.
            }
            elseif( $eqf && !$isClosing )
            {
                // Opening quote directly after the equals sign: the value starts behind it.
                $attrf  = true;
                $lasteq = $indx;
            }
        }
        elseif( $tmp === "=" && $openQuote === "" )
        {
            //
            // Everything since the last whitespace up to here is the attribute name.
            //
            $attrName = trim( substr( $inputString, $lastws, $indx - $lastws ) );
            $lasteq   = $indx;
            $escaped  = false;
            $eqf      = true;
            $attrf    = false;
        }
        elseif( _parseXMLIsWhitespace( $tmp ) && $openQuote === "" )
        {
            $lastws  = $indx;
            $escaped = false;

            if( $attrf )
            {
                //
                // First un-quoted whitespace after the value started: the value ends here.
                //
                $parameters[$attrName] = substr( $inputString, $lasteq + 1, $indx - $lasteq - 1 );

                $attrf    = false;
                $eqf      = false;
                $attrName = "";
            }
        }
        else
        {
            if( $eqf )
                $attrf = true;

            $escaped = false;
        }
    }

    //
    // An unquoted value that runs up to the end of the tag has no terminating whitespace.
    //
    if( $attrf && $openQuote === "" )
        $parameters[$attrName] = substr( $inputString, $lasteq + 1, $toPos - $lasteq - 1 );

    return $parameters;
}

function parseXMLtoArray( $inputString )
{
    $lastEndPos  = 0;           // Position of the ">" that ended the previous tag.
    $outputStack = array();     // The currently open nodes, innermost last.

    //
    // Start by having a "dummy" parent node that collects the top level nodes.
    //
    array_push( $outputStack, array( 'Children' => array() ) );

    $inputString = trim( (string) $inputString );
    $inputLength = strlen( $inputString );

    //
    // Iterate across the entire input string, one tag per iteration.
    //
    while( $lastEndPos < $inputLength )
    {
        $startPos = strpos( $inputString, "<", $lastEndPos );

        if( FALSE === $startPos )
            break;              // Only text is left, there are no further nodes.

        //
        // Comments, CDATA sections and other <! ... > declarations do not create nodes.
        //
        if( substr( $inputString, $startPos + 1, 1 ) === "!" )
        {
            if( substr( $inputString, $startPos, 4 ) === "<!--" )
            {
                $opener     = "<!--";
                $terminator = "-->";
            }
            elseif( substr( $inputString, $startPos, 9 ) === "<![CDATA[" )
            {
                $opener     = "<![CDATA[";
                $terminator = "]]>";
            }
            else
            {
                $opener     = "<!";
                $terminator = ">";
            }

            $skipTo = strpos( $inputString, $terminator, $startPos + strlen( $opener ) );

            if( FALSE === $skipTo )
                break;          // Unterminated construct: nothing more can be parsed.

            $lastEndPos = $skipTo + strlen( $terminator ) - 1;
            continue;
        }

        //
        // Find the end of the element declaration.  Without it the tag is unterminated and
        // we have to stop, otherwise the same "<" would be found over and over again.
        //
        $closePos = strpos( $inputString, ">", $startPos );

        if( FALSE === $closePos )
            break;

        $isCloseTag       = ( substr( $inputString, $startPos + 1, 1 ) === "/" );
        $isSelfClosingTag = ( substr( $inputString, $closePos - 1, 1 ) === "/" );

        //
        // A tag that begins and ends with a "?" is treated as a self-closing tag;
        // the opening "?" is not part of the tag name.
        //
        if( substr( $inputString, $startPos + 1, 1 ) === "?" )
        {
            $isSelfClosingTag = ( substr( $inputString, $closePos - 1, 1 ) === "?" );
            $startPos++;
        }

        //
        // The tag name runs up to the first whitespace.  The trailing "/" or "?" of a
        // self-closing tag does not belong to the name ("<br/>" is a "br" node).
        //
        $nameStart = $startPos + ( $isCloseTag ? 2 : 1 );
        $tagLimit  = $closePos - ( ( $isSelfClosingTag && !$isCloseTag ) ? 1 : 0 );

        for( $tagEndPos = $nameStart; $tagEndPos < $tagLimit; $tagEndPos++ )
        {
            if( _parseXMLIsWhitespace( substr( $inputString, $tagEndPos, 1 ) ) )
                break;
        }

        $tagName = substr( $inputString, $nameStart, $tagEndPos - $nameStart );

        if( $isCloseTag )
        {
            //
            // A close tag is only honoured when it closes the node opened last.
            //
            $CurrNode = array_pop( $outputStack );

            if( $CurrNode && isset( $CurrNode['Nodename'] ) && $CurrNode['Nodename'] === $tagName )
            {
                //
                // The text between the previous tag and this close tag becomes "Contents".
                //
                $checkStr = "";
                if( $lastEndPos < $startPos )
                    $checkStr = trim( substr( $inputString, $lastEndPos + 1, $startPos - $lastEndPos - 1 ) );

                if( strlen( $checkStr ) > 0 )
                    $CurrNode['Contents'] = _parseXMLDecodeEntities( $checkStr );

                //
                // Now, add the finished node to its parent node as a child.
                //
                $parentNode = array_pop( $outputStack );

                if( $parentNode )
                    array_push( $outputStack, _parseXMLAttachChild( $parentNode, $tagName, $CurrNode ) );
                else
                    array_push( $outputStack, $CurrNode );
            }
            elseif( $CurrNode )
            {
                //
                // Mismatched close tag: ignore it and put the node back on the stack.
                //
                array_push( $outputStack, $CurrNode );
            }
        }
        else
        {
            //
            // This is an open tag: create the new node.
            //
            $newNode = array();
            $newNode['Nodename']   = $tagName;
            $newNode['Children']   = array();
            $newNode['Parameters'] = array();
            $newNode['Contents']   = "";

            $parameters = _parseXMLAttributes( $inputString, $tagEndPos + 1, $tagLimit );

            foreach( $parameters as $parmName => $parmValue )
            {
                $newNode[$parmName] = _parseXMLDecodeEntities( $parmValue );

                array_push( $newNode['Parameters'], $parmName );
            }

            if( $isSelfClosingTag )
            {
                //
                // There will be no close tag, so the node goes straight to its parent.
                //
                $parentNode = array_pop( $outputStack );

                if( $parentNode )
                    array_push( $outputStack, _parseXMLAttachChild( $parentNode, $tagName, $newNode ) );
                else
                    array_push( $outputStack, $newNode );
            }
            else
            {
                //
                // The node stays open until its close tag is found.
                //
                array_push( $outputStack, $newNode );
            }
        }

        //
        // Prepare for the next iteration...
        //
        $lastEndPos = $closePos;
    }

    return $outputStack;
}