package Pitonyak::XMLUtil; #************************************************************ =head1 NAME Pitonyak::XMLUtil - Convert Objects to and from XML. =head1 DESCRIPTION A few simple XML utilities that will convert arbitrary objects to XML and back again. These routines have not been extensively tested. =cut require Exporter; $VERSION = '1.00'; @ISA = qw(Exporter); @EXPORT = qw(); @EXPORT_OK = qw( convert_entity_references_to_characters convert_xml_characters_to_entity_references object_to_xml xml_to_object ); use Carp; use IO::File; use File::Basename; use strict; use XML::Parser; #************************************************************ =pod =head2 convert_entity_references_to_characters =over 4 =item convert_entity_references_to_characters(@strings_with_entity_refs) =back Change '&' to '&', '<' to '<', '>' to '>', '"' to '"', and ''' to "'". The calling parameters are modfied =cut #************************************************************ my %s2x = ( '&' => '&', '<' => '<', '>' => '>', '"' => '"', ''' => "'", ); sub convert_entity_references_to_characters { return undef if $#_ < 0; for (@_) { s/('|"|>|<|&)/"$s2x{$1}"/geos; } return wantarray ? @_ : $_[0]; } #************************************************************ =pod =head2 convert_xml_characters_to_entity_references =over 4 =item convert_xml_characters_to_entity_references(@strings_needing_entity_refs) =back Change '&' to '&', '<' to '<', '>' to '>', '"' to '"', and "'" to ''', The calling parameters are modfied. This is used to render a string safe to send as XML. Existing entity referenes will have their leading ampersand transformed. =cut #************************************************************ my %x2s = ( '&' => '&', '<' => '<', '>' => '>', '"' => '"', "'" => ''', ); sub convert_xml_characters_to_entity_references { # If called from a logger object, then simply discard if ( $#_ >= 0 && UNIVERSAL::isa( $_[0], 'XMLUtil' ) ) { shift; } return undef if $#_ < 0; for (@_) { s/([&<>"'])/$x2s{$1}/geo; } return wantarray ? @_ : $_[0]; } #********************************************************************* #** ** #** Input : left indentation ** #** One object to convert ** #** ** #** Output: Objects converted to XML ** #** ** #** Notes : Each parameter is a single XML string ** #** The element name identifies the type ** #** Objects are usually converted to a HASH ** #** ** #********************************************************************* sub internal_object_to_xml { return undef if $#_ < 0; my $left = shift; my $thing_to_print = shift; my $txt; if ( !defined($thing_to_print) ) { $txt = ''; } else { my $ref_type = ref $thing_to_print; if ( !$ref_type ) { $txt = convert_xml_characters_to_entity_references($thing_to_print); } elsif ( $ref_type eq 'SCALAR' ) { # If the referenced item can not be converted # then it will not be my $internal = internal_object_to_xml( "$left ", $$thing_to_print ); if ( defined($internal) && length($internal) > 0 ) { $txt = "\n$internal\n$left"; } else { $txt = ""; } } elsif ( $ref_type eq 'ARRAY' ) { my $internal_txt = ''; foreach my $temp_thing (@$thing_to_print) { my $internal = internal_object_to_xml( "$left ", $temp_thing ); if ( length($internal) == 0 ) { $internal_txt .= "$left \n"; } elsif ( index( $internal, '<' ) >= $[ ) { $internal_txt .= "$left \n$left $internal\n$left \n"; } else { $internal_txt .= "$left $internal\n"; } } if ( length($internal_txt) > 0 ) { $txt = "\n$internal_txt$left"; } else { $txt = ''; } } elsif ( UNIVERSAL::isa( $thing_to_print, 'HASH' ) ) { # # Remember that each hash has one universal iterator # recursive nesting will therefore cause stranger # results than a simple infinite loop. # my $hash_txt = "\n"; my ( $key, $value ); while ( ( $key, $value ) = each %$thing_to_print ) { my $value_xml; my $key_xml; if ( defined($value) ) { $value_xml = internal_object_to_xml( "$left ", $value ); } if ( defined($key) ) { $key_xml = internal_object_to_xml( "$left ", $key ); } if ( defined($key) ) { if ( index( $key_xml, '<' ) >= $[ || ( defined($value_xml) && index( $value_xml, '<' ) >= $[ ) ) { $hash_txt .= "$left \n$left "; if ( index( $key_xml, '<' ) >= $[ ) { $hash_txt .= "\n$left $key_xml\n$left \n"; } elsif ( length($key_xml) > 0 ) { $hash_txt .= "$key_xml\n"; } else { $hash_txt .= "\n"; } if ( defined($value_xml) ) { if ( index( $value_xml, '<' ) >= $[ ) { $hash_txt .= "$left \n$left $value_xml\n$left \n"; } elsif ( length($value_xml) > 0 ) { $hash_txt .= "$left $value_xml\n"; } else { $hash_txt .= "$left \n"; } } $hash_txt .= "$left \n"; } elsif ( defined($key_xml) ) { $hash_txt .= "$left "; if ( length($key_xml) > 0 ) { $hash_txt .= "$key_xml"; } else { $hash_txt .= ""; } if ( defined($value_xml) ) { if ( length($value_xml) > 0 ) { $hash_txt .= "$value_xml"; } else { $hash_txt .= ""; } } $hash_txt .= "\n"; } } } if ( defined($hash_txt) && index( $hash_txt, '<' ) >= $[ ) { $txt = "$hash_txt$left"; } else { $txt = ''; } } } return $txt; } #************************************************************ =pod =head2 object_to_xml =over 4 =item object_to_xml(@objects_to_transform) =back Transform an object into XML. An attempt is made to make this object human readable. Note that if the object is a package object that is referenced as a HASH it is still embedded as a HASH. Each object in the array is returned as a separate XML string. An object that is not defined is returned as CNULLEE>> A SCALAR is rendered XML safe by converting special characters to entity references. It is otherwise left unchanged. A Reference to a SCALAR is encoded as CREFEE> for a zero length SCALAR and as CREFEvalueEEREFE> An ARRAY reference is encoded as either CARRAYEE> or something similar to CARRAYEEVALUEEvalueEEVALUEEEEARRAYE>. A HASH reference is encoded as CHASHEEPAIREEKEYEvalueEEKEYEEVALUEEvalueEVALUEEEEPAIREEEHASHE> A PAIR may be missing a C which means that it is undefined. If a value is really the intended value, then it is rendered XML safe by using entity references and no extra space is used. If the value is a reference to something else then the object is converted to XML using extra white space and indentation for easier reading. =cut #************************************************************ sub object_to_xml { return undef if $#_ < 0; my @object_xmls = (); foreach my $thing_to_print (@_) { my $txt = internal_object_to_xml( '', $thing_to_print ); push @object_xmls, $txt if defined($txt) && length($txt) > 0; } return wantarray ? @object_xmls : $object_xmls[0]; } #********************************************************************* #** ** #** Input : Array reference of an XML object ** #** Index into the array from which to start ** #** ** #** Output: XML converted back to objects ** #** ** #** Notes : This takes the string from object_to_xml ** #** ** #********************************************************************* sub internal_xml_to_object { return undef if $#_ < 0; if ( ref( $_[0] ) ne 'ARRAY' ) { carp( "Array reference expected as the first parameter, not" . ref( $_[0] ) ); return undef; } my $array_ref = shift; my $tag_start = 0; $tag_start = shift unless $#_ < 0; if ( $tag_start > $#$array_ref ) { confess( "Requested an index of $tag_start when the array is not large enough" ); return undef; } my $obj; my $element_name = $array_ref->[$tag_start]; my $element; $element = $array_ref->[ $tag_start + 1 ] if $tag_start < $#$array_ref; if ( $element_name eq '0' ) { if ( defined($element) ) { $obj = convert_entity_references_to_characters($element); } else { $obj = ''; } } elsif ( $element_name eq 'NULL' ) { # $obj is already undefined } elsif ( defined($element) && ref($element) ne 'ARRAY' ) { carp( "The element $element_name is not followed by an array, it is an " . ref($element) ); } elsif ( $element_name eq 'REF' ) { my $temp = internal_xml_to_object( $element, 1 ); $obj = \$temp; } elsif ( $element_name eq 'ARRAY' ) { #?? I think that this is wrong! my @my_array = (); my $array_len = 0; if ( defined($element) ) { $array_len = $#$element; } for ( my $i = 1 ; $i < $array_len ; $i += 2 ) { # skip white space if ( $element->[$i] ne '0' ) { push @my_array, internal_xml_to_object( $element, $i ); } } $obj = \@my_array; } elsif ( $element_name eq 'HASH' ) { my %my_hash = (); my $array_len = 0; if ( defined($element) ) { $array_len = $#$element; } for ( my $i = 1 ; $i < $array_len ; $i += 2 ) { # skip white space if ( $element->[$i] eq 'Pair' ) { my ( $key, $val ); my $internal_array = $element->[ $i + 1 ]; my $internal_array_len = $#$internal_array; for ( my $j = 1 ; $j < $internal_array_len ; $j += 2 ) { if ( $internal_array->[$j] eq 'Key' ) { my $key_array = $internal_array->[ $j + 1 ]; my $length_of_key_array = $#$key_array; if ( $length_of_key_array == 0 ) { $key = ''; } else { my $idx_to_use = 1; for ( my $k = 1 ; $k < $length_of_key_array ; $k += 2 ) { $idx_to_use = $k if $key_array->[$k] ne '0'; } $key = internal_xml_to_object( $key_array, $idx_to_use ); } } elsif ( $internal_array->[$j] eq 'Value' ) { my $val_array = $internal_array->[ $j + 1 ]; my $length_of_val_array = $#$val_array; if ( $length_of_val_array == 0 ) { $val = ''; } else { my $idx_to_use = 1; for ( my $k = 1 ; $k < $length_of_val_array ; $k += 2 ) { $idx_to_use = $k if $val_array->[$k] ne '0'; } $val = internal_xml_to_object( $val_array, $idx_to_use ); } } } $my_hash{$key} = $val; } } $obj = \%my_hash; } return $obj; } #************************************************************ =pod =head2 xml_to_object =over 4 =item xml_to_object(@xml_strings_to_convert_to_objects) =back Convert XML strings back into objects. =cut #************************************************************ sub xml_to_object { # If called from a logger object, then simply discard if ( $#_ >= 0 && UNIVERSAL::isa( $_[0], 'XMLUtil' ) ) { shift; } return undef if $#_ < 0; my @objects = (); my $xml_parser = new XML::Parser( Style => 'Tree' ); NEXT_XML_STRING: foreach my $xml_string (@_) { my $obj; my $tree = $xml_parser->parsestring($xml_string); $obj = internal_xml_to_object( $tree, 0 ); push @objects, $obj; } return wantarray ? @objects : $objects[0]; } #************************************************************ =pod =head1 COPYRIGHT Copyright 2002, Andrew Pitonyak (perlboy@pitonyak.org) This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =head1 Modification History =head2 September 10, 2002 Version 1.00 Initial release =cut #************************************************************ 1;