XMLUtil.pm
package Pitonyak::XMLUtil;
#************************************************************
=head1 NAME
Pitonyak::XMLUtil - Convert Objects to and from XML.
=head1 DESCRIPTION
A few simple XML utilities that will convert arbitrary objects to XML and back again.
These routines have not been extensively tested.
=cut
require Exporter;
$VERSION   = '1.00';
@ISA       = qw(Exporter);
@EXPORT    = qw();
@EXPORT_OK = qw(
  convert_entity_references_to_characters
  convert_xml_characters_to_entity_references
  object_to_xml
  xml_to_object
);
use Carp;
use IO::File;
use File::Basename;
use strict;
use XML::Parser;
#************************************************************
=pod
=head2 convert_entity_references_to_characters
=over 4
=item convert_entity_references_to_characters(@strings_with_entity_refs)
=back
Change '&' to '&', '<' to '<', '>' to '>', '"' to '"', and ''' to "'".
The calling parameters are modfied
=cut
#************************************************************
my %s2x = (
    '&'  => '&',
    '<'   => '<',
    '>'   => '>',
    '"' => '"',
    ''' => "'",
);
sub convert_entity_references_to_characters {
    return undef if $#_ < 0;
    for (@_) {
        s/('|"|>|<|&)/"$s2x{$1}"/geos;
    }
    return wantarray ? @_ : $_[0];
}
#************************************************************
=pod
=head2 convert_xml_characters_to_entity_references
=over 4
=item convert_xml_characters_to_entity_references(@strings_needing_entity_refs)
=back
Change '&' to '&', '<' to '<', '>' to '>', '"' to '"', and "'" to ''',
The calling parameters are modfied.
This is used to render a string safe to send as XML.
Existing entity referenes will have their leading ampersand transformed.
=cut
#************************************************************
my %x2s = (
    '&' => '&',
    '<' => '<',
    '>' => '>',
    '"' => '"',
    "'" => ''',
);
sub convert_xml_characters_to_entity_references {
    # If called from a logger object, then simply discard
    if ( $#_ >= 0 && UNIVERSAL::isa( $_[0], 'XMLUtil' ) ) {
        shift;
    }
    return undef if $#_ < 0;
    for (@_) {
        s/([&<>"'])/$x2s{$1}/geo;
    }
    return wantarray ? @_ : $_[0];
}
#*********************************************************************
#**                                                                 **
#**  Input : left indentation                                       **
#**          One object to convert                                  **
#**                                                                 **
#**  Output: Objects converted to XML                               **
#**                                                                 **
#**  Notes : Each parameter is a single XML string                  **
#**          The element name identifies the type                   **
#**          Objects are usually converted to a HASH                **
#**                                                                 **
#*********************************************************************
sub internal_object_to_xml {
    return undef if $#_ < 0;
    my $left           = shift;
    my $thing_to_print = shift;
    my $txt;
    if ( !defined($thing_to_print) ) {
        $txt = '<NULL/>';
    }
    else {
        my $ref_type = ref $thing_to_print;
        if ( !$ref_type ) {
            $txt = convert_xml_characters_to_entity_references($thing_to_print);
        }
        elsif ( $ref_type eq 'SCALAR' ) {
            # If the referenced item can not be converted
            # then it will not be
            my $internal =
              internal_object_to_xml( "$left  ", $$thing_to_print );
            if ( defined($internal) && length($internal) > 0 ) {
                $txt = "<REF>\n$internal\n$left</REF>";
            }
            else {
                $txt = "<REF/>";
            }
        }
        elsif ( $ref_type eq 'ARRAY' ) {
            my $internal_txt = '';
            foreach my $temp_thing (@$thing_to_print) {
                my $internal = internal_object_to_xml( "$left  ", $temp_thing );
                if ( length($internal) == 0 ) {
                    $internal_txt .= "$left  <Value/>\n";
                }
                elsif ( index( $internal, '<' ) >= $[ ) {
                    $internal_txt .=
                      "$left  <Value>\n$left    $internal\n$left  </Value>\n";
                }
                else {
                    $internal_txt .= "$left  <Value>$internal</Value>\n";
                }
            }
            if ( length($internal_txt) > 0 ) {
                $txt = "<ARRAY>\n$internal_txt$left</ARRAY>";
            }
            else {
                $txt = '<ARRAY/>';
            }
        }
        elsif ( UNIVERSAL::isa( $thing_to_print, 'HASH' ) ) {
            #
            # Remember that each hash has one universal iterator
            # recursive nesting will therefore cause stranger
            # results than a simple infinite loop.
            #
            my $hash_txt = "\n";
            my ( $key, $value );
            while ( ( $key, $value ) = each %$thing_to_print ) {
                my $value_xml;
                my $key_xml;
                if ( defined($value) ) {
                    $value_xml =
                      internal_object_to_xml( "$left      ", $value );
                }
                if ( defined($key) ) {
                    $key_xml = internal_object_to_xml( "$left      ", $key );
                }
                if ( defined($key) ) {
                    if (
                        index( $key_xml, '<' ) >= $[
                        || ( defined($value_xml)
                            && index( $value_xml, '<' ) >= $[ )
                      )
                    {
                        $hash_txt .= "$left  <Pair>\n$left    ";
                        if ( index( $key_xml, '<' ) >= $[ ) {
                            $hash_txt .=
                              "<Key>\n$left      $key_xml\n$left    </Key>\n";
                        }
                        elsif ( length($key_xml) > 0 ) {
                            $hash_txt .= "<Key>$key_xml</Key>\n";
                        }
                        else {
                            $hash_txt .= "<Key/>\n";
                        }
                        if ( defined($value_xml) ) {
                            if ( index( $value_xml, '<' ) >= $[ ) {
                                $hash_txt .=
"$left    <Value>\n$left      $value_xml\n$left    </Value>\n";
                            }
                            elsif ( length($value_xml) > 0 ) {
                                $hash_txt .=
                                  "$left    <Value>$value_xml</Value>\n";
                            }
                            else {
                                $hash_txt .= "$left    <Value/>\n";
                            }
                        }
                        $hash_txt .= "$left  </Pair>\n";
                    }
                    elsif ( defined($key_xml) ) {
                        $hash_txt .= "$left  <Pair>";
                        if ( length($key_xml) > 0 ) {
                            $hash_txt .= "<Key>$key_xml</Key>";
                        }
                        else {
                            $hash_txt .= "<Key/>";
                        }
                        if ( defined($value_xml) ) {
                            if ( length($value_xml) > 0 ) {
                                $hash_txt .= "<Value>$value_xml</Value>";
                            }
                            else {
                                $hash_txt .= "<Value/>";
                            }
                        }
                        $hash_txt .= "</Pair>\n";
                    }
                }
            }
            if ( defined($hash_txt) && index( $hash_txt, '<' ) >= $[ ) {
                $txt = "<HASH>$hash_txt$left</HASH>";
            }
            else {
                $txt = '<HASH/>';
            }
        }
    }
    return $txt;
}
#************************************************************
=pod
=head2 object_to_xml
=over 4
=item object_to_xml(@objects_to_transform)
=back
Transform an object into XML.
An attempt is made to make this object human readable.
Note that if the object is a package object that is referenced as a HASH
it is still embedded as a HASH.
Each object in the array is returned as a separate XML string.
An object that is not defined is returned as C<E<lt>NULLE<sol>E<gt>>>
A SCALAR is rendered XML safe by converting special characters to entity references.
It is otherwise left unchanged.
A Reference to a SCALAR is encoded as C<E<lt>REFE<sol>E<gt>> for a zero length SCALAR and as
C<E<lt>REFE<gt>valueE<lt>E<sol>REFE<gt>>
An ARRAY reference is encoded as either C<E<lt>ARRAYE<sol>E<gt>> or something similar to
C<E<lt>ARRAYE<gt>E<lt>VALUEE<gt>valueE<lt>E<sol>VALUEE<gt>E<lt>E<sol>ARRAYE<gt>>.
A HASH reference is encoded as
C<E<lt>HASHE<gt>E<lt>PAIRE<gt>E<lt>KEYE<gt>valueE<lt>E<sol>KEYE<gt>E<lt>VALUEE<gt>valueE<lt>VALUEE<gt>E<lt>E<sol>PAIRE<gt>E<lt>E<sol>HASHE<gt>>
A PAIR may be missing a C<VALUE> which means that it is undefined.
If a value is really the intended value, then it is rendered XML safe by using entity references
and no extra space is used. If the value is a reference to something else then the object is converted
to XML using extra white space and indentation for easier reading.
=cut
#************************************************************
sub object_to_xml {
    return undef if $#_ < 0;
    my @object_xmls = ();
    foreach my $thing_to_print (@_) {
        my $txt = internal_object_to_xml( '', $thing_to_print );
        push @object_xmls, $txt if defined($txt) && length($txt) > 0;
    }
    return wantarray ? @object_xmls : $object_xmls[0];
}
#*********************************************************************
#**                                                                 **
#**  Input : Array reference of an XML object                       **
#**          Index into the array from which to start               **
#**                                                                 **
#**  Output: XML converted back to objects                          **
#**                                                                 **
#**  Notes : This takes the string from object_to_xml               **
#**                                                                 **
#*********************************************************************
sub internal_xml_to_object {
    return undef if $#_ < 0;
    if ( ref( $_[0] ) ne 'ARRAY' ) {
        carp( "Array reference expected as the first parameter, not"
              . ref( $_[0] ) );
        return undef;
    }
    my $array_ref = shift;
    my $tag_start = 0;
    $tag_start = shift unless $#_ < 0;
    if ( $tag_start > $#$array_ref ) {
        confess(
"Requested an index of $tag_start when the array is not large enough"
        );
        return undef;
    }
    my $obj;
    my $element_name = $array_ref->[$tag_start];
    my $element;
    $element = $array_ref->[ $tag_start + 1 ] if $tag_start < $#$array_ref;
    if ( $element_name eq '0' ) {
        if ( defined($element) ) {
            $obj = convert_entity_references_to_characters($element);
        }
        else {
            $obj = '';
        }
    }
    elsif ( $element_name eq 'NULL' ) {
        # $obj is already undefined
    }
    elsif ( defined($element) && ref($element) ne 'ARRAY' ) {
        carp( "The element $element_name is not followed by an array, it is an "
              . ref($element) );
    }
    elsif ( $element_name eq 'REF' ) {
        my $temp = internal_xml_to_object( $element, 1 );
        $obj = \$temp;
    }
    elsif ( $element_name eq 'ARRAY' ) {
        #?? I think that this is wrong!
        my @my_array  = ();
        my $array_len = 0;
        if ( defined($element) ) {
            $array_len = $#$element;
        }
        for ( my $i = 1 ; $i < $array_len ; $i += 2 ) {
            # skip white space
            if ( $element->[$i] ne '0' ) {
                push @my_array, internal_xml_to_object( $element, $i );
            }
        }
        $obj = \@my_array;
    }
    elsif ( $element_name eq 'HASH' ) {
        my %my_hash   = ();
        my $array_len = 0;
        if ( defined($element) ) {
            $array_len = $#$element;
        }
        for ( my $i = 1 ; $i < $array_len ; $i += 2 ) {
            # skip white space
            if ( $element->[$i] eq 'Pair' ) {
                my ( $key, $val );
                my $internal_array     = $element->[ $i + 1 ];
                my $internal_array_len = $#$internal_array;
                for ( my $j = 1 ; $j < $internal_array_len ; $j += 2 ) {
                    if ( $internal_array->[$j] eq 'Key' ) {
                        my $key_array           = $internal_array->[ $j + 1 ];
                        my $length_of_key_array = $#$key_array;
                        if ( $length_of_key_array == 0 ) {
                            $key = '';
                        }
                        else {
                            my $idx_to_use = 1;
                            for (
                                my $k = 1 ;
                                $k < $length_of_key_array ;
                                $k += 2
                              )
                            {
                                $idx_to_use = $k if $key_array->[$k] ne '0';
                            }
                            $key =
                              internal_xml_to_object( $key_array, $idx_to_use );
                        }
                    }
                    elsif ( $internal_array->[$j] eq 'Value' ) {
                        my $val_array           = $internal_array->[ $j + 1 ];
                        my $length_of_val_array = $#$val_array;
                        if ( $length_of_val_array == 0 ) {
                            $val = '';
                        }
                        else {
                            my $idx_to_use = 1;
                            for (
                                my $k = 1 ;
                                $k < $length_of_val_array ;
                                $k += 2
                              )
                            {
                                $idx_to_use = $k if $val_array->[$k] ne '0';
                            }
                            $val =
                              internal_xml_to_object( $val_array, $idx_to_use );
                        }
                    }
                }
                $my_hash{$key} = $val;
            }
        }
        $obj = \%my_hash;
    }
    return $obj;
}
#************************************************************
=pod
=head2 xml_to_object
=over 4
=item xml_to_object(@xml_strings_to_convert_to_objects)
=back
Convert XML strings back into objects.
=cut
#************************************************************
sub xml_to_object {
    # If called from a logger object, then simply discard
    if ( $#_ >= 0 && UNIVERSAL::isa( $_[0], 'XMLUtil' ) ) {
        shift;
    }
    return undef if $#_ < 0;
    my @objects    = ();
    my $xml_parser = new XML::Parser( Style => 'Tree' );
  NEXT_XML_STRING: foreach my $xml_string (@_) {
        my $obj;
        my $tree = $xml_parser->parsestring($xml_string);
        $obj = internal_xml_to_object( $tree, 0 );
        push @objects, $obj;
    }
    return wantarray ? @objects : $objects[0];
}
#************************************************************
=pod
=head1 COPYRIGHT
Copyright 2002, Andrew Pitonyak (perlboy@pitonyak.org)
This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
=head1 Modification History
=head2 September 10, 2002
Version 1.00 Initial release
=cut
#************************************************************
1;