package ACIS::Data::DumpXML;

use strict;
use vars qw(@EXPORT_OK $VERSION $LEVEL);

require Exporter;
*import = \&Exporter::import;
@EXPORT_OK=qw(dump_xml dump_xml2 dump);

$VERSION = "1.05";  # $Date: 2003/07/19 10:39:48 $

# configuration
use vars qw($INDENT $INDENT_STYLE $XML_DECL $NAMESPACE $NS_PREFIX
            $SCHEMA_LOCATION $DTD_LOCATION $ROOT_NAME $SIMPLE_VIEW
            $HASH_ELEMENT $ARRAY_ELEMENT $REF_ELEMENT $ENCODING
            $KEY_AS_HASH_ELEMENT);

$KEY_AS_HASH_ELEMENT = 1
 unless defined $KEY_AS_HASH_ELEMENT;

$ENCODING = 'UTF-8'
 unless defined $ENCODING;

$ROOT_NAME = 'data'
 unless defined $ROOT_NAME;

$SIMPLE_VIEW = 0
 unless defined $SIMPLE_VIEW;

$HASH_ELEMENT = 'hash-item'
 unless defined $HASH_ELEMENT;

$ARRAY_ELEMENT = 'list-item'
 unless defined $ARRAY_ELEMENT;

$REF_ELEMENT = 'reference'
 unless defined $REF_ELEMENT;

$INDENT_STYLE = "XML"
 unless defined $INDENT_STYLE;

$XML_DECL = 1
 unless defined $XML_DECL;

$INDENT = " "
 unless defined $INDENT;

$NAMESPACE = ''
 unless defined $NAMESPACE;

$NS_PREFIX = ''
 unless defined $NS_PREFIX;

$SCHEMA_LOCATION = ''
 unless defined $SCHEMA_LOCATION;

$DTD_LOCATION = ''
 unless defined $DTD_LOCATION;

$LEVEL = 0;

# other globals
use vars qw($NL);

use utf8;


use overload ();
use vars qw(%seen %ref $count $prefix %ref2 %references %used $depth );

sub dump_xml2 {
    local $DTD_LOCATION = "";
    local $XML_DECL = "";
    dump_xml(@_);
}

sub analyze;

sub dump_xml {
    local %seen;
    local %ref;
    local %references;
    local %used;
    local $count = 0;
    local $depth = 0;
    local $prefix = ($NAMESPACE && $NS_PREFIX) ? "$NS_PREFIX:" : "";

    local $NL = ($INDENT) ? "\n" : "";

    my $out = "";
    $out .= qq(<?xml version="1.0" encoding="$ENCODING"?>\n)
     if $XML_DECL;
    
    $out .= qq(<!DOCTYPE data SYSTEM "$DTD_LOCATION">\n)
     if $DTD_LOCATION;

    my $namespace = '';

    $namespace = ($NS_PREFIX ? "xmlns:$NS_PREFIX" : "xmlns") . qq(="$NAMESPACE")
     if $NAMESPACE;
    $namespace .= qq( xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="$SCHEMA_LOCATION")
     if $SCHEMA_LOCATION;

    my $structure;
    
    if ( (scalar @_) == 1)
     { $structure = shift; }
    else
     { $structure = \@_; }
     
    analyze ($structure);
    
    foreach (keys %references)
     {
      delete $references{$_}
       unless ($references{$_});
     }
    
    $out .= simple_dump ($structure, $ROOT_NAME, [$namespace]);

    $count = 0;
    $out =~ s/\01/$ref{++$count} ? qq( id="r$ref{$count}") : ""/ge;

    print STDERR $out unless defined wantarray;
    $out;
}

*dump = \&dump_xml;

sub analyze
 {
  my $structure = shift;
  
  return unless defined $structure;
  
  if (overload::StrVal($structure) =~ /^(?:([^=]+)=)?([A-Z]+)\(0x([^\)]+)\)$/)
   {
    if (defined $references{$3})
     { 
      return
       if ($references{$3});
      
      $references{$3} = ++$count;
      return;
     }
    else
     {
      $references{$3} = 0;
      if ($2 eq 'HASH')
       {
        foreach (values %$structure)
         {
          analyze ($_);
         }
       }
      elsif ($2 eq 'ARRAY')
       {
        foreach (@$structure)
         {
          analyze ($_);
         }
       }
      elsif ($2 eq 'REF') 
       {
        analyse ($$structure);
       }
      return; 
     }
   }
 }

sub simple_dump
 {
  my $rval  = \$_[0]; shift;
  my $tag   = shift;
  my $attributes = shift || [];
  
  local $LEVEL = $LEVEL + 1;
  
  my $indent = $INDENT x $LEVEL;
  
  my $deref = shift;
  $rval = $$rval if $deref;

  my $attr_str = '';

  $attr_str = ' ' . join (' ', @$attributes)
   if (defined $attributes and scalar @$attributes);

  my($class, $type, $id);
  if (overload::StrVal($rval) =~ /^(?:([^=]+)=)?([A-Z]+)\(0x([^\)]+)\)$/)
   {
    $class = $1 ? " class=" . quote($1) : "";
    $type  = $2;
    $id    = $3;
   }
  else
   { return qq($indent<!-- Can\'t parse \") . overload::StrVal($rval) . qq(\" -->); }
  
  if (my $ref_no = $references{$id})
   {
    return qq($indent<${prefix}$tag$attr_str><$REF_ELEMENT to="$ref_no"/></${prefix}$tag>)
     if ($used{$id});
    $used{$id} = 'yes';
   }

  $id = $references{$id} ? " id=" . quote ($references{$id}) : '';

  if ($type eq "SCALAR" || $type eq "REF")
   {
    return "$indent<${prefix}$tag$attr_str><undef /></${prefix}$tag>"
     unless defined $$rval;
    
    if (ref $$rval)
     {
      $depth++;
      return
       "$indent<${prefix}$tag$class$id$attr_str>".
       simple_dump($$rval, $REF_ELEMENT, undef, 1).
       "</${prefix}$tag>"
        if (((ref $$rval) eq 'SCALAR') or ((ref $$rval) eq 'REF'));
      
      #return format_list (simple_dump($$rval, $tag, $attributes, 1));
      return simple_dump($$rval, $tag, $attributes, 1);
     }

    my($str, $enc) = esc($$rval);
    #my $enc = '';
    #my $str = $$rval;
    return "$indent<${prefix}$tag$class$id$attr_str$enc>$str</${prefix}$tag>";
   }
  elsif ($type eq "ARRAY")
   {
    my @array;
    return "$indent<${prefix}$tag$class$id$attr_str><empty-array /></${prefix}$tag>"
     unless scalar @$rval;
    
    my $counter = 0;
    foreach (@$rval)
     {
      push @array, simple_dump($_, $ARRAY_ELEMENT, ["pos=\"$counter\""]);
      $counter++;
     }
    return "$indent<${prefix}$tag$class$id$attr_str>$NL".
           join ("$NL", @array).
           "$NL$indent</${prefix}$tag>";
   }
  elsif ($type eq "HASH")
   {
    
    my $out = "$indent<${prefix}$tag$class$id$attr_str>";
    
    return "$out<empty-hash /></${prefix}$tag>"
     unless scalar keys %$rval;
    
    $out .= $NL;
    
    foreach my $key (sort keys %$rval)
     {
      my $val = \$rval->{$key};
      #die $key;
      if ($key =~ /^(?=[^\d\W]|:)[\w:-]*$/ and $KEY_AS_HASH_ELEMENT)
       {
        my $quotedkey = quote ($key);
        $val = simple_dump($$val, $key, ["key=$quotedkey"]);
       }
      else
       {
        my $quotedkey = quote ($key);
        $val = simple_dump($$val, $HASH_ELEMENT, ["key=$quotedkey"]);
       }  
#      if ($indent)
#       {
#        $val =~ s/^/$indent/gm;
#        #$out .= $indent;
#       }
      $out .= $val . $NL;
     }
    if ($INDENT_STYLE eq "Lisp")
     {
      # kill final NL
      substr($out, -length($NL)) = "";
     }
    $out .= "$indent</${prefix}$tag>";
    return $out;
   }
  elsif ($type eq "GLOB")
   {
    return "$indent<${prefix}glob$class$id/>";
   }
  elsif ($type eq "CODE")
   {
    return "$indent<${prefix}code$class$id/>";
   }
  else
   {
    #warn "Can't handle $type data";
    return "<!-- Unknown type $type -->";
   }
  die "Assert";
 }


#sub format_list {
#    my @elem = @_;
#    if ($INDENT) {
#	for (@elem) { s/^/$INDENT/gm; }
#    }
#    return join($NL, @elem );
#}



# put a string value in double quotes
sub quote {
    local($_) = shift;

#    $_ = pack('U*', unpack('U*', $_ ));
    
    s/&/&amp;/g;
#    s/\"/&quot;/g;
    s/]]>/]]&gt;/g;
    s/</&lt;/g;
    s/([^\040-\176])/sprintf("&#x%x;", ord($1))/ge
     unless ($ENCODING eq 'UTF-8')
       ;
    return qq("$_");
}

sub esc
 {
  local($_) = shift;
    
#  $_ = pack('U*', unpack('U*', $_ ));

  if ($ENCODING eq 'UTF-8')
   {
    s/&/&amp;/g;
    s/</&lt;/g;
    s/]]>/]]&gt;/g;
    #s/([^\x0a\x0d\040-\176])/sprintf("&#x%x;", ord($1))/ge;

    return $_, "";
   }
  elsif (/[\x00-\x08\x0B\x0C\x0E-\x1F\x7f-\xff]/)
   {
      # \x00-\x08\x0B\x0C\x0E-\x1F these chars can't be represented in XML at all
      # \x7f is special
      # \x80-\xff will be mangled into UTF-8
    require MIME::Base64;
    my $nl = (length($_) < 40) ? "" : $NL;
    my $b64 = MIME::Base64::encode($_, $nl);
    return $nl.$b64, qq( encoding="base64");
   }

  
 }

1;

__END__

=head1 NAME

ACIS::Data::DumpXML - Dump arbitrary data structures as XML

=head1 SYNOPSIS

 use ACIS::Data::DumpXML qw(dump_xml);
 $xml = dump_xml(@list)

=head1 DESCRIPTION

        XML,
 Data::DumpXML.   ,  
dumper    parser.      
namespace - ..      root , 
, , .     , 
   ,   
 configuration variables      DIFFERENCE.

This module provide a single function called dump_xml() that takes any
perl data structure as argument and produce a string as result.
The string returned is an XML document that represents any perl data
structures passed in.  Reference loops are handled correctly.

The following data model is used:

   data : scalar*
   scalar = undef | str | ref | alias
   ref : scalar | array | hash | glob | code
   array: scalar*
   hash: (key scalar)*

As an example of the XML documents produced; the following call:

  $a = bless [1,2], "Foo";
  dump_xml($a);

will produce:
  
  <?xml version="1.0" encoding="UTF-8"?>
  <data class="Foo">
   <list-item>1</list-item>
   <list-item>2</list-item>
  </data>
  
If dump_xml() is called in void context, then the dump will be printed
on STDERR automatically.  For compatibility with C<Data::Dump> there
is also an alias for dump_xml() simply called dump().

The C<ACIS::Data::DumpXML::Parser> is a class that can restore
data structures dumped by dump_xml().


=head2 Configuration variables

The generated XML is influenced by a set of configuration variables.
If you modify them, then it is a good idea to localize the effect. E.g.:

  sub my_dump_xml {
      local $Data::DumpXML::INDENT = "";
      local $Data::DumpXML::XML_DECL = 0;
      local $Data::DumpXML::DTD_LOCATION = "";
      local $Data::DumpXML::NS_PREFIX = "dumpxml";

      return dump_xml(@_);
  }

This variables are used from originally written Data::DumpXML:

=over

=item $Data::DumpXML::INDENT

You can set the variable $Data::DumpXML::INDENT to control the amount
of indenting.  The variable contains the whitespace you want to be
used for each level of indenting.  The default is a single space.  To
suppress indenting set it as "".

=item $Data::DumpXML::INDENT_STYLE

This variable controls where end element are placed.  If you set this
variable to the value "Lisp" then end tags are not prefixed by NL.
This give a more compact output.

=item $Data::DumpXML::XML_DECL

This boolean variable controls whether an XML declaration should be
prefixed to the output.  The XML declaration is the <?xml ...?>
thingy.  The default is 1.  Set this value to 0 to suppress the
declaration.

=item $Data::DumpXML::NAMESPACE

This variable contains the namespace used for the the XML elements.
The default is to let this be a URI that actually resolve to the XML
Schema on CPAN.  Set it to "" to disable use of namespaces.

=item $Data::DumpXML::NS_PREFIX

This variable contains the namespace prefix to use on the elements.
The default is "" which means that a default namespace will be declared.

=item $Data::DumpXML::SCHEMA_LOCATION

This variable contains the location of the XML Schema.  If this
variable is non-empty, then an C<xsi:schemaLocation> attribute will be
added the top level C<data> element.  The default is to not include
this as the location can be guessed from the default XML namespace
used.

=item $Data::DumpXML::DTD_LOCATION

This variable contains the location of the DTD.  If this variable is
non-empty, then a <!DOCTYPE ...> will be included in the output.  The
default is to point to the DTD on CPAN.  Set it to "" to suppress the
<!DOCTYPE ...> line.

=back

,      XML.

=over

=item $Data::DumpXML::ROOT_NAME

   root XML .  
  'data'

=item $Data::DumpXML::ARRAY_ELEMENT

   XML ,  
 perl.     'list-item'

=item $Data::DumpXML::HASH_ELEMENT

   XML ,  
 perl.     'hash-item'

=item $Data::DumpXML::REFERENCE

   XML ,  
  perl.     'reference'

=back

=head1 DIFFERENCE

  output Data::DumpXML  ACIS::Data::DumpXML
  ,  ACIS::Data::DumpXML  
 perl.

:

  $a = bless [1,2], "Foo";
  dump_xml($a);

     $a   blessed "Foo" ,
  .

ACIS::Data::DumpXML

  <?xml version="1.0" encoding="UTF-8"?>
  <data class="Foo">
   <list-item>1</list-item>
   <list-item>2</list-item>
  </data>

Data::DumpXML  
  
  <?xml version="1.0" encoding="US-ASCII"?>
  <data xmlns="http://www.cpan.org/.../Data-DumpXML.xsd">
   <ref>
    <array class="Foo">
     <str>1</str>
     <str>2</str>
    </array>
   </ref>
  </data>

  ACIS::Data::DumpXML   ,  :
 perl   ,   

$a -> {b} -> {c} -> {d} ...

  

$a -> {b}{c}{d} ...

       XML, 
       XML:

  root-level     ,  ,
  dumper,     
 perl,  scalar.

,     'list-item'  'hash-item',
     .

        
 (<list-item></list-item>  <list-item />),  
   .

       <empty-array /> 
<empty-hash />;    <undef />

=head1 BUGS

Class names with 8-bit characters will be dumped as Latin-1, but
converted to UTF-8 when restored by the Data::DumpXML::Parser.

The content of globs and subroutines are not dumped.  They are
restored as the strings; "** glob **" and "** code **".

LVALUE and IO objects are not dumped at all.  They will simply
disappear from the restored data structure.

=head1 SEE ALSO

L<ACIS::Data::DumpXML::Parser>, L<XML::Parser>, L<XML::Dumper>, L<Data::Dump>

=head1 AUTHORS

The C<ACIS::Data::DumpXML> module is written by Ivan Baktcheev
<arglebarle@tut.by>, based on C<Data::DumpXML>.

The C<Data::DumpXML> module is written by Gisle Aas <gisle@aas.no>,
based on C<Data::Dump>.

The C<Data::Dump> module was written by Gisle Aas, based on
C<Data::Dumper> by Gurusamy Sarathy <gsar@umich.edu>.

 Copyright 2003 Ivan Baktcheev.
 Copyright 1998-2003 Gisle Aas.
 Copyright 1996-1998 Gurusamy Sarathy.

This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut
