##  Perl's and ReDIF-perl's unicode processing test script:
##


use ReDIF::Testing;


# Load check: the `use` statement is compiled inside a string eval so that a
# failure to load the module ends up in $@ instead of aborting the script.
eval q{ { use ReDIF::Unicode; } };
my $load_error = $@;
test not $load_error;


use ReDIF::Unicode qw( &has_utf8_bom &has_non_latin1_unicode 
		       &latin1_from_utf8   &utf8_from_latin1 );

# Fixture 1: UTF-8 text that should be expressible in latin1.
# The file is read as raw bytes (no decoding layer).  If it cannot be opened,
# the problem is reported on STDERR and the data stays empty, so the remaining
# tests still run instead of the script aborting.
my $data_u8 = '';
{
  my $path = 't/unicode/unicode-utf8-data_0';
  if( open my $fh, '<', $path ) {
    $data_u8 = join '', <$fh>;
    close $fh;
  } else {
    warn "cannot open $path: $!\n";
  }
}

# Fixture 2: should be the latin1 equivalent of the file mentioned earlier.
# Same handling as above.
my $data_lat1 = '';
{
  my $path = 't/unicode/unicode-latin1-data_0';
  if( open my $fh, '<', $path ) {
    $data_lat1 = join '', <$fh>;
    close $fh;
  } else {
    warn "cannot open $path: $!\n";
  }
}

print "data_lat1: $data_lat1\n";

# Sanity check on the fixtures themselves: the two byte strings must differ.
# Identical content (including both being empty because neither file could
# be read) is reported as a failure.
if( not defined $data_u8 
    or not defined $data_lat1
    or ( $data_u8 eq $data_lat1 ) ) {
  nok( "u8: '$data_u8'\nl1: '$data_lat1'" );
} else {
  ok();
}

{
  # The two fixtures are expected to have different lengths; both lengths are
  # taken in one pass and reported in the test message.
  my ( $ul, $ll ) = map { length $_ } ( $data_u8, $data_lat1 );

  test $ul != $ll, "lengths: $ul vs. $ll";
}


# Convert the UTF-8 fixture to latin1; the result must be defined and equal
# to the latin1 fixture.  On mismatch both strings and their lengths are
# included in the failure message.
my $data_u8_lat1 = latin1_from_utf8( $data_u8 );
if( defined $data_u8_lat1 
    and ( $data_u8_lat1 eq $data_lat1 ) ) {
  ok "converted data ok";
} else {
  nok join '', "data_u8_lat1 = '$data_u8_lat1' ", length ( $data_u8_lat1 ) ,"\n",
    "data_lat1    = '$data_lat1' ", length ( $data_lat1 ) , "\n";

}


{
#  use Encode;
  # Reverse direction: convert the latin1 fixture with utf8_from_latin1().
  my $u8 = utf8_from_latin1( $data_lat1 );

  # Taken before the `use bytes` line below, so this length is computed
  # without the pragma in effect.
  my $ll = length( $data_lat1 );
  
#  eval " { use Encode; Encode::_utf8_off( $u8 ); } ";
  # `use bytes` is lexically scoped: from this line to the end of the block
  # length() counts the bytes of the string's internal representation rather
  # than its characters.  Its position relative to the two length() calls is
  # therefore significant.
  use bytes;
  my $ul = length( $u8 );

  print "raw unicode: $u8\n";

  # Passes when the converted string and the latin1 original differ in size.
  test $ul != $ll, "lengths: $ul vs. $ll (2) [$u8 vs. $data_lat1]";

}


# test 4
# $data_u8 still holds the first UTF-8 fixture here; no non-latin1 character
# may be reported for it.

test not has_non_latin1_unicode $data_u8;


# test 5
# Reload $data_u8 from a record that should contain latin1-incompatible
# characters.  The file is read as raw bytes; if it cannot be opened the
# problem is reported on STDERR and the data stays empty.

$data_u8 = '';
{
  my $path = 't/unicode/test_utf8_1.rdf';
  if( open my $fh, '<', $path ) {
    $data_u8 = join '', <$fh>;
    close $fh;
  } else {
    warn "cannot open $path: $!\n";
  }
}

test has_non_latin1_unicode $data_u8;



# Fixture that should start with a UTF-8 byte order mark.  Read as raw bytes;
# if the file cannot be opened the problem is reported on STDERR and the data
# stays empty.
my $data_u8_bom = '';
{
  my $path = 't/unicode/unicode-utf8-bom_2';
  if( open my $fh, '<', $path ) {
    $data_u8_bom = join '', <$fh>;
    close $fh;
  } else {
    warn "cannot open $path: $!\n";
  }
}

test has_utf8_bom( $data_u8_bom ), "has bom: $data_u8_bom?"; 


###   regular expressions under 'use utf8' test



# Baseline: an e-mail-like pattern matched against an ASCII address, with no
# utf8 pragma in effect.
my $value = 'president@whitehouse.gov';
test $value =~ /^[\+\w\d\-\.\=\_]+\@[\w\d\-\.\=\_]+\.[\w\d\.\-\=]+$/;


{
    # Same input and same pattern, but this time the literals are compiled
    # inside the lexical scope of `use utf8`; the match must still succeed.
    use utf8;
    $value = 'president@whitehouse.gov';
    test $value =~ /^[\+\w\d\-\.\=\_]+\@[\w\d\-\.\=\_]+\.[\w\d\.\-\=]+$/;
}

{
    # A case-insensitive ASCII character-class match, also under `use utf8`.
    use utf8;
    $value = 'presidentwhitehouseGov';
    test $value =~ /^[a-z]+$/i;
}


# BEGIN { push @ARGV, '--spec' , './spec/redif.spec'; } 

use rr;

# Show which spec file the ReDIF configuration points at.
$redif_spec_filename = $ReDIF::CONFIG{spec_full_name}; 
print "redif.spec: $redif_spec_filename\n";

# Read the first template of the record file and check that its name-full
# value contains the byte 0xE9 between "L" and "VY".
rr::OpenRDF( "t/unicode/DOMINIQUE_2.rdf" ); 

my $n;
my $result = &rr::NextTemplate();

if( $result ) {
    use bytes;   # compare raw bytes, not characters
    $n      = $HashT{'name-full'};
    $result = $n =~ m/L\xe9VY/;
}

# The name is passed along as the test message.
test( $result, $n );


# Second pass over the same record, this time with the utf8_output option
# switched on.
rr::set_options( utf8_output => 1 ) ;

rr::OpenRDF( "t/unicode/DOMINIQUE_2.rdf" ); 

# Start from a failing state.  $result and $n still hold whatever the
# previous check left in them; without this reset, a template that cannot be
# read would be judged on stale data and the final test would pass whenever
# the previous check had failed.
$result = 1;
undef $n;

if( &rr::NextTemplate() ) {
    $n = $HashT{'name-full'};
    { 
      use bytes;   # match against raw bytes, not characters
      # Encode the name to UTF-8 octets.  The outcome of the eval is not
      # checked, so a failure leaves $n untouched -- presumably to tolerate
      # perls without Encode; TODO confirm.
      eval 'use Encode; $n = Encode::encode_utf8( $n );';
      $result = $n =~ m/L\xe9VY/; 
    }
}
print "Name: ", $n, "\n";

#use Encode;


# Passes only when a template was read and its name does not contain the
# single byte 0xE9 between "L" and "VY".
test not $result;


__END__

# NOTE: this code follows the `__END__` marker above, so perl never compiles
# or runs it; it is kept only as a record of an older check.
#
# As written: slurp a latin1 record file, then, inside a string eval, convert
# it with utf8_from_latin1(), extract the handle: attribute and its value,
# and convert the data back with latin1_from_utf8().  The check is meant to
# pass when the eval leaves $@ empty.
#
# NOTE(review): the path below lacks the unicode/ directory used by the live
# tests, and the value printed as "(latin1)" is produced by
# utf8_from_latin1() -- confirm both before reviving this code.
open LAT1, "<t/DOMINIQUE_2.rdf"; 
$data_lat1 = join '', <LAT1>;
close LAT1;

eval ' 
    use utf8;

    $data_u = utf8_from_latin1( $data_lat1 );

    print "utf8 data: >>>$data_u<<<\n";
    if( $data_u =~ m/\n(handle:\s+.+)/i ) {
	my $handle_attribute = $1;
	my ( $h_value ) = 
	    $handle_attribute =~ 
		m/((?:RePEc|ReLIS|mapin):[a-zA-Z]{3}:\d{4}-[01]\d-[0-3]\d:[\w\-]+)$/;

	print "Dominique Handle: " , $h_value , "\n";
        my $h_v_lat = utf8_from_latin1( $h_value );
	print "Dominique Handle (latin1): " , $h_v_lat , "\n";

    }

    my $data_lat_back = latin1_from_utf8( $data_u );

';
# NOTE(review): print_result is not defined or imported anywhere in the
# visible part of this file.
print_result( not $@ );
    


__END__


###   additional BOM identification test(s)
# NOTE: this code follows `__END__` markers earlier in the file, so perl never
# compiles or runs it.  It holds two variants of the same check -- does
# $data_u8_bom start with the character U+FEFF? -- and both print their
# "ok 8" / "not ok 8" line directly instead of going through the test helpers.

undef $result;
# Variant 1: the match is compiled in a block under `use utf8`.
{ 
    use utf8;
    if( $data_u8_bom =~ m/^\x{FEFF}/ ) { $result = 1; } 
    else { $result = 0; } 
} 
if( not defined $result or ($result == 0) ) {
    print "test 8 result : $result\n";
    print "not ";
}

print "ok 8\n";




undef $result;
# Variant 2: the same match inside a string eval; an eval error ($@) also
# counts as a failure.
eval ' { use utf8;
       if( $data_u8_bom =~ m/^\x{FEFF}/ ) { $result = 1; } 
       else { $result = 0; } } ';
if( $@ or not defined $result or ($result == 0) ) {
    print "test 8 result : $result\n";
    print "not ";
}

print "ok 8\n";


__END__



