Find and Replace multiple texts from file a in file b

I have a csv file which has on column A (chinese phrases) and on column B (their english equivalents).

On another .xml file I have large amounts of chinese texts. I want to make the script to search from csv file and if found replace the terms on xml file.

The closest that I got to my aim is this code.

use Cwd;
use File::Basename;
use File::Copy;
use strict;
use warnings;


my $DIR = $0;
my $filename = basename($0);
$DIR = '/Users/moody/Desktop/chinese/';
#my $tablecounter = 0;
my @GLOSSARY;
my $glossaryFile = 'Chinese.csv';
my $glossaryCount = 1;
my $RAWFILE = "Chinese.xml";
my @MODDEDFILES = ('Chinese-modded.html');

BuildGlossary();

my $FILETEXT = "";
my @MODDED_FILETEXT = ('','','','','','','','','');
ReadRaw($RAWFILE);

my $k = 1;
while ($k < $glossaryCount) {
  my $search = $GLOSSARY[$k][0];
  my $replace = $GLOSSARY[$k][1];
  $FILETEXT =~ s/$search/$replace/g;
  $k += 1;
}
$FILETEXT =~ s/<p><\/p>/<\/br>/g;

PrintText($MODDEDFILES[0]);
SplitText();
$k = 1;
while ($k < 3) {
  PrintTextModded($MODDEDFILES[$k], $k);
  $k += 1;
}

exit;



sub PrintText {
  my $filename = $_[0];
  open(my $fh, '>:encoding(UTF-8)', $filename)
    or die "Could not open file '$filename' $!";
  print $fh "<html><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /><head><style>body {background-color: #bfbfbf; margin-right: 20%; margin-left: 20%;} p {margin: 0px; text-indent: 1.5em; word-wrap: break-word;} h3 {margin: 0px; text-align: center;} hr {border-color:black;}</style></head><body>\n\n";
  print $fh $FILETEXT;
  print $fh "</body></html>\n";
  close $fh;
}

sub SplitText {
  my @temp = split(/(<p>------------------------------------<\/p>)/, $FILETEXT);
  shift @temp;
  my $tempSize = @temp;
  for (my $i=1; $i <= $tempSize; $i+=4) {
    $temp[$i] =~ s/<p>/<h3>/g;
    $temp[$i] =~ s/<\/p>/<\/h3>/g;
  }
  for (my $i=0; $i <= $tempSize; $i+=2) {
    $temp[$i] = "<hr noshade>\n";
  }

  my @volumes = ([0,111],[112,224]);
  for (my $k=0; $k < 2; $k++) {
    my $temptext = "";
    my $i = int($volumes[$k][0]);
    my $u = $volumes[$k][1];
    for (my $j=$i; $j <= $u; $j++) {
      $temptext = $temptext.$temp[$j];
      #$temp[$j] = "";
    }
    $MODDED_FILETEXT[$k] = $temptext;
  }
}

sub PrintTextModded {
  my $filename = $_[0];
  my $num = $_[1];
  open(my $fh, '>:encoding(UTF-8)', $filename)
    or die "Could not open file '$filename' $!";
  print $fh "<html><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /><head><style>body {background-color: #bfbfbf; margin-right: 20%; margin-left: 20%;} p {margin: 0px; text-indent: 1.5em; word-wrap: break-word;} h3 {margin: 0px; text-align: center;} hr {border-color:black;}</style></head><body>\n\n";

  print $fh $MODDED_FILETEXT[$num-1];
  print $fh "</body></html>\n";
  close $fh;
}


sub ReadRaw {
  my $filename = $_[0];
  open(my $fh, '<:encoding(UTF-8)', $filename)
    or die "Could not open file '$filename' $!";
  $FILETEXT = "";
  while (my $row = <$fh>) {
    chomp $row;
    $FILETEXT = $FILETEXT."<p>".$row."</p>\n";
  }
  close $fh;
}

sub BuildGlossary {
  open(my $fh, '<:encoding(UTF-8)', $glossaryFile)
    or die "Could not open file '$glossaryFile' $!";
  my $num = 1;
  while (my $row = <$fh>) {
    chomp $row;
    my @temp = split "," , $row;
    $GLOSSARY[$num][0] = $temp[0];
    $GLOSSARY[$num][1] = $temp[1];
    $num += 1;
    $glossaryCount += 1;
  }
  close $fh;
}

However, the problem is that it still doesn't work. The problems that I face at the moment are:

Use of uninitialized value in concatenation (.) or string at find.pl line 74.
Use of uninitialized value $filename in open at find.pl line 85.
Use of uninitialized value $filename in concatenation (.) or string at find.pl line 85.
Could not open file '' No such file or directory at find.pl line 85.

Is there anyone who could help me out?

1 answer

  • answered 2018-04-17 05:45 Ali

    The issue was with $DIR = '/Users/moody/Desktop/chinese/'; It had to be changed to $DIR =~ s/$FILENAME//g;