A Perl script that recursively walks a directory tree and extracts image URLs from the files it finds (typically HTML files). It looks for <img src="..." border="0" /> tags and writes each captured URL on its own line to an output file. Useful for scraping image links from websites or HTML archives.

Usage:

perl extr_img.pl <directory> <output_file>

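Recursively scans <directory>, reads every regular file found (not only files with an HTML extension), and writes to <output_file> the src value of tags matching the pattern <img src="..." border="0" /> (matched case-insensitively; the URL must not contain whitespace).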

Source Code

#!/usr/bin/perl
#
# Recursively collect the paths of all regular files below a directory.
#
# Arguments:
#   $_[0] - path of the directory to scan.
# Returns:
#   A list of file paths. Each path is the directory path joined to the entry
#   name with File::Spec->catfile, so the separator matches the platform.
#   Entries are visited in readdir order; the files of a subdirectory appear
#   at the position where that subdirectory was encountered.
# Side effects:
#   Increments the package variable $i once per file found and prints a
#   progress line to STDOUT after each directory has been scanned.
#   A directory that cannot be opened is reported on STDERR and contributes
#   no files.
sub tree {
    my ($root) = @_;

    # Running total shared across recursive calls (and visible to callers as
    # the package variable $i, as in earlier versions of this script).
    our $i;
    $i = 0 unless defined $i;

    my @filenames;

    if (!defined $root) {
        warn "tree: no directory given\n";
        return @filenames;
    }

    # Loaded here so the sub is self-contained; File::Spec is a core module.
    require File::Spec;

    # Lexical handle: each recursion level gets its own, and a failed
    # opendir is reported instead of being silently treated as "empty".
    my $dh;
    unless (opendir $dh, $root) {
        warn "tree: cannot open directory '$root': $!\n";
        return @filenames;
    }
    my @entries = readdir $dh;
    closedir $dh;

    foreach my $entry (@entries) {
        next if $entry eq '.' or $entry eq '..';

        my $path = File::Spec->catfile($root, $entry);
        if (-f $path) {
            push @filenames, $path;
            $i++;
        }
        elsif (-d $path) {
            # NOTE: -d follows symbolic links, so a link that points back
            # up the tree would recurse without end.
            push @filenames, tree($path);
        }
    }

    print "searching in $root, $i files so far...\n";
    return @filenames;
}

# Main program.
#
# Collects every file below the directory given as the first argument, scans
# each one line by line, and writes the src value of every
# <img src="..." border="0" /> tag to the file named by the second argument,
# one URL per line. Progress messages go to STDOUT.
use strict;
use warnings;

die "Usage: perl $0 <directory> <output_file>\n" unless @ARGV >= 2;
my ($source_dir, $out_path) = @ARGV;
die "$source_dir is not a directory\n" unless -d $source_dir;

my @files = tree($source_dir);

print "opening $out_path...\n";
# Three-argument open: the file name can never be interpreted as a mode
# or a pipe.
open(my $out, '>', $out_path) or die "$out_path fail...: $!";

foreach my $file (@files) {
    print "opening $file...\n";
    open(my $in, '<', $file) or die "$file fail...: $!";
    while (my $line = <$in>) {
        # /g in a loop so that several tags on the same line are all
        # captured, not just the first one.
        while ($line =~ m{<img src="(\S*)" border="0" />}gi) {
            print {$out} "$1\n";
        }
    }
    close($in);
}

print "closing $out_path...\n";
# Buffered write errors only surface at close, so check it.
close($out) or die "$out_path fail...: $!";
Loading tracks SoundCloud