Extract Images (2004)

A Perl script that recursively searches through a directory tree and extracts image URLs from HTML files. Finds all <img src="..."> tags and writes the image URLs to an output file. Useful for scraping image links from websites or HTML archives.

Usage:

perl extr_img.pl <directory> <output_file>

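Recursively walks <directory> and scans every regular file found (files are not filtered by extension, so non-HTML files are read too). Each image URL that appears in a tag of the form <img src="..." border="0" /> is written to <output_file>, one URL per line.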

Source Code:

#!/usr/bin/perl
#
# tree($root)
#
# Recursively collects the paths of all regular files below $root.
#
# Parameters:
#   $root - path of the directory to walk.
#
# Returns:
#   A list of file paths. Each path is $root joined with the entry names
#   below it using the platform's directory separator. Entries appear in
#   readdir order; a subdirectory's files appear at the position of the
#   subdirectory itself.
#
# Side effects:
#   Prints one progress line per visited directory to STDOUT and adds the
#   number of files found to the package variable $i, which acts as a
#   running total across the whole recursion. A directory that cannot be
#   opened is reported on STDERR and contributes no files.
sub tree {
	# Core module, loaded here so the sub does not depend on a file-level
	# "use" line being present.
	use File::Spec;

	my ($root) = @_;
	my @filenames;

	# Running total shared by all recursion levels. Start it at 0 so the
	# progress line shows a number even before the first file is found.
	our $i;
	$i = 0 unless defined $i;

	# A lexical handle keeps each recursion level's directory handle
	# private instead of sharing one bareword handle.
	if (opendir(my $dh, $root)) {
		my @entries = readdir $dh;
		closedir $dh;

		foreach my $entry (@entries) {
			# Skip the self and parent links, otherwise the walk never ends.
			next if $entry eq '.' or $entry eq '..';

			# catfile uses the native separator: a backslash on Windows
			# (as the original hard-coded) and a slash elsewhere.
			my $path = File::Spec->catfile($root, $entry);

			if (-f $path) {
				push @filenames, $path;
				$i++;
			}
			push @filenames, tree($path) if -d $path;
		}
	}
	else {
		# Best effort: report the problem and carry on with the rest.
		warn "cannot open directory $root: $!\n";
	}

	print "searching in $root, $i files so far...\n";
	return @filenames;
}

# Main program.
#
# Arguments:
#   $ARGV[0] - directory to walk with tree()
#   $ARGV[1] - output file; it is created or truncated
#
# Every file returned by tree() is read line by line, and each image URL
# found in a tag of the form <img src="..." border="0" /> is written to the
# output file, one URL per line. The script dies if the output file or any
# input file cannot be opened.
#
# NOTE(review): the loop previously read "while ()" with an empty pattern
# "//im", which never reads the file and never sets $1. The readline and
# the pattern below are reconstructed from the documented tag form; confirm
# against the original script if it is available.

# Lexically scoped: these pragmas cover the code from here to end of file.
use strict;
use warnings;

die "usage: $0 <directory> <output_file>\n" unless @ARGV >= 2;
my ($root_dir, $out_path) = @ARGV;

my @files = tree($root_dir);

print "opening $out_path...\n";
# Three-argument open keeps the file name from being parsed as a mode.
open(my $out, '>', $out_path) or die "$out_path fail... ($!)";

foreach my $file (@files) {
	print "opening $file...\n";
	open(my $in, '<', $file) or die "$file fail... ($!)";
	while (my $line = <$in>) {
		# /g in a loop picks up every matching tag on the line, not just
		# the first; /i accepts upper-case tag and attribute names.
		while ($line =~ m{<img\s+src="([^"]*)"\s+border="0"\s*/>}gi) {
			print {$out} "$1\n";
		}
	}
	close($in);
}

print "closing $out_path...\n";
# Buffered write errors only surface at close, so check it.
close($out) or die "$out_path fail... ($!)";