rdler - ruby

rane 18.06.06 19:53

kätevä skripta suurta määrää samannimisiä filejä lataillessa. ps. älkää antako miinusta siksi että wgetillä voi tehdä saman :)

 Tekstiversio  Arvo: 1 (3 ääntä)  Äänestä: +  -
# rdler.rb - rane
# v0.2 18.6.2006
# Downloads every file in a numeric range embedded in the URL
# (e.g. http://www.microsoft.com/presspass/images/gallery/execs/web/gates-[1-10].jpg).
# Files are saved to the current directory, or to a newly created directory when the -d switch is given.
# Usage: run "ruby rdler.rb -h" to list the optional command-line arguments.
# changelog:
# 0.2 - *nix compatible
# 0.1 - initial release

require 'net/http'
require 'uri'
require 'optparse'
require 'ostruct'
require "fileutils"

# Command-line option parsing for rdler.
class Optparse
  # Parses rdler's command-line switches.
  #
  # args - Array of argument strings (normally ARGV). Recognised switches are
  #        removed from it in place by OptionParser#parse!.
  #
  # Returns an OpenStruct with:
  #   dir      - true when -d/--usedir was given, otherwise false
  #              (true: download into a new directory, false: current directory)
  #   url      - the URL template given with -u/--url, otherwise the default
  #   min_size - Integer byte threshold given with -m/--minsize, otherwise 10000;
  #              a downloaded file is ignored if its size is below this
  #
  # -h/--help prints the usage text and exits the process.
  def self.parse(args)
    # default values
    options = OpenStruct.new
    options.dir = false
    options.url = "" # insert default value for URL if you wish
    options.min_size = 10000

    parser = OptionParser.new do |opts|
      opts.banner = "Usage: rdler.rb [options]"
      opts.separator ""
      opts.separator "Specific options:"
      opts.on("-u", "--url [URL]", String, "e.g. http://www.microsoft.com/presspass/images/gallery/execs/web/gates-[1-5].jpg") do |url|
        # The argument is optional, so a bare -u yields nil; keep the default
        # in that case instead of overwriting it with nil.
        options.url = url if url
      end
      opts.on("-d", "--usedir", "Use directories - default: false") do |dir|
        options.dir = dir
      end
      # The BYTES placeholder makes this switch take a value; without it the
      # switch is parsed as a plain flag and the number is never stored.
      opts.on("-m", "--minsize BYTES", Integer, "default: 10000 bytes - downloaded file will be ignored if it's size is below this") do |bytes|
        options.min_size = bytes
      end
      opts.separator ""
      opts.separator "Common options:"
      opts.on_tail("-h", "--help", "Show this message") do
        puts opts
        exit
      end
    end

    parser.parse!(args)
    options
  end
end
# Parse the command line once; the rest of the script reads its settings
# (url, dir, min_size) from this object.
options = Optparse.parse(ARGV)

# Returns the path separator to append to directory names:
# File::ALT_SEPARATOR when Ruby defines one for the platform, otherwise
# File::SEPARATOR.
def slash
  File::ALT_SEPARATOR || File::SEPARATOR
end

# ---------------------------------------------------------------------------
# Main script: expand the "[first-last]" placeholder in the URL template and
# download each resulting URL, skipping files that already exist locally and
# files smaller than options.min_size.
# ---------------------------------------------------------------------------

# Matches a "[...]" placeholder in the URL template; capture 1 is its contents.
range_pattern = /\[(.*?)\]/

# Without a URL there is nothing to derive a file name from.
abort "rdler: no URL given (use -u URL, or -h for help)" if options.url.to_s.empty?

# With -d, derive a directory name from the URL (scheme stripped, slashes
# turned into dashes) and end it with the platform separator so a file name
# can be appended directly. Otherwise files go to the current directory.
options.dir =
  if options.dir
    options.url.gsub(/^[a-z]+:\/\//, '').gsub(/\/+/, '-') << slash
  else
    ''
  end

# mkdir_p is a no-op for an existing directory, so no existence check is needed.
FileUtils.mkdir_p(options.dir) unless options.dir.empty?

placeholder = options.url[range_pattern, 1]
if placeholder.nil?
  # No range in the URL: fetch the single URL as given (the substitution
  # below then matches nothing).
  ids = ['']
else
  first_text, last_text = placeholder.scan(/\b\d+\b/) # "1-5" -> ["1", "5"]
  abort "rdler: range must look like [first-last], got [#{placeholder}]" unless first_text && last_text

  # A start value written with leading zeroes (e.g. 001) requests zero-padded
  # ids of that width in the generated URLs.
  width = first_text.length > 1 && first_text.start_with?('0') ? first_text.length : 0

  # Iterate numerically so ranges starting at 0 (or 00, 000, ...) work too.
  ids = (first_text.to_i..last_text.to_i).map { |n| n.to_s.rjust(width, '0') }
end

ids.each do |id|
  uri = URI.parse(options.url.gsub(range_pattern, id))
  file_name = uri.to_s.split('/').last
  file_path = options.dir + file_name
  print "Getting: #{file_name}.. "
  if File.exist?(file_path)
    puts "EXISTS!"
    next
  end

  body = Net::HTTP.get(uri)
  print "OK! #{body.bytesize} bytes"
  if body.bytesize < options.min_size
    # Too small to be a real file (e.g. an error page): do not keep it.
    print " < #{options.min_size} - ignored"
  else
    # Block form closes the handle even if the write raises.
    File.open(file_path, 'wb') { |f| f.write(body) }
  end
  print "\n"
end

rane 19:59 18.6.06 
C:\ruby\rane>rdler.rb -u http://www.microsoft.com/presspass/images/gallery/execs/web/gates-[1-5].jpg
Getting: gates-1.jpg.. OK! 245701 bytes
Getting: gates-2.jpg.. OK! 138686 bytes
Getting: gates-3.jpg.. OK! 347822 bytes
Getting: gates-4.jpg.. OK! 243499 bytes
Getting: gates-5.jpg.. OK! 208770 bytes