#!/usr/bin/env ruby

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# Directory that holds config files and, by default, the cache files.
$dot_dir = "~/.pouncer"
$: << $dot_dir

require 'fileutils'
require 'getoptlong'
require 'open-uri'
require 'tempfile'
require 'yaml'
require 'zlib'
require 'scrapi'

module Kernel
  # Print one tagged diagnostic line to STDERR.
  #
  # The first argument is the tag (a one-character string such as '!' or
  # '<', or a character code); the remaining arguments are joined with
  # single spaces. When $batch is set, $config_name is printed in
  # parentheses right after the tag.
  def stderr *args
    char = args.first
    # Integer instead of Fixnum: Fixnum no longer exists on current Rubies,
    # while Integer matches character codes on every version.
    char = char.chr if char.is_a? Integer
    STDERR << char << " "
    if $batch
      STDERR << "(" << $config_name << ") "
    end
    # args[1..-1] is nil when called without arguments
    STDERR << (args[1..-1] || []).join(" ")
    STDERR << "\n"
    STDERR.flush
  end

  # Report exception e (default: the exception currently being handled)
  # through stderr: message and class first, then one line per backtrace
  # entry.
  def print_exception e=$!
    stderr '!', "#{e.to_s} (#{e.class})"
    # backtrace is nil for an exception that was never raised
    (e.backtrace || []).each do |line|
      stderr '!', line
    end
  end

  # Extract [season, episode] from a name written like "2x05" or "S02E05".
  # Returns nil when neither pattern matches.
  def std_season_episode name
    case name
    when /(\d+)x(\d+)/i then [$1.to_i, $2.to_i]
    when /s(\d+)e(\d+)/i then [$1.to_i, $2.to_i]
    end
  end

  # download "src" into tempfile, yield tempfile name to the block
  #
  # Returns the block's value, or nil (after logging) when anything in the
  # download or in the block raised a StandardError.
  def wget src
    tmpfile = Tempfile.new 'pouncer'
    tmpfile.binmode
    stderr '<', src
    # src usually comes from a scraped page, so it must never reach
    # Kernel#open (which treats "|cmd" as a command to run and, on current
    # Rubies, does not fetch URLs anyway). Go through URI explicitly.
    uri = URI.parse src.gsub("[", "%5B").gsub("]", "%5D")
    unless uri.respond_to? :open
      raise ArgumentError, "unsupported URL: #{src}"
    end
    uri.open do |f|
      tmpfile << f.read
      tmpfile.flush
      yield tmpfile.path
    end
  rescue
    stderr '!', "couldn't download", src
    nil
  ensure
    # tmpfile is nil when Tempfile.new itself failed
    tmpfile.close if tmpfile
  end
end

# Common base for all scrapers: serialization of scraped rows to and from
# the plain hashes stored in the cache.
class SearcherBase < Scraper::Base
  # ugh
  # Extra attribute names declared by a subclass through the class-level
  # @xattrs instance variable (nil when the subclass declares none).
  def self.xattrs; @xattrs end
  def xattrs; self.class.xattrs end

  # Extractor used as `:attr => :integer` in `process` rules: the
  # element's text converted with to_i.
  def self.integer elem
    text(elem).to_i
  end

  # Attributes written to the cache for every torrent.
  DumpAttrs = [:name, :link, :filename, :size, :seeds, :leeches,
               :unique_id, :xattrs]
  # Attributes available on objects rebuilt from the cache.
  LoadAttrs = DumpAttrs + [:season, :episode, :tempfname]

  # Build a hash of DumpAttrs plus this class's xattrs. An attribute whose
  # reader raises is stored as nil.
  def to_hash
    (DumpAttrs + (xattrs or [])).inject({}) do |h, a|
      h[a] = send a rescue nil
      h
    end
  end

  # Rebuild a torrent object (an anonymous Struct instance with LoadAttrs
  # plus the stored xattrs as members) from a cached hash. Returns nil for
  # anything that is not a Hash, e.g. the :downloaded marker.
  def self.from_hash hash
    if hash.is_a? Hash
      extended_load_attrs = LoadAttrs + (hash[:xattrs] or [])
      Struct.new(*extended_load_attrs).new(*extended_load_attrs.map {|a| hash[a]})
    end
  end
end

# One row of a Pirate Bay result table. Size is kept in MiB.
class PirateRow < SearcherBase
  process 'td:nth-of-type(4)>a', :link => '@href'
  process 'a.detLink', :name => :text
  attr_reader :size
  alias_method :unique_id, :link

  process 'td:nth-last-of-type(3)' do |elem|
    @size = case elem.to_s
            when /(\d+).*MiB/ then $1.to_i
            when /(\d+).*GiB/ then $1.to_i * 1024
            else 0
            end
  end

  process 'td:nth-last-of-type(2)', :seeds => :integer
  process 'td:last-of-type', :leeches => :integer

  # Last path component of the link.
  def filename
    link.split('/').last
  end
end

# Pirate Bay search result page.
class PirateBay < SearcherBase
  process 'table#searchResult tr', 'torrents[]' => PirateRow

  # URL of result page i for the given search term. Also remembers the
  # term (spaces turned into ".*") for the name filter applied in collect.
  # NOTE(review): the filter lives in a class variable and the term is
  # interpolated into a regexp unescaped -- confirm that is intended.
  def self.make_url term, i
    @@search_filter = term.gsub(' ', '.*')
    'http://thepiratebay.org/search.php?q=%s&page=%d' % \
      [term.tr(' ', '+'), i]
  end

  # pirate bay seems to also search in torrent descriptions,
  # which isn't exactly what we want
  def collect
    unless @torrents.nil?
      @torrents.reject! {|t| t.name !~ /#{@@search_filter}/i}
    end
  end
end

# One row of a Mininova result table. Size is kept in MB.
class MininovaRow < SearcherBase
  attr_reader :link, :filename, :size
  alias_method :unique_id, :link

  process 'a[href^="/tor/"]', :name => :text do |elem|
    @link = 'http://www.mininova.org' + \
      elem.attributes['href'].sub('tor', 'get')
    @filename = @name.tr(' /', '_') + '.' + \
      @link.split('/').last + '.torrent'
  end

  process 'td:nth-last-of-type(3)' do |elem|
    @size = case elem.to_s
            when /(\d+).*MB/ then $1.to_i
            when /(\d+).*GB/ then $1.to_i * 1024
            else 0
            end
  end

  process 'td:nth-last-of-type(2)', :seeds => :integer
  process 'td:last-of-type', :leeches => :integer
end

# Mininova search result page.
class Mininova < SearcherBase
  process 'table.maintable tr', 'torrents[]' => MininovaRow

  # URL of result page i (zero-based here, one-based in the URL).
  def self.make_url term, i=0
    'http://www.mininova.org/search/%s/added/%i' % \
      [term.tr(' ', '+'), i+1]
  end
end

# One row of a Baka-Updates result table.
class BakaUpdatesRow < SearcherBase
  attr_reader :filename, :seeds, :leeches, :size, :season

  process 'td:nth-child(1)', :date => :text
  process 'td:nth-child(2)', :name => :text
  process 'td:nth-child(3)', :episode => :integer
  process 'td:nth-child(4)', :group => :text
  process 'td:nth-child(5)', :crc => :text
  process 'td:nth-child(6) > a', :link => '@href'

  def unique_id
    "#{crc}_#{link}"
  end

  # Extra attributes to store in the cache on top of DumpAttrs.
  @xattrs = [:date, :group]

  # magic...
  # Fill in the values the page does not provide: a synthetic file name,
  # season 1, and -1 for seeds, leeches and size.
  def collect
    @filename = '~' + group.gsub(' ', '_') +
                '~' + name.gsub(' ', '_') +
                '~' + episode.to_s + '.torrent'
    @season = 1
    @seeds = -1
    @leeches = -1
    @size = -1
  end
end

# Baka-Updates search result page.
class BakaUpdates < SearcherBase
  process 'tr[bgcolor="#E6F2FD"]', 'torrents[]' => BakaUpdatesRow

  # URL of result page i (zero-based here, one-based in the URL).
  def self.make_url term, i=0
    'http://www.baka-updates.com/search.php?keyword=%s&type=title&page=%i' %
      [term.gsub(' ', '%20'), i+1]
  end
end

# Receiver for the user's config file: the file is instance_eval'ed
# against an instance of this class, so the methods below form the
# configuration DSL. Each call only records its argument or block.
class PouncerConfig
  # Block called with a torrent; pounce_download destructures its result
  # as [season, episode].
  def extract_season_episode &block
    @episode_parser = block
  end

  # Register a searcher class together with the query to run on it.
  def search searcher, query
    searchers << [searcher, query]
  end

  # Register a filter block; torrents for which it returns false/nil are
  # dropped. The block receives (torrent) or (torrent, all_torrents).
  def filter &block
    filters << block
  end

  # Register a preference: a torrent earns `value` points when the block
  # returns a true value for it.
  def prefer value=1, &block
    preferences << [value, block]
  end

  # Register a preference whose block returns the points itself
  # (non-numeric results count as 0).
  def prefer_dynamic &block
    preferences << [nil, block]
  end

  # Register a block to run on every downloaded torrent.
  def action &block
    actions << block
  end

  # Register an action that copies the downloaded file to `path`, or to
  # the path returned by the block. When the destination is a directory
  # the torrent's filename is appended.
  def action_copy path=nil, &block
    copy = proc do |t|
      dest = File.expand_path(block ? block.call(t) : path)
      if File.directory? dest
        dest = dest.chomp('/') + '/' + t.filename
      end
      FileUtils.cp t.tempfname, dest
      stderr '+', dest
    end
    actions << copy
  end

  # Override the location of the cache file.
  def database file
    @storage = file
  end

  def searchers
    @searchers ||= []
  end

  def filters
    @filters ||= []
  end

  def preferences
    @preferences ||= []
  end

  def actions
    @actions ||= []
  end

  attr_reader :storage, :episode_parser
end

# find the best torrent given config p, episode cache, and
# key = [season, episode]; return the first torrent that downloaded
# successfully (download ok and actions didn't raise any exceptions)
# or nil when none did
def fetch_torrent p, cache, key
  # fetch torrents for given key that haven't been :downloaded
  # (from_hash returns nil for the :downloaded marker)
  torrents = cache[:episodes][key][:list].map do |uid|
    SearcherBase.from_hash cache[:torrents][uid]
  end.compact

  # apply filters
  p.filters.each do |f|
    if f.arity == 1
      torrents = torrents.select(&f)
    else
      torrents = torrents.select {|t| f.call t, torrents}
    end
  end

  # apply preferences
  points = Hash.new {|h, k| h[k] = 0}
  p.preferences.each do |val, block|
    torrents.each do |t|
      result = if block.arity == 1
                 block.call t
               else
                 block.call t, torrents
               end
      if val.nil?
        # dynamic preference: the block's numeric result is the score
        points[t] += result.is_a?(Numeric) ? result : 0
      elsif result
        points[t] += val
      end
    end
  end
  torrents = torrents.sort_by {|t| -points[t]}

  # try downloading all torrents in order, return first ok
  torrents.each do |t|
    ok = wget(t.link) do |tmpname|
      t.tempfname = tmpname
      begin
        p.actions.each {|a| a.call t}
        :success
      rescue
        print_exception
        nil
      end
    end
    return t if ok
  end
  nil
end

# open and lock cache file and yield it to the block
#
# Evaluates config_code into a fresh PouncerConfig, takes the lock file
# next to the cache, loads the cache (an empty one when it cannot be
# read), calls the block with (config, cache) and finally writes the
# cache back. Exits with status 1 when the lock file already exists.
def with_config_and_cache name, config_code, config_file, &block
  p = PouncerConfig.new
  p.instance_eval config_code, config_file

  if p.storage
    storage = File.expand_path p.storage
  else
    dd = File.expand_path $dot_dir
    FileUtils.mkdir_p dd
    storage = "#{dd}/#{name}.yaml.gz"
  end

  lockfile = storage + '.lock'
  begin
    # EXCL makes "check and create" one atomic step
    File.open(lockfile, File::WRONLY | File::CREAT | File::EXCL) {}
  rescue Errno::EEXIST
    stderr '!', lockfile, "exists, exiting"
    exit 1
  end
  # only from here on is the lock ours to remove (see ensure)
  locked = true

  cache = begin
    Zlib::GzipReader.open(storage) do |f|
      data = f.read
      # The cache is written by this program and contains symbols and
      # aliases, which the restricted YAML.load of newer Psych versions
      # may reject; use the unrestricted loader where it exists.
      YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(data) : YAML.load(data)
    end
  rescue
    # a missing file is the normal first-run case; anything else is
    # worth a warning because the cache is about to be rebuilt
    stderr '!', "couldn't read cache", storage if File.exist? storage
    nil
  end
  unless cache.is_a?(Hash) && cache[:episodes] && cache[:torrents]
    cache = {:episodes => {}, :torrents => {}}
  end

  p.actions.reverse!
  p.preferences.reverse!
  p.filters.reverse!

  block.call p, cache

  # write next to the real file so the final move stays on one filesystem
  temp_path = "#{storage}.tmp.#{Process.pid}"
  Zlib::GzipWriter.open(temp_path) {|f| YAML.dump cache, f}
  FileUtils.mv temp_path, storage, :force => true
ensure
  FileUtils.rm lockfile, :force => true if locked
  FileUtils.rm temp_path, :force => true if temp_path
end

# given config struct and opened cache, mark episodes as unwatched;
# episode_list holds strings like "1x13", entries in any other format
# are ignored
def pounce_unwatch name, cfg_code, cfg_file, episode_list
  with_config_and_cache name, cfg_code, cfg_file do |p, cache|
    episode_list.each do |ep_str|
      next unless ep_str =~ /(\d+)x(\d+)/
      season = $1.to_i
      ep = $2.to_i
      stderr '-', "season #{season}, episode #{ep}"
      cache[:episodes][[season, ep]] ||= \
        {:downloaded => false, :list => []}
      cache[:episodes][[season, ep]][:downloaded] = false
    end
  end
end

# given config struct and opened cache, update cache and then
# get all available unwatched episodes
def pounce_download name, cfg_code, cfg_file
  with_config_and_cache name, cfg_code, cfg_file do |p, cache|
    # update the cache
    p.searchers.each do |searcher, query|
      page = 0
      # continue looking at consecutive search result sub-pages,
      # until all torrents look familiar
      loop do
        seen_all = true
        uri = searcher.make_url(query, page)
        stderr '<', uri
        begin
          scraps = searcher.scrape(URI.parse(uri))
        rescue Scraper::Reader::HTTPUnspecifiedError
          stderr '!', "HTTPUnspecifiedError while scraping #{uri}"
          exit
        rescue Scraper::Reader::HTTPTimeoutError
          stderr '!', "HTTPTimeoutError while scraping #{uri}"
          exit
        rescue
          print_exception
          exit
        end
        partial = (scraps && scraps.torrents) || []

        # put each previously unseen torrent into the cache
        partial.each do |t|
          next unless t.unique_id
          next if cache[:torrents][t.unique_id]
          seen_all = false

          # at this point scraper *might* have filled in
          # episode and season, try the custom parser
          cust_season, cust_ep =
            p.episode_parser ? p.episode_parser.call(t) : nil
          # fill in the blanks with standard parser
          std_season, std_ep = std_season_episode t.name
          season = cust_season || (t.season rescue nil) || std_season
          ep = cust_ep || (t.episode rescue nil) || std_ep

          serialized = t.to_hash
          serialized[:season] = season
          serialized[:episode] = ep
          cache[:torrents][t.unique_id] = serialized
          if ep
            cache[:episodes][[season, ep]] ||= \
              {:downloaded => false, :list => []}
            cache[:episodes][[season, ep]][:list] << t.unique_id
          end
        end

        page += 1
        break if seen_all
      end
    end

    # for each episode that we have in cache, see if it has been
    # downloaded; if not, try to get one and mark as downloaded
    cache[:episodes].keys.each do |key|
      next if cache[:episodes][key][:downloaded]
      if selected = fetch_torrent(p, cache, key)
        cache[:episodes][key][:downloaded] = true
        cache[:torrents][selected.unique_id] = :downloaded
      end
    end
  end
end

# Print the command line help and exit.
def usage
  puts <<-EOF
Usage: pounce.rb config_file [-u|-r episode] [-b]

Without any options, pounce will update the torrent cache and \
download all torrents for yet unseen episodes

Options:
  --unwatch episode - Mark episode as unseen and exit
      Example: pounce.rb config -u 1x13
      Marks episode 13 of season 1 as not yet seen

  --rewatch episode - Same as '--unwatch', but continue
      with updating cache and downloading torrents

  --batch - print additional information
  EOF
  exit
end

opts = GetoptLong.new(['--unwatch', '-u', GetoptLong::REQUIRED_ARGUMENT],
                      ['--rewatch', '-r', GetoptLong::REQUIRED_ARGUMENT],
                      ['--batch', '-b', GetoptLong::NO_ARGUMENT],
                      ['--help', '-h', GetoptLong::NO_ARGUMENT])

# do we want to mark an episode as unseen?
$unwatch = nil
# do we want to get new torrents?
$get_new = true
# do we prefix every message with the config name?
$batch = false

opts.each do |opt, arg|
  case opt
  when '--unwatch'
    $unwatch = arg
    $get_new = false
  when '--rewatch'
    $unwatch = arg
  when '--help'
    usage
  when '--batch'
    $batch = true
  end
end

usage if ARGV.length < 1

# Use the argument as a path if it exists; otherwise look for it in the
# dot directory, bare and then with each known suffix.
config_file = File.expand_path ARGV.first
['', '.conf', '.cfg', '.rb'].each do |suffix|
  unless File.exist? config_file
    config_file = File.expand_path($dot_dir) + "/" + \
      ARGV.first + suffix
  end
end

unless File.exist? config_file
  stderr '!', "config file not found:", ARGV.first
  exit 1
end

# global because it's used also by 'batch mode'
$config_name = File.basename(config_file).split('.').first
config_code = File.read config_file

if $unwatch
  pounce_unwatch $config_name, config_code, config_file, [$unwatch]
end

if $get_new
  pounce_download $config_name, config_code, config_file
end