# coding: utf-8
require 'spec_helper'

describe 'basic crawler setup' do
  # Configures an anonymous crawler class through the class-level DSL
  # (plain selector, nested property block, :iterator, and a selector with a
  # post-processing block) and checks every value returned by #crawl against
  # the recorded 'basic_crawler_page' cassette.
  it 'should crawl page' do
    VCR.use_cassette('basic_crawler_page') do
      crawler = Class.new
      crawler.send(:include, Wombat::Crawler)

      crawler.base_url "http://www.terra.com.br"
      crawler.path '/portal'

      crawler.search "css=.btn-search"
      crawler.social do
        twitter "css=.ctn-bar li.last"
      end
      crawler.links "css=.ctn-links", :iterator do
        menu "css=a"
      end
      # The block rewrites the extracted text; the expectation below relies on it.
      crawler.subheader "css=h2.ttl-dynamic" do |h|
        h.gsub("London", "Londres")
      end

      results = crawler.new.crawl

      # One {"menu" => label} hash is expected per iterated link container.
      expected_links = [
        "Agenda", "Brasileiro", "Brasil", "Bolsas", "Cinema",
        "Galerias de Fotos", "Beleza", "Esportes", "Assine o RSS"
      ].map { |label| { "menu" => label } }

      expect(results["search"]).to eq("Buscar")
      expect(results["links"]).to eq(expected_links)
      expect(results["subheader"]).to eq("Londres 2012")
      expect(results["social"]["twitter"]).to eq("Verão")
    end
  end

  # Same property set as the basic crawl spec, but the crawler is handed a
  # page object fetched up front with Mechanize (via `page`) instead of a
  # base_url/path pair.
  it 'should crawl a Mechanize::Page' do
    VCR.use_cassette('basic_crawler_page') do
      crawler = Class.new
      crawler.send(:include, Wombat::Crawler)

      # Fetch the page ourselves and pass the resulting object to the crawler.
      agent = Mechanize.new
      fetched_page = agent.get "http://www.terra.com.br/portal"
      crawler.page fetched_page

      crawler.search "css=.btn-search"
      crawler.social do
        twitter "css=.ctn-bar li.last"
      end
      crawler.links "css=.ctn-links", :iterator do
        menu "css=a"
      end
      crawler.subheader "css=h2.ttl-dynamic" do |h|
        h.gsub("London", "Londres")
      end

      results = crawler.new.crawl

      expected_links = [
        "Agenda", "Brasileiro", "Brasil", "Bolsas", "Cinema",
        "Galerias de Fotos", "Beleza", "Esportes", "Assine o RSS"
      ].map { |label| { "menu" => label } }

      expect(results["search"]).to eq("Buscar")
      expect(results["links"]).to eq(expected_links)
      expect(results["subheader"]).to eq("Londres 2012")
      expect(results["social"]["twitter"]).to eq("Verão")
    end
  end

  # Declares the same properties as the basic crawl spec using hash selectors
  # (`css: "..."`) instead of "css=..." strings, and expects identical results.
  it 'should support hash based selectors' do
    VCR.use_cassette('basic_crawler_page') do
      crawler = Class.new
      crawler.send(:include, Wombat::Crawler)

      crawler.base_url "http://www.terra.com.br"
      crawler.path '/portal'

      crawler.search css: ".btn-search"
      crawler.social do
        twitter css: ".ctn-bar li.last"
      end
      # Explicit braces/parens keep the hash selector separate from :iterator.
      crawler.links({css: ".ctn-links"}, :iterator) do
        menu css: "a"
      end
      crawler.subheader css: "h2.ttl-dynamic" do |h|
        h.gsub("London", "Londres")
      end

      results = crawler.new.crawl

      expected_links = [
        "Agenda", "Brasileiro", "Brasil", "Bolsas", "Cinema",
        "Galerias de Fotos", "Beleza", "Esportes", "Assine o RSS"
      ].map { |label| { "menu" => label } }

      expect(results["search"]).to eq("Buscar")
      expect(results["links"]).to eq(expected_links)
      expect(results["subheader"]).to eq("Londres 2012")
      expect(results["social"]["twitter"]).to eq("Verão")
    end
  end

  # Crawls twice with the SAME crawler instance and expects the iterator
  # property to yield the same list both times (i.e. results from the first
  # run must not leak into the second).
  it 'should clear iterators between multiple runs' do
    crawler = Class.new
    crawler.send(:include, Wombat::Crawler)

    crawler.base_url "http://www.terra.com.br"
    crawler.path '/portal'

    crawler.links "css=.ctn-links", :iterator do
      menu "css=a"
    end

    crawler_instance = crawler.new
    expected_links = [
      "Agenda", "Brasileiro", "Brasil", "Bolsas", "Cinema",
      "Galerias de Fotos", "Beleza", "Esportes", "Assine o RSS"
    ].map { |label| { "menu" => label } }
    results = nil

    # First run.
    VCR.use_cassette('basic_crawler_page') do
      results = crawler_instance.crawl
    end

    expect(results["links"]).to eq(expected_links)

    # Second run on the same instance must produce the same list.
    VCR.use_cassette('basic_crawler_page') do
      results = crawler_instance.crawl
    end

    expect(results["links"]).to eq(expected_links)
  end

  # Supplies the whole crawler configuration in a block passed to the
  # instance-level #crawl call rather than through class-level DSL calls.
  it 'should crawl page through block to class instance crawl method' do
    VCR.use_cassette('basic_crawler_page') do
      crawler = Class.new
      crawler.send(:include, Wombat::Crawler)

      results = crawler.new.crawl do
        base_url "http://www.terra.com.br"
        path '/portal'

        search "css=.btn-search"

        social do
          twitter "css=.ctn-bar li.last"
        end

        links "css=.ctn-links", :iterator do
          menu "css=a"
        end

        subheader "css=h2.ttl-dynamic" do |h|
          h.gsub("London", "Londres")
        end
      end

      expected_links = [
        "Agenda", "Brasileiro", "Brasil", "Bolsas", "Cinema",
        "Galerias de Fotos", "Beleza", "Esportes", "Assine o RSS"
      ].map { |label| { "menu" => label } }

      expect(results["search"]).to eq("Buscar")
      expect(results["links"]).to eq(expected_links)
      expect(results["subheader"]).to eq("Londres 2012")
      expect(results["social"]["twitter"]).to eq("Verão")
    end
  end

  # Uses the module-level Wombat.crawl entry point with a configuration block,
  # without defining a crawler class in the spec.
  it 'should crawl page through static crawl method' do
    VCR.use_cassette('basic_crawler_page') do
      results = Wombat.crawl do
        base_url "http://www.terra.com.br"
        path '/portal'

        search "css=.btn-search"

        social do
          twitter "css=.ctn-bar li.last"
        end

        links "css=.ctn-links", :iterator do
          menu "css=a"
        end

        subheader "css=h2.ttl-dynamic" do |h|
          h.gsub("London", "Londres")
        end
      end

      expected_links = [
        "Agenda", "Brasileiro", "Brasil", "Bolsas", "Cinema",
        "Galerias de Fotos", "Beleza", "Esportes", "Assine o RSS"
      ].map { |label| { "menu" => label } }

      expect(results["search"]).to eq("Buscar")
      expect(results["links"]).to eq(expected_links)
      expect(results["subheader"]).to eq("Londres 2012")
      expect(results["social"]["twitter"]).to eq("Verão")
    end
  end

  # Iterates over a list of elements with a nested property group inside the
  # iterator, and checks the complete result hash from the 'for_each_page'
  # cassette (including an entry whose fields are all nil).
  it 'should iterate elements' do
    VCR.use_cassette('for_each_page') do
      crawler = Class.new
      crawler.send(:include, Wombat::Crawler)

      crawler.base_url "https://www.github.com"
      crawler.path "/explore"

      crawler.repos "css=ol.ranked-repositories>li", :iterator do
        project do
          repo 'css=h3'
          # The callback guards against a nil description before stripping "for".
          description('css=p.description') { |d| d ? d.gsub(/for/, '') : nil }
        end
      end

      results = crawler.new.crawl

      expect(results).to eq({ "repos" => [
        { "project" => { "repo" => "jairajs89 / Touchy.js", "description" => "A simple light-weight JavaScript library  dealing with touch events" } },
        { "project" => { "repo" => "mcavage / node-restify", "description" => "node.js REST framework specifically meant  web service APIs" } },
        { "project" => { "repo" => "notlion / streetview-stereographic", "description" => "Shader Toy + Google Map + Panoramic Explorer" } },
        { "project" => { "repo" => "twitter / bootstrap", "description" => "HTML, CSS, and JS toolkit from Twitter" } },
        { "project" => { "repo" => "stolksdorf / Parallaxjs", "description" => "a Library  Javascript that allows easy page parallaxing" } },
        { "project" => { "repo" => nil, "description" => nil}}
      ]})
    end
  end

  # Crawls an XML document (document_format :xml) and resolves namespaced
  # XPath selectors by passing an explicit prefix => URI mapping.
  it 'should crawl xml with namespaces' do
    VCR.use_cassette('xml_with_namespace') do
      crawler = Class.new
      crawler.send(:include, Wombat::Crawler)

      crawler.document_format :xml
      crawler.base_url "http://ws.audioscrobbler.com"
      # The location is written pre-encoded ("San%20Francisco"): URI.escape,
      # which used to produce this value, was removed in Ruby 3.0. Keeping the
      # exact same encoding keeps the request URI unchanged for the cassette.
      crawler.path "/2.0/?method=geo.getevents&location=San%20Francisco&api_key=060decb474b73437d5bbec37f527ae7b"

      crawler.artist "xpath=//title", :list

      # Namespace mapping needed for the geo: prefix used in the selectors.
      geo_namespace = { 'geo' => 'http://www.w3.org/2003/01/geo/wgs84_pos#' }

      crawler.location 'xpath=//event', :iterator do
        latitude "xpath=./venue/location/geo:point/geo:lat", :text, geo_namespace
        longitude "xpath=./venue/location/geo:point/geo:long", :text, geo_namespace
      end

      results = crawler.new.crawl

      expect(results['location']).to eq([
        {"latitude"=>"37.807775", "longitude"=>"-122.272736"},
        {"latitude"=>"37.807717", "longitude"=>"-122.270059"},
        {"latitude"=>"37.869784", "longitude"=>"-122.267701"},
        {"latitude"=>"37.870873", "longitude"=>"-122.269313"},
        {"latitude"=>"37.782348", "longitude"=>"-122.408059"},
        {"latitude"=>"37.775529", "longitude"=>"-122.437757"},
        {"latitude"=>"37.771079", "longitude"=>"-122.412604"},
        {"latitude"=>"37.771079", "longitude"=>"-122.412604"},
        {"latitude"=>"37.784963", "longitude"=>"-122.418871"},
        {"latitude"=>"37.788978", "longitude"=>"-122.40664"}
      ])

      # Order-independent comparison (the expect-syntax equivalent of `should =~`).
      expect(results["artist"]).to match_array(["Davka", "Digitalism (DJ Set)", "Gary Clark Jr.", "Lenny Kravitz", "Little Muddy", "Michael Schenker Group", "The Asteroids Galaxy Tour", "When Indie Attacks", "When Indie Attacks", "YOB"])
    end
  end

  # Declares a :follow property: for every link matched by the XPath, the
  # nested `heading` selector is expected to be evaluated on the linked page
  # (results come from the 'follow_links' cassette).
  it 'should follow links' do
    VCR.use_cassette('follow_links') do
      crawler = Class.new
      crawler.send(:include, Wombat::Crawler)

      crawler.base_url "https://www.github.com"
      crawler.path "/"

      crawler.github 'xpath=//ul[@class="footer_nav"][1]//a', :follow do
        heading 'css=h1'
      end

      results = crawler.new.crawl

      # One entry per followed link; a page without an <h1> yields nil.
      expect(results).to eq({
        "github" => [
          { "heading"=>"GitHub helps people build software together." },
          { "heading"=>nil },
          { "heading"=>"Features" },
          { "heading"=>"Contact GitHub" },
          { "heading"=>"GitHub Training — Git Training from the Experts" },
          { "heading"=>"GitHub on Your Servers" },
          { "heading"=>"Loading..." }
        ]
      })
    end
  end

  # Spec for issue #53: a :follow property nested inside an :iterator. Each
  # iterated hero entry contributes its own text as "name" and the <h1> of the
  # page behind its link as the single "builds" title.
  it 'should follow links - issue #53' do
    # [hero name, tagline] pairs in page order. Every expected title has the
    # form: <name> Build Guide “<tagline>”
    hero_taglines = [
      ["Abathur", "You. Enhanced. Improved."],
      ["Anub'arak", "Time is fleeting."],
      ["Artanis", "Direct my wrath."],
      ["Arthas", "Frostmourne hungers."],
      ["Azmodan", "I shall rule alone!"],
      ["Brightwing", "You don't want to be my enemy!"],
      ["Chen", "I bring Pandamonium!"],
      ["Cho", "This Nexus, all its power... it will be MINE!"],
      ["Diablo", "Kneel before the Lord of Terror."],
      ["E.T.C.", "You can't kill the metal!"],
      ["Falstad", "Time to drop the hammer!"],
      ["Gall", "Those who oppose me invite their own demise!"],
      ["Gazlowe", "Hey, time is money friend."],
      ["Greymane", "I am the alpha!"],
      ["Illidan", "Now I am complete!"],
      ["Jaina", "I'm here to help."],
      ["Johanna", "The Crusade marches on!"],
      ["Kael'thas", "Anar'alah belore!"],
      ["Kerrigan", "Long live the real Queen."],
      ["Kharazim", "Feel the wrath of Ytar!"],
      ["Leoric", "All will suffer as I have suffered!"],
      ["Li Li", "Ready for adventure!"],
      ["Lt. Morales", "I protect every member of my squad!"],
      ["Lunara", "Taste my spear!"],
      ["Malfurion", "Nature will rise against you!"],
      ["Muradin", "It's hammer time!"],
      ["Murky", "Mrglrglmrglmrrrlggg!"],
      ["Nazeebo", "The spirits speak to me."],
      ["Nova", "Ready to have your mind blown?"],
      ["Raynor", "Hit 'em hard and fast."],
      ["Rehgar", "To the Arena!"],
      ["Rexxar", "The beasts obey me!"],
      ["Sgt. Hammer", "Napalm's airborne!"],
      ["Sonya", "Time to die!"],
      ["Stitches", "ROAAAARR!"],
      ["Sylvanas", "Let none survive!"],
      ["Tassadar", "Executor, I stand ready!"],
      ["The Butcher", "Fresh meat!"],
      ["Thrall", "The Elements will destroy you!"],
      ["Tychus", "So, you gonna bark all day?"],
      ["Tyrael", "I am Justice itself!"],
      ["Tyrande", "Feel the wrath of the Heavens!"],
      ["Uther", "I will fight with honor!"],
      ["Valla", "Be vewy, vewy quiet..."],
      ["Zagara", "The Swarm hungers."],
      ["Zeratul", "I serve the Xel'naga."]
    ]

    expected_heroes = hero_taglines.map { |hero, tagline|
      { "name" => hero, "builds" => [{ "title" => "#{hero} Build Guide “#{tagline}”" }] }
    }

    VCR.use_cassette('follow_links_v2', :preserve_exact_body_bytes => true) do
      crawled = Wombat.crawl do
        base_url "http://www.icy-veins.com/"
        path "heroes/hero-guides"

        heroes "css=.page_content .nav_content_block_entry_heroes_hero", :iterator do
          name "xpath=."
          builds "xpath=./a", :follow do
            title "css=h1"
          end
        end
      end

      expect(crawled).to eq({ "heroes" => expected_heroes })
    end
  end

  # Configures the crawler with `http_method :post` plus form data and checks
  # that the echoed field shows up in the recorded response
  # ('make_post_request' cassette).
  it 'should make post requests if needed' do
    VCR.use_cassette('make_post_request') do
      form_data = { your_name: "Name" }

      crawler = Class.new
      crawler.send(:include, Wombat::Crawler)
      crawler.base_url "http://hroch486.icpf.cas.cz"
      crawler.path "/cgi-bin/echo.pl"
      crawler.http_method :post
      crawler.data form_data

      crawler.my_name 'css=ul:last li:last'

      results = crawler.new.crawl

      expect(results["my_name"]).to eq("your_name = Name")
    end
  end

  # No base_url/path is configured on the class; the full URL is handed
  # directly to #crawl as its argument.
  it 'should let the url be passed as an argument to crawl' do
    VCR.use_cassette('basic_crawler_page') do
      crawler = Class.new
      crawler.send(:include, Wombat::Crawler)
      # NOTE(review): `send` is kept from the original; presumably a plain
      # `crawler.title` call would behave the same — confirm before changing.
      crawler.send(:title, 'xpath=//head/title')

      results = crawler.new.crawl('http://www.terra.com.br/portal')

      expect(results['title']).to eq('Terra - Notícias, vídeos, esportes, economia, diversão, música, moda, fotolog, blog, chat')
    end
  end
end
