# HG changeset patch # User darius # Date 1188019049 0 # Node ID 8045db05180b7acee66088072e277911f8d25a02 # Parent accc4c4654d7e4e3c37edcd43308a086eb62e54a Initial revision diff -r accc4c4654d7 -r 8045db05180b scrape-vb.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scrape-vb.py Sat Aug 25 05:17:29 2007 +0000 @@ -0,0 +1,158 @@ +#!/usr/bin/env python + +############################################################################ +# Screen scraper for Virgin Blue to look for happy hour deals +# +# Prints out (and emails) when criteria match based on cost, +# destination, etc +# +# $Id: scrape-vb.py,v 1.1.1.1 2007/08/25 05:17:29 darius Exp $ +############################################################################ +# +# Copyright (C) 2007 Daniel O'Connor. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +############################################################################ + +import re, BeautifulSoup, datetime, time, smtplib, sys, urllib + +#### Configuration + +### Travel criteria +# Supported keys are email, when, city1, city2, maxcost +# email is mandatory. If city2 is not present either city will be +# matched. when and maxcost are optional (will match for any date or +# cost) +travellers = [ + { 'email' : 'darius@dons.net.au', 'city1' : 'Sydney' }, + { 'email' : 'sarah.mahoney@nehta.gov.au', 'city1' : 'Adelaide', 'city2' : 'Brisbane' }, + ] + +### Mail host +mailhost = 'mail.dons.net.au' + +### Who the email is from +mailfrom = 'darius@dons.net.au' + +### What's on the subject line +mailsubj = 'Virgin Blue Happy Hour Deals' + +### Actually send email? 
+mailsend = False + +### URL to parse +vburl = 'http://virginblue.com.au' + +parsetitle = re.compile('([a-z ]+) - ([a-z ]+) \$([0-9]+)', re.IGNORECASE) +parsetper = re.compile('Travel Period: ([0-9]+/[0-9]+/[0-9]+) - ([0-9]+/[0-9]+/[0-9]+)', re.IGNORECASE) + +try: + #f = open("vb-happyhour.html") + f = urllib.urlopen(vburl) +except IOError, e: + print "Unable to fetch page - " + str(e) + sys.exit(1) + +s = BeautifulSoup.BeautifulSoup(f) +hrr = s.find("ul", "happyhr-rows") +if (hrr == None): + print "No happy hour details found" + sys.exit(0) + +hrlist = hrr.findAll("li") + +# XXX: I wanted to use findAll('ul', 'happyhr-conditions') but it +# doesn't work +times = parsetper.match(s.findAll('ul')[11].find('li').string) +if (times == None): + print "Unable to parse travel period " + parsetper.match(s.findAll('ul')[11].find('li')) + sys.exit(0) + +frtime = datetime.datetime(*time.strptime(times.group(1), "%d/%m/%y")[0:3]) +totime = datetime.datetime(*time.strptime(times.group(2), "%d/%m/%y")[0:3]) + +#print "Travel from %s to %s" % (str(frtime), str(totime)) + +output = {} +for i in hrlist: + href = i.find('a') + match = parsetitle.match(href['title']) + if (match == None): + print "Unable to match " + str(s) + continue + + city1 = match.group(1) + city2 = match.group(2) + cost = int(match.group(3)) + url = href['href'] + + for t in travellers: + if ('email' not in t): + print "No email key found, configuration error?" 
+ continue + + citymatch = True + if ('city1' in t and 'city2' in t): + if((t['city1'] != city1 or t['city2'] != city2) and + (t['city1'] != city2 or t['city2'] != city1)): + citymatch = False + elif ('city1' in t): + if (t['city1'] != city1 and t['city1'] != city2): + citymatch = False + + datematch = True + if ('when' in t): + travtime = datetime.datetime(*time.strptime(t['when'], "%d/%m/%y")[0:3]) + if (travtime < frtime or travtime > totime): + datematch = False + + costmatch = True + if ('maxcost' in t): + if (cost > int(t['maxcost'])): + costmatch = False + + if (citymatch and datematch and costmatch): + if (t['email'] not in output): + + output[t['email']] = [] + output[t['email']].append([city1, city2, cost, url]) + +if (mailsend): + server = smtplib.SMTP(mailhost) + #server.set_debuglevel(1) + +for o in output: + msg = ("From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n" % (mailfrom, o, mailsubj)) + msg = msg + "Your criteria for flights have been matched\r\n\r\n" + print "Sending email to " + o + for i in output[o]: + print "%s <-> %s costs $%d" % (i[0], i[1], i[2]) + msg = msg + "%s <-> %s costs $%d - %s\r\n" % (i[0], i[1], i[2], i[3]) + + msg = msg + "\r\nNote: travel period is from %s to %s" % \ + (frtime.strftime("%A %e %B %Y"), totime.strftime("%A %e %B %Y")) + if (mailsend): + server.sendmail(mailfrom, o, msg) + else: + print msg + print diff -r accc4c4654d7 -r 8045db05180b vb-happyhour.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/vb-happyhour.html Sat Aug 25 05:17:29 2007 +0000 @@ -0,0 +1,1088 @@ + + + + + + + Cheap Flights and Holidays with Great Service from the World's Best Low Cost Airline + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Cheap Flights

Looking for cheap flights or a cheap holiday deal with flights included? We have both right here at virginblue.com.au or check out Blueholidays.com.au for great holiday deals.

+

Did you know Virgin Blue has cheap flights available every single day between 12pm and 1pm? We also offer cheap flights in our V-mail newsletter which you can signup for free here

+
+ + + +
+ +

+ +
+
+ + +
+ + +
+
+ + + + + + + + + +
+
+
+
+ +
+
+ +
+
+ +
+ +
+
+ +
+ + +
+ + + +
+
+ +
+ + + + calendar + +
+ +
+ +
+ +
+ + + + calendar + +
+ +
+
+ + + +
+
+
+
+ Adult + +
+
+ + Child (2-11) + + +
+
+ + Infant (‹ 2) + + +
+
+
+ +

+ Special Needs + Fare Types + Children Travelling Alone +

+
+
+ + + + + + + + +
+ +
+
+
+ + + +
+

Find out more

+

To begin the Web Check-in process enter your reservation number & departure city.

+
    +
  • Opens 24 hrs prior & closes 60 minutes before departure
  • +
  • Guests with Special Service Needs cannot check-in via web
  • +
  • Available domestic flights only
  • +
+ +
+ + + + + + + + + + +
+ + +
+ + +
+
+
+ continue +
+
+
+
+ + + +
+
+

+ + +

+ +
+ + + + calendar + +
+ +
+
+

+ +
+ + + + calendar + +
+ +
+
+ +

+
+
+
+ +
+
+ +
+
+ +
+
+ +
+
+
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + + + + +
+ + + + + + + + +
+ +
+
+
+ + +
+ + + + + + + + + + +
+ + + +
+ + + + + + + + + + +
+
+ + + + + + + +
+
+ +
+
+ +
+
+ +
+ +

+
+
+ +
+ Happy Hour is on Now! + + +

Please Note - You must click on the fare details above to access Happy Hour fares.

+
    +
  • Travel Period: 02/02/08 - 28/02/08
  • +
  • ^Direct Flights Only
  • +
  • All fares are subject to availability, are quoted in $AUD and  include Taxes and Charges.
  • +
+
+
+ Blue Holidays - Holiday your way! + +
+
+ Velocity Rewards + + + + +
+
+ +

+
+
+
+
+

Travel Offers

+
+
+
Travel Insurance
+
Select from a great range of policies with B-Secure
+
+
+
Campervans and Motorhomes
+
Book fantastic vehicle deals in Australia and New Zealand with Apollo.
+
+
+
Event Tickets
+
Bluetix offers concerts, theatre, sports tickets and more. Elton John pre-sale on now!
+
+
+
Tours & Activities
+
Find tours and activities in Australia with our partner Godo.
+
+

More fantastic offers


+
+ On Time Performance + Jul + 79% +

+
+ Receive Vmail Updates +

Sign Up for Virgin Blue Sale Fares!

+ +
+
+
+ + + +
+

What's New

+
+ +
+
Virgin Blue Launches Unique Cruise Website
+
Multi-award winning airline Virgin Blue has teamed up with OurVacationStore (OVS) to offer online cruise packages more
+
+
+
V Australia has Arrived
+
Introducing "V Australia" - Australia's New International Airline more
+
+
+
Excerpt from "Winds of Change"
+
The Role of Business in Environmental Sustainability delivered by Brett Godfrey more
+
+
+
International Carry-on Baggage Restrictions
+
New security measures for Guests travelling on International Flights. more
+

+
+ + + + + +
+
+
+
+
+
+ +
+
+ Cruises +
+
+ Hotel Deals at your finger tips +
+
+ Car rentals +
+
+ Peanuts.aero +
+
+
+

+
+ + + + + + + + + + + diff -r accc4c4654d7 -r 8045db05180b vb-nohappyhour.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/vb-nohappyhour.html Sat Aug 25 05:17:29 2007 +0000 @@ -0,0 +1,1132 @@ + + + + + + + Cheap Flights and Holidays with Great Service from the World's Best Low Cost Airline + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Cheap Flights

Looking for cheap flights or a cheap holiday deal with flights included? We have both right here at virginblue.com.au or check out Blueholidays.com.au for great holiday deals.

+

Did you know Virgin Blue has cheap flights available every single day between 12pm and 1pm? We also offer cheap flights in our V-mail newsletter which you can signup for free here

+
+ + + +
+ +

+ +
+
+ + +
+ + +
+
+ + + + + + + + + +
+
+
+
+ +
+
+ +
+
+ +
+ +
+
+ +
+ + +
+ + + +
+
+ +
+ + + + calendar + +
+ +
+ +
+ +
+ + + + calendar + +
+ +
+
+ + + +
+
+
+
+ Adult + +
+
+ + Child (2-11) + + +
+
+ + Infant (‹ 2) + + +
+
+
+ +

+ Special Needs + Fare Types + Children Travelling Alone +

+
+
+ + + + + + + + +
+ +
+
+
+ + + +
+

Find out more

+

To begin the Web Check-in process enter your reservation number & departure city.

+
    +
  • Opens 24 hrs prior & closes 60 minutes before departure
  • +
  • Guests with Special Service Needs cannot check-in via web
  • +
  • Available domestic flights only
  • +
+ +
+ + + + + + + + + + +
+ + +
+ + +
+
+
+ continue +
+
+
+
+ + + +
+
+

+ + +

+ +
+ + + + calendar + +
+ +
+
+

+ +
+ + + + calendar + +
+ +
+
+ +

+
+
+
+ +
+
+ +
+
+ +
+
+ +
+
+
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + + + + +
+ + + + + + + + +
+ +
+
+
+ + +
+ + + + + + + + + + +
+ + + +
+ + + + + + + + + + +
+
+ + + + + + + +
+
+ +
+
+ +
+
+ +
+ +

+
+
+ +
+
+
+ +
+ + + + Low Fares, Great Service, Everywhere +
+
+ +
+
+
+ + + +
+ + + + + +
+
+ +
+
+ +
+
+
+
+ Blue Holidays - Holiday your way! + +
+
+ Velocity Rewards + + + + +
+
+ +

+
+
+
+
+

Travel Offers

+
+
+
Travel Insurance
+
Select from a great range of policies with B-Secure
+
+
+
Campervans and Motorhomes
+
Book fantastic vehicle deals in Australia and New Zealand with Apollo.
+
+
+
Event Tickets
+
Bluetix offers concerts, theatre, sports tickets and more. Elton John pre-sale on now!
+
+
+
Tours & Activities
+
Find tours and activities in Australia with our partner Godo.
+
+

More fantastic offers


+
+ On Time Performance + Jul + 79% +

+
+ Receive Vmail Updates +

Sign Up for Virgin Blue Sale Fares!

+ +
+
+
+ + + +
+

What's New

+
+ +
+
Virgin Blue Launches Unique Cruise Website
+
Multi-award winning airline Virgin Blue has teamed up with OurVacationStore (OVS) to offer online cruise packages more
+
+
+
V Australia has Arrived
+
Introducing "V Australia" - Australia's New International Airline more
+
+
+
Excerpt from "Winds of Change"
+
The Role of Business in Environmental Sustainability delivered by Brett Godfrey more
+
+
+
International Carry-on Baggage Restrictions
+
New security measures for Guests travelling on International Flights. more
+

+
+ + + + + +
+
+
+
+
+
+ +
+
+ Cruises +
+
+ Hotel Deals at your finger tips +
+
+ Car rentals +
+
+ Peanuts.aero +
+
+
+

+
+ + + + + + + + + + +