changeset 15:789cf10ce4c9

Update for new format (for sure)
author darius@Inchoate
date Sun, 14 Dec 2008 18:55:39 +1030
parents eec2fc32ca88
children eeee17d2072c
files scrape-gm.py
diffstat 1 files changed, 58 insertions(+), 28 deletions(-) [+]
line wrap: on
line diff
--- a/scrape-gm.py	Sun Dec 14 18:51:14 2008 +1030
+++ b/scrape-gm.py	Sun Dec 14 18:55:39 2008 +1030
@@ -5,10 +5,9 @@
 #
 # Prints out matched player names agreated by server
 #
-# $Id: scrape-gm.py,v 1.3 2007/11/18 08:54:07 darius Exp $
 ############################################################################
 #
-# Copyright (C) 2007 Daniel O'Connor. All rights reserved.
+# Copyright (C) 2008 Daniel O'Connor. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -35,11 +34,14 @@
 
 import re, time, datetime, urllib, sys, BeautifulSoup
 
+debug = False
+
 class Server:
     alltags = re.compile('<[^>]*>')
     vwhttags = re.compile('<(br|hr)>')
     hwhttags = re.compile('\&nbsp;')
-
+    typetag = re.compile('<td><a href="/GameSearch/([^/]+)/.*</td>')
+    
     def __init__(self, description = "", ip = "", port = 0, mapname = "",
                  updateage = 0, numplayers = 0, maxplayers = 0, players = []):
         self.description = description
@@ -52,20 +54,36 @@
         self.maxplayers = maxplayers
 
     def __init__(self, pcols, scols):
-        # pcols[2] = Player name
-        # pcols[3] = Server description
+        # pcols[1] = Player name
+        # pcols[2] = Server description
         # scols[0] = Players in server / max players
-        # scols[2] = Server IP
-        # scols[3] = Server port 
-        # scols[4] = Map name
-        # scols[10] = Update age
-        self.tuplere = re.compile("\[?([0-9]+)/([0-9]+)\]?")
-        self.description = pcols[3]
-        self.ip = scols[2]
-        self.port = int(scols[3])
-        self.mapname = scols[4]
-        self.updateage = scols[10]
-        m = self.tuplere.match(scols[0])
+        # scols[1] = Server IP & port
+        # scols[2] = Map name
+        # scols[3] = Game type
+        # scols[8] = Update age
+        if debug:
+            print "pcols = " + str(pcols)
+            print "scols = " + str(scols)
+            
+        self.pcountre = re.compile("([0-9]+)/([0-9]+)")
+        self.ipportre = re.compile("([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+):([0-9]+)")
+        self.sdesc = re.compile(" +[0-9]+\. +(.*)")
+
+        m = self.sdesc.match(pcols[2])
+        if (m == None):
+            raise SyntaxError
+        self.description = m.group(1)
+        
+        m = self.ipportre.match(scols[1])
+        if (m == None):
+            raise SyntaxError
+        
+        self.ip = m.group(1)
+        self.port = int(m.group(2))
+        self.gametype = scols[3]
+        self.mapname = scols[2]
+        self.updateage = scols[8]
+        m = self.pcountre.match(scols[0])
         if (m == None):
             raise SyntaxError
         
@@ -78,15 +96,20 @@
         for p in self.players:
             plist = plist + " " + str(p)
         
-        return "%s | Map: %s | Players: %d/%d : %s (%s old)" % \
-               (self.description, self.mapname, self.numplayers, self.maxplayers, \
-                plist, self.updateage)
+        return "%s: %s (%s:%d) | Map: %s | Players: %d/%d : %s (%s old)" % \
+               (self.gametype, self.description, self.ip, self.port, self.mapname,
+                self.numplayers, self.maxplayers, plist,
+                self.updateage)
     
     def GetTuple(scols):
         return str(scols[2]) + ":" + str(scols[3])
     GetTuple = staticmethod(GetTuple)
 
     def FixTags(s):
+        # Mangle game type
+        t = Server.typetag.match(s)
+        if t != None:
+            s = t.group(1)
         s = re.sub(Server.vwhttags, '\n', s)
         s = re.sub(Server.hwhttags, '', s)
         s = str(BeautifulSoup.BeautifulStoneSoup( \
@@ -98,12 +121,13 @@
     def Scrape(handle):
         s = BeautifulSoup.BeautifulSoup(handle)
 
-        playertbl = s.find("table", "search_table")
+        playertbl = s.find("table", "results")
         if (playertbl == None):
-            #print "Unable to find results"
+            if True:
+                print "Unable to find results"
             return None
         
-        servertbl = playertbl.findNext("table", "search_table")
+        servertbl = playertbl.findNext("table")
     
         playerrows = playertbl.findAll("tr")
         serverrows = servertbl.findAll("tr")
@@ -116,7 +140,9 @@
         for i in range(len(playerrows[1:])):
             pcols = playerrows[i].findAll('td')
             scols = serverrows[i].findAll('td')
-            if (len(pcols) != 4):
+            if (len(pcols) != 3):
+                if debug:
+                    print "pcols has length %d, expected 3" % len(pcols)
                 continue
         
             pcols = map(lambda c : Server.FixTags(str(c)), pcols)
@@ -128,7 +154,7 @@
                 s = Server(pcols, scols)
                 servers[stuple] = s
             
-            servers[stuple].addplayer(pcols[2])
+            servers[stuple].addplayer(pcols[1])
 
         return servers
     Scrape = staticmethod(Scrape)
@@ -137,7 +163,7 @@
         self.players.append(pname)
     
     
-if (1):
+if True:
     maxhits = 10
     if (len(sys.argv) < 2):
         print "Bad usage"
@@ -146,7 +172,7 @@
     
     try:
         #f = open("gm.html")
-        f = urllib.urlopen("http://www.game-monitor.com/search.php?search=" + urllib.quote(sys.argv[1]) + "&type=player&location=AU")
+        f = urllib.urlopen("http://www.game-monitor.com/search.php?location=AU&search=" + urllib.quote(sys.argv[1]) + "&type=player&location=AU")
     except IOError, e:
         print "Unable to fetch page - " + str(e)
         sys.exit(0)
@@ -158,10 +184,14 @@
     elif (len(servers) == 0):
         print "No players found"
     else:
+        tmp = []
+        for i in servers:
+            tmp.append(servers[i])
+        tmp.sort()
         i = 0
-        for s in servers:
+        for s in tmp:
             i = i + 1
-            print servers[s]
+            print s
             if (i >= maxhits):
                 print "*** Stopping after " + str(maxhits) + " hits"
                 break