# HG changeset patch # User darius@Inchoate # Date 1229242695 -37800 # Node ID e8550290e51267c9741f4b3c697c9597f698c5fd # Parent ae9e833e44476c654d666fbf7ede7f0a0a35bcf8 Update parser for new format. diff -r ae9e833e4447 -r e8550290e512 scrape-gm.py --- a/scrape-gm.py Sat Jul 26 20:47:08 2008 +0930 +++ b/scrape-gm.py Sun Dec 14 18:48:15 2008 +1030 @@ -34,6 +34,8 @@ import re, time, datetime, urllib, sys, BeautifulSoup +debug = False + class Server: alltags = re.compile('<[^>]*>') vwhttags = re.compile('<(br|hr)>') @@ -52,22 +54,36 @@ self.maxplayers = maxplayers def __init__(self, pcols, scols): - # pcols[2] = Player name - # pcols[3] = Server description + # pcols[1] = Player name + # pcols[2] = Server description # scols[0] = Players in server / max players - # scols[2] = Server IP - # scols[3] = Server port - # scols[4] = Map name - # scols[5] = Game type - # scols[10] = Update age - self.tuplere = re.compile("\[?([0-9]+)/([0-9]+)\]?") - self.description = pcols[3] - self.ip = scols[2] - self.port = int(scols[3]) - self.mapname = scols[4] - self.gametype = scols[5] - self.updateage = scols[10] - m = self.tuplere.match(scols[0]) + # scols[1] = Server IP & port + # scols[2] = Map name + # scols[3] = Game type + # scols[8] = Update age + if debug: + print "pcols = " + str(pcols) + print "scols = " + str(scols) + + self.pcountre = re.compile("([0-9]+)/([0-9]+)") + self.ipportre = re.compile("([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+):([0-9]+)") + self.sdesc = re.compile(" +[0-9]+\. +(.*)") + + m = self.sdesc.match(pcols[2]) + if (m == None): + raise SyntaxError + self.description = m.group(1) + + m = self.ipportre.match(scols[1]) + if (m == None): + raise SyntaxError + + self.ip = m.group(1) + self.port = int(m.group(2)) + self.gametype = scols[3] + self.mapname = scols[2] + self.updateage = scols[8] + m = self.pcountre.match(scols[0]) if (m == None): raise SyntaxError @@ -107,7 +123,8 @@ playertbl = s.find("table", "results") if (playertbl == None): - #print "Unable to find results" + if True: + print "Unable to find results" return None servertbl = playertbl.findNext("table") @@ -123,7 +140,9 @@ for i in range(len(playerrows[1:])): pcols = playerrows[i].findAll('td') scols = serverrows[i].findAll('td') - if (len(pcols) != 4): + if (len(pcols) != 3): + if debug: + print "pcols has length %d, expected 3" % len(pcols) continue pcols = map(lambda c : Server.FixTags(str(c)), pcols) @@ -135,7 +154,7 @@ s = Server(pcols, scols) servers[stuple] = s - servers[stuple].addplayer(pcols[2]) + servers[stuple].addplayer(pcols[1]) return servers Scrape = staticmethod(Scrape) @@ -144,7 +163,7 @@ self.players.append(pname) -if (1): +if True: maxhits = 10 if (len(sys.argv) < 2): print "Bad usage"