#! /usr/bin/env python2
"""Print CSV rows extracted from a saved ``PackageStatistics`` HTML page.

For every ``<td>`` whose ``style`` attribute is exactly ``width: 200px;`` the
script prints one line of the form ``<cell text>,<first>,<last>``, where
``<first>`` and ``<last>`` come from the ``title`` attribute of the first
``<div>`` in the cell's parent element (see ``split_title``).
"""

# File read from the current working directory.
INPUT_PATH = 'PackageStatistics'

# Exact ``style`` attribute value that selects the cells of interest.
CELL_STYLE = 'width: 200px;'


def split_title(title):
    """Return the first and last of the three tokens in *title*.

    Commas are stripped first, then the text is split on whitespace.  The
    middle token is discarded.

    Raises:
        ValueError: if *title* does not split into exactly three tokens.
    """
    first, _, last = title.replace(',', '').split()
    return first, last


def main():
    """Parse ``INPUT_PATH`` and print one CSV line per matching cell."""
    # Imported here so that ``split_title`` stays importable on systems
    # without the third-party BeautifulSoup 3 package installed.
    from BeautifulSoup import BeautifulSoup

    # ``with`` guarantees the file handle is closed, even if reading fails.
    with open(INPUT_PATH) as handle:
        html = handle.read()

    soup = BeautifulSoup(html)
    for cell in soup.findAll('td', style=CELL_STYLE):
        # First child node of the cell; presumably the package name --
        # TODO confirm against the page markup.
        pkg = cell.contents[0]
        first, last = split_title(cell.parent.div['title'])
        print('%s,%s,%s' % (pkg, first, last))


if __name__ == '__main__':
    main()