User:Zycloneb
I'm nofx junkee's bitch bot. Technically, I'm the first bot written using the framework that is now also named ZycloneB. The other bots here, written for the other tasks, all use the ZycloneB framework as a back-end. That framework is based on the pywikipedia project, but largely rewritten and stripped of Wikipedia specifics. You should be able to use the framework on any recent MediaWiki, but it has only been tested on MediaWiki 1.4.5 and 1.4.7. If you're interested in the code, write me a message and I can send it to you.
Contents |
Missions
Project "zycloneb"
My first MO is to do a mass import of all the lyrics from the current site to this wiki. [DONE]
Project "gigs"
My second mission is to import all the gig information that the hinssau collected into the wiki. I actually just formatted the dates into usable wiki links; Faith is doing most of the work on the actual pages. [DONE]
Project "slack"
Third mission is to convert all the compilations to the new format. Luckily, the fact that we don't have info about each release's media there makes this pretty easy to automate. We'll need to touch things up manually, but that takes maybe an hour or two. [DONE]
Project "these names are getting worse and worse"
This mission is to create a photo gallery for each band member from the images in the current photo gallery on no-eff-eks.com. This saves us endless uploading hassle. [DONE]
Theme Song
Zyclone B. Bot's very own theme song:
Zyclone B. Bot, A super script for wiki editing, Made by the junkee, But mostly written by smarter men. As pages change the folks'll say, "How'd that Zyclone edit all that shit so fast?" With a better him than me, And a better off this way...
Code
First Mission (Lyrics Import "ZycloneB")
The code for the first mission is:
#!/usr/bin/python
from nofxwiki import NofxWiki
# Root directory of the lyrics data on the local disk.
base_dir = '/home/sjs/www/no-eff-eks/lyrics/'
# get_lyrics() opens '<album_dir><abbrev>.php' for every album abbreviation.
album_dir = base_dir + 'albums/'
# get_lyrics() looks up the lyric files named in the album files here.
lyric_dir = base_dir + 'songs/'
# Index read by get_albums(): one 'abbrev:title:year' entry per line.
albums_file = base_dir + 'albums.txt'
# Wiki account used by main(). NOTE(review): 'XXX' is presumably a redacted
# placeholder rather than the real password -- confirm before running.
username = 'zycloneb'
password = 'XXX'
# Head of a song page; %s receives '' or 's' for the "Album(s)" label.
_SONG_PAGE_HEADER = "{{SongLinks|{{PAGENAME}}}}\n'''Album%s:'''\n"

# Body of a lyrics page; %s receives the lyric text.  The closing tag must be
# a well-formed </pre>, otherwise the preformatted block is never terminated.
_LYRICS_PAGE = ("{{SongLinks|{{PAGENAME}}}}\n"
                "<pre>\n"
                "%s\n"
                "</pre>\n"
                "[[Category:Lyrics|{{PAGENAMEE}}]]")


def _song_page_text(albums):
    """Return the wiki text of a song page listing its (year, album) pairs."""
    if len(albums) > 1:
        plural = 's'
    else:
        plural = ''
    parts = [_SONG_PAGE_HEADER % plural]
    # sorted() orders by year without mutating the list owned by the caller.
    for (year, album) in sorted(albums):
        parts.append('*%s - [[%s]]\n' % (year, album))
    parts.append('[[Category:Songs]]')
    return ''.join(parts)


def main():
    """Create a song page and a 'Lyrics:' page on the wiki for every song.

    Songs come from get_lyrics(), which maps each title to a
    (lyric text, [(year, album), ...]) tuple.  A page whose put() call
    returns a falsy value is recorded, one title per line, in
    /home/sjs/failed_lyrics.txt.  The lyrics page is logged under its
    'Lyrics:' title so the two kinds of failure can be told apart.
    """
    wiki = NofxWiki()
    try:
        if not wiki.is_logged_in(username):
            wiki.login(username, password)
        lyrics = get_lyrics()
        failed = []
        for song in lyrics:
            lyric, albums = lyrics[song]

            page = wiki.get_page(song)
            if not page.put(_song_page_text(albums), new_page=True):
                failed.append(song)

            lyrics_title = 'Lyrics:' + song
            page = wiki.get_page(lyrics_title)
            if not page.put(_LYRICS_PAGE % lyric, new_page=True):
                failed.append(lyrics_title)

        f = open('/home/sjs/failed_lyrics.txt', 'w')
        try:
            f.write(''.join([title + '\n' for title in failed]))
        finally:
            f.close()
    finally:
        # Always shut the wiki session down, even when an upload raised.
        wiki.stop()
def get_albums():
    """Read the album index and return {abbrev: (year, title)}.

    Each non-blank line of `albums_file` has the form 'abbrev:title:year'.
    The abbreviation is everything before the first colon and the year is
    everything after the last one, so a title may itself contain colons.
    Surrounding whitespace (including a trailing '\\r\\n') is stripped from
    the line.

    Raises:
        ValueError: if a non-blank line contains fewer than two colons.
    """
    albums = {}
    afile = open(albums_file)
    try:
        for line in afile:
            line = line.strip()
            if not line:
                # Tolerate blank lines, e.g. a trailing newline at EOF.
                continue
            abbrev, rest = line.split(':', 1)
            title, year = rest.rsplit(':', 1)
            albums[abbrev] = (year, title)
    finally:
        afile.close()
    return albums
def _strip_item_quotes(item):
    """Strip the quoting from a line such as "'Title'," or "'Title'".

    The first and last characters are dropped, then one remaining trailing
    quote (left over when the line ended in "',") is removed as well.
    """
    item = item[1:-1]
    if item.endswith("'"):
        item = item[:-1]
    return item


def _load_lyric(path):
    """Return the cleaned-up text of the lyric file at `path`, or ''.

    A missing file yields an empty lyric.  HTML entities are decoded
    ('&amp;' last, so already-decoded text is not decoded twice), '<br />'
    is dropped and '<p>' becomes a newline.
    """
    import os
    if not os.path.exists(path):
        return ''
    lfile = open(path)
    try:
        lyric = lfile.read()
    finally:
        lfile.close()
    lyric = lyric.replace('&quot;', '"')
    lyric = lyric.replace('&amp;', '&')
    lyric = lyric.replace('<br />', '')
    lyric = lyric.replace('<p>', '\n')
    return lyric


def get_lyrics():
    """Collect every song with its lyric text and the albums it appears on.

    For each album returned by get_albums(), the file
    '<album_dir><abbrev>.php' is scanned line by line:

    * a line containing 'songnum' gives the number of songs (taken from the
      two characters before the last one);
    * the first `songnum` lines starting with a single quote are song
      titles;
    * the following quoted lines are lyric file names under `lyric_dir`,
      paired with the titles by position.

    Returns:
        dict mapping song title -> (lyric text, [(year, title), ...]).  The
        lyric of the first album a song is found on wins; later albums are
        only appended to the list.
    """
    lyrics = {}
    albums = get_albums()
    for album in albums:
        titles = []
        songnum = 0   # no title is accepted until a 'songnum' line was seen
        index = 0     # position of the next lyric file within `titles`
        afile = open(album_dir + album + '.php')
        try:
            for line in afile:
                line = line.strip()
                if not line:
                    continue
                if 'songnum' in line:
                    songnum = int(line[-3:-1])
                if line[0] != "'":
                    # line has no relevant data
                    continue
                item = _strip_item_quotes(line)
                if len(titles) < songnum:
                    item = item.replace("\\'", "'")
                    item = item.replace('S&M', 'S/M')
                    item = item.replace('&', 'And')
                    titles.append(item)
                    continue
                if index >= len(titles):
                    # More file names than titles: nothing to pair this with.
                    continue
                title = titles[index]
                index += 1
                if title in lyrics:
                    lyrics[title][1].append(albums[album])
                else:
                    lyric = _load_lyric(lyric_dir + item)
                    lyrics[title] = (lyric, [albums[album]])
        finally:
            afile.close()
    return lyrics


if __name__ == '__main__':
    main()
Second Mission ("Gigs")
The code is pretty simple; the most complex thing is translating the abbreviations:
#!/usr/bin/python
import os
from nofxwiki import NofxWiki
import re
# Root of the gig lists: main() expects one sub-directory per year, each
# holding one text file per tour.
gigdir = '/home/sjs/www/nofxwiki/gigs'
# Country abbreviations, as written in parentheses on a show line
# (e.g. "26. (CAN) City, PR"), mapped to the name used in the wiki link.
countries = {
'A': 'Austria',
'AUS': 'Australia',
'B': 'Belgium',
'CAN': 'Canada',
'CH': 'Switzerland',
'D': 'Germany',
'DK': 'Denmark',
'ESP': 'Spain',
'F': 'France',
'FIN': 'Finland',
'GRE': 'Greece',
'ICE': 'Iceland',
'I': 'Italy',
'IRE': 'Ireland',
'JAP': 'Japan',
'NL': 'Netherlands',
'NOR': 'Norway',
'NZ': 'New Zealand',
'POL': 'Poland',
'POR': 'Portugal',
'RUS': 'Russia',
'SLO': 'Slovenia',
'SWE': 'Sweden',
'UK': 'United Kingdom',
'USA': 'USA'
}
# Canadian province abbreviations; main() expands them inside the city text
# of shows whose country is Canada.
# NOTE(review): 'MON' is not the usual abbreviation for Manitoba -- presumably
# it mirrors the spelling used in the gig lists; confirm against the data.
provinces = {'BC': 'British Columbia',
'QUE': 'Quebec',
'ONT': 'Ontario',
'AB': 'Alberta',
'MON': 'Manitoba'
}
# Two-letter codes that may follow the city on a show line without a country
# tag (e.g. "26. City, PR"); main() labels every such show as 'USA'.
states = {'AL': 'Alabama',
'AK': 'Alaska',
'AZ': 'Arizona',
'AR': 'Arkansas',
'CA': 'California',
'CO': 'Colorado',
'CT': 'Connecticut',
'DE': 'Delaware',
'DC': 'District of Columbia',
'FL': 'Florida',
'GA': 'Georgia',
'HI': 'Hawaii',
'ID': 'Idaho',
'IL': 'Illinois',
'IN': 'Indiana',
'IA': 'Iowa',
'KS': 'Kansas',
'KY': 'Kentucky',
'LA': 'Louisiana',
'ME': 'Maine',
'MH': 'Marshall Islands',
'MD': 'Maryland',
'MA': 'Massachusetts',
'MI': 'Michigan',
'MN': 'Minnesota',
'MS': 'Mississippi',
'MO': 'Missouri',
'MT': 'Montana',
'NE': 'Nebraska',
'NV': 'Nevada',
'NH': 'New Hampshire',
'NJ': 'New Jersey',
'NM': 'New Mexico',
'NY': 'New York',
'NC': 'North Carolina',
'ND': 'North Dakota',
'OH': 'Ohio',
'OK': 'Oklahoma',
'OR': 'Oregon',
'PA': 'Pennsylvania',
'PR': 'Puerto Rico',
'RI': 'Rhode Island',
'SC': 'South Carolina',
'SD': 'South Dakota',
'TN': 'Tennessee',
'TX': 'Texas',
'UT': 'Utah',
'VT': 'Vermont',
'VI': 'Virgin Islands',
'VA': 'Virginia',
'WA': 'Washington',
'WV': 'West Virginia',
'WI': 'Wisconsin',
'WY': 'Wyoming'
}
# "26. (CAN) City, PR": day, country abbreviation in parentheses, place.
_GIG_ABROAD = re.compile(r'(\d+)\.\s\(([^\)]*)\)\s(.*)')
# "26. City, PR venue": day, city, two-letter state code, optional venue.
_GIG_USA = re.compile(r'(\d+)\.\s(.*),\s([A-Z]{2})(.*)')


def _gig_link(show, month, year):
    """Return the '#[[date|label]]' wiki line for one show, or None.

    None means the line is not a show in a known format: no month header has
    been seen yet, the line matches neither pattern, or its country/state
    abbreviation is missing from the lookup tables.
    """
    if month is None:
        return None

    m = _GIG_ABROAD.match(show)
    if m:
        country = countries.get(m.group(2))
        if country is None:
            return None
        day = m.group(1).zfill(2)
        city = m.group(3).strip()
        if country == 'Canada':
            for abbrev in provinces:
                # Whole words only, so an abbreviation that happens to occur
                # inside a city or venue name is left alone.
                city = re.sub(r'\b%s\b' % re.escape(abbrev),
                              provinces[abbrev], city)
        label = '%s-%s %s, %s' % (month, day, city, country)
        return '#[[%s-%s-%s|%s]]\n' % (month, day, year, label)

    m = _GIG_USA.match(show)
    if m:
        state = states.get(m.group(3))
        if state is None:
            return None
        day = m.group(1).zfill(2)
        city = m.group(2).strip()
        venue = m.group(4).strip()
        label = '%s-%s %s, %s, %s' % (month, day, city, state, 'USA')
        if venue:
            label += ' (%s)' % venue
        return '#[[%s-%s-%s|%s]]\n' % (month, day, year, label)

    return None


def main(root=None):
    """Convert every tour file below `root` into wiki markup.

    `root` defaults to the module-level `gigdir` and must contain one
    directory per year with one text file per tour.  For each tour file
    '<name>.txt' a sibling '<name>.wiki.txt' is written in which

    * lines starting with 'http' are copied through,
    * lines starting with a letter set the current month (first 3 chars),
    * show lines become '#[[Mon-DD-YYYY|Mon-DD City, ..., Country]]',
    * anything else is reported on stdout and copied through unchanged.

    Files already ending in '.wiki.txt' are skipped so the script can be
    re-run without converting its own output.
    """
    if root is None:
        root = gigdir
    for year in sorted(os.listdir(root)):
        year_dir = os.path.join(root, year)
        if not os.path.isdir(year_dir):
            continue
        for tour in sorted(os.listdir(year_dir)):
            src = os.path.join(year_dir, tour)
            if tour.endswith('.wiki.txt') or not os.path.isfile(src):
                continue
            out = []
            month = None
            f = open(src)
            try:
                for show in f:
                    # http://link.to.image
                    # Checked before the month test, which would otherwise
                    # swallow the URL (it starts with letters too).
                    if show.startswith('http'):
                        out.append(show.rstrip('\r\n') + '\n')
                        continue
                    # Oct.
                    if re.match('[a-zA-Z]+', show):
                        month = show[:3]
                        continue
                    link = _gig_link(show, month, year)
                    if link is None:
                        print('*** wtf is this? =>' + show.strip()
                              + '<= (' + tour + ')')
                        link = show
                    out.append(link)
            finally:
                f.close()
            # splitext keeps the output distinct from the input even when the
            # tour file has no '.txt' extension.
            dest = os.path.join(year_dir,
                                os.path.splitext(tour)[0] + '.wiki.txt')
            f = open(dest, 'w')
            try:
                f.write(''.join(out))
            finally:
                f.close()
            print('wrote => ' + dest)


if __name__ == '__main__':
    main()
Third Mission (Compilations "Slack")
Basically, each current page is scanned line by line and the info there is extracted. Take a template of the new format with variable names such as $date and $tracks in the proper places, substitute, and update the wiki.
#!/usr/bin/python
from zycloneb import NofxWiki
import re
from string import Template
# Keys handed to the page template; every one starts out empty.
_COMP_KEYS = ('img', 'date', 'label', 'oop', 'media', 'catid', 'nofxsongs',
              'buy', 'tracks', 'vinyl', 'cd', 'cs', 'dvd', 'vhs')

# 'Name: value' lines that are copied verbatim into one template key.
_COMP_SIMPLE_FIELDS = (
    ('date', r'Date:\s*(.*)'),
    ('label', r'Label:\s*(.*)'),
    ('media', r'Media:\s*(.*)'),
    ('catid', r'Catalog.*:\s*(.*)'),
)

# Per-medium page sections.  The vinyl one takes the availability text only;
# the others take (availability, buy link).
_VINYL_SECTION = """== Vinyl ==
'''''%s'''''
'''Format:''' 12\" LP, 33 RPM
'''Colour:''' Black
'''Copies:''' Unknown number
'''Matrix A Side:'''
'''Matrix B Side:'''
'''Average Price:''' US$
{{NeedPhotos}}"""

_CD_SECTION = """== Compact Disc ==
'''''%s'''''
'''Format:''' CD
'''Copies:''' Unknown number
'''Buy It:''' %s
'''Average Price:''' US$
{{NeedPhotos}}"""

_CS_SECTION = """== Cassette ==
'''''%s'''''
'''Copies:''' Unknown number
'''Buy It:''' %s
'''Average Price:''' US$
{{NeedPhotos}}"""

_DVD_SECTION = """== DVD ==
'''''%s'''''
'''Copies:''' Unknown number
'''Buy It:''' %s
'''Average Price:''' US$
{{NeedPhotos}}"""

_VHS_SECTION = """== VHS ==
'''''%s'''''
'''Format:''' NTSC
'''Copies:''' Unknown number
'''Buy It:''' %s
'''Average Price:''' US$
{{NeedPhotos}}"""

# (media code, section anchor, template key, section text), in check order.
_COMP_MEDIA = (
    ('CD', 'Compact Disc', 'cd', _CD_SECTION),
    ('CS', 'Cassette', 'cs', _CS_SECTION),
    ('DVD', 'DVD', 'dvd', _DVD_SECTION),
    ('VHS', 'VHS', 'vhs', _VHS_SECTION),
)


def _parse_compilation(txt):
    """Extract the template values from an old-format compilation page.

    Bold/italic quote markup and '<br>' are removed first; then every line
    is tested against the known line formats, first match wins.  The order
    of the tests is significant and must not be changed.
    """
    d = dict.fromkeys(_COMP_KEYS, '')
    for markup in ("'''''", "'''", "''", '<br>'):
        txt = txt.replace(markup, '')

    for line in txt.split('\n'):
        line = line.strip()

        m = re.match(r'\[\[Image:([^|]*)\|', line)
        if m:
            d['img'] = m.group(1)
            continue

        m = re.match(r'OUT OF PRINT|STILL AVAILABLE', line)
        if m:
            d['oop'] = m.group(0)
            continue

        for key, pattern in _COMP_SIMPLE_FIELDS:
            m = re.match(pattern, line)
            if m:
                d[key] = m.group(1)
                break
        if m:
            continue

        m = re.match(r'NOFX Songs*:\s*(.*)', line)
        if m:
            d['nofxsongs'] = m.group(1)
            if d['nofxsongs'][:2] != '[[':
                d['nofxsongs'] = '[[' + d['nofxsongs'] + ']]'
            continue

        # Free-text remarks about the NOFX song(s) are kept below the title.
        if (re.search(r'[aA]ppears.*[aA]lbum', line)
                or re.search(r'released', line)):
            d['nofxsongs'] += '\n' + line
            continue

        m = re.match(r'Buy [iI]t:\s*(.*)', line)
        if m:
            d['buy'] = m.group(1)
            continue

        # "01. Artist - Track"
        m = re.match(r'\d\d\.\s(.*)\s-\s(.*)', line)
        if m:
            artist = m.group(1)
            track = m.group(2)
            if artist.lower() == 'nofx' and track[:2] != '[[':
                track = '[[' + track + ']]'
            d['tracks'] += '#%s - %s\n' % (artist, track)
            continue
    return d


def _add_media_sections(d):
    """Label the buy link, link each medium and fill in its page section."""
    # One label only: a link is either the Fat store or the Epitaph store.
    if 'fat' in d['buy']:
        d['buy'] = '[%s Fat Wreck Chords]' % d['buy']
    elif 'epitaph' in d['buy']:
        d['buy'] = '[%s Epitaph Store]' % d['buy']

    media = d['media']
    if 'LP' in media or '7"' in media:
        media = media.replace('LP', '[[#Vinyl|LP]]')
        # Keep the 7" label; only the link target is the shared Vinyl section.
        media = media.replace('7"', '[[#Vinyl|7"]]')
        d['vinyl'] = _VINYL_SECTION % d['oop']
    for code, anchor, key, section in _COMP_MEDIA:
        if code in media:
            media = media.replace(code, '[[#%s|%s]]' % (anchor, code))
            d[key] = section % (d['oop'], d['buy'])
    d['media'] = media

    if not d['tracks']:
        d['tracks'] = '{{NeedTracks}}'


def main():
    """Convert every old-format page linked from 'Compilations'.

    Pages whose text already starts with '{|' are reported and skipped.
    For the others the values are parsed out of the old text, substituted
    into the template read from comp_template.wiki and written back to the
    wiki.
    """
    wiki = NofxWiki()
    try:
        wiki.login('zycloneb', 'XXX')
        f = open('/home/sjs/www/nofxwiki/wiki/comp_template.wiki')
        try:
            tpl = Template(f.read())
        finally:
            f.close()
        comps = wiki.get_page('Compilations')
        for link in comps.get_links():
            comp = wiki.get_page(link)
            txt = comp.get()
            # already new-school baby!
            if txt[:2] == '{|':
                print('skipping ' + comp.name())
                continue
            d = _parse_compilation(txt)
            _add_media_sections(d)
            comp.put(tpl.substitute(d),
                     comment="converted to new format [project \"slack\"]")
    finally:
        # Always shut the wiki session down, even when a page failed.
        wiki.stop()


if __name__ == '__main__':
    main()