# midi file parser
#
# Sean McIlroy

"""

A Sequence is a list [FormatType, TimeDivision, Tracks] where

*) FormatType is in [0,1,2]
*) TimeDivision is either [TicksPerBeat] with TicksPerBeat in range(2**15)
or [FramesPerSecond, TicksPerFrame] with FramesPerSecond in range(2**7)
and TicksPerFrame in range(2**8)
*) Tracks is a list of tracks, each of which is a list of Events

An Event is either a ChannelEvent or a MetaEvent.
A ChannelEvent is [DeltaTime, EventType, Channel, Parameters]
and a MetaEvent is [DeltaTime, MetaType, Message] where

*) DeltaTime is a nonnegative integer
*) EventType is in range(7)
*) Channel is in range(2**4)
*) Parameters is a list with elements in range(2**7)
*) MetaType is in range(2**7)
*) Message is a string

The EventTypes and Parameters of ChannelEvents have the following
verbal handles:

EventType Parameters

0 = NoteOff [NoteNumber, Velocity]
1 = NoteOn [NoteNumber, Velocity]
2 = NoteAftertouch [NoteNumber, Amount]
3 = Controller [ControllerType, Value]
4 = ProgramChange [ProgramNumber]
5 = ChannelAftertouch [Amount]
6 = PitchBend [ValueLSB, ValueMSB]

"""

def concat(xs):
    """Flatten one level of nesting: join the iterables in xs into one list."""
    flattened = []
    for sublist in xs:
        flattened.extend(sublist)
    return flattened

def zeropadded(digits, minlength):
    """Return a new list: digits left-padded with zeros up to minlength items.

    A list that is already long enough is returned (as a copy) unpadded.
    """
    missing = minlength - len(digits)
    padding = [0] * missing  # a non-positive count gives an empty padding
    return padding + digits

def noleadingzeros(digits):
    """Return digits without its leading zeros, always keeping the last item.

    An all-zero sequence collapses to its final zero, and an empty sequence
    is returned empty (the original indexed digits[0] first and raised
    IndexError).  The result is found with one scan and one slice instead
    of re-slicing the sequence once per stripped zero.
    """
    lastindex = len(digits) - 1
    start = 0
    # Stop one short of the end so at least one digit always survives.
    while start < lastindex and digits[start] == 0:
        start += 1
    return digits[start:]

def number2digits(number, base):
    """Return the digits of number in the given base, most significant first.

    A number smaller than base (including 0) gives a one-element list.
    """
    lowdigits = []  # collected least-significant first
    leading = number
    while leading >= base:
        leading, remainder = divmod(leading, base)
        lowdigits.append(remainder)
    return [leading] + lowdigits[::-1]

def digits2number(digits, base):
    """Return the number whose base-`base` digits (most significant first) are digits."""
    total = 0
    # Horner's scheme over the significant digits.
    for digit in noleadingzeros(digits):
        total = total * base + digit
    return total

def number2fixedlength(number, length):
    """Return number as big-endian byte values, zero-padded to at least length items.

    A number needing more than `length` bytes is not truncated; the result
    is simply longer.
    """
    bytevalues = number2digits(number, 256)
    return zeropadded(bytevalues, length)

def fixedlength2number(digits):
    """Return the number encoded by big-endian byte values (base 256 digits)."""
    bytebase = 256
    return digits2number(digits, bytebase)

def number2variablelength(number):
    """Encode number as a MIDI variable-length quantity (list of byte values).

    The number is split into base-128 digits; 128 is added to every digit
    except the last to mark that more bytes follow.
    """
    groups = number2digits(number, 128)
    finalposition = len(groups) - 1
    return [
        group + (128 if position < finalposition else 0)
        for position, group in enumerate(groups)
    ]

def variablelength2number(variablelength):
    """Decode a MIDI variable-length quantity (list of byte values) to a number.

    128 is subtracted from every byte except the last, and the results are
    read as base-128 digits.
    """
    finalposition = len(variablelength) - 1
    groups = [
        value - (128 if position < finalposition else 0)
        for position, value in enumerate(variablelength)
    ]
    return digits2number(groups, 128)

def smallbyte(number):
    """Return True when number is below 128, i.e. its top bit is clear."""
    return number < 128

def getfixedlength(numbers, startindex, numbytes):
    """Read numbytes items of numbers starting at startindex.

    Returns (index just past the read, the items read).
    """
    stop = startindex + numbytes
    taken = numbers[startindex:stop]
    return (stop, taken)

def getvariablelength(numbers, startindex):
    """Read one variable-length quantity from numbers starting at startindex.

    Consumes bytes up to and including the first one below 128.
    Returns (index just past the quantity, the raw bytes of the quantity).
    """
    stop = startindex
    while True:
        current = numbers[stop]
        stop += 1
        if smallbyte(current):
            break
    return (stop, numbers[startindex:stop])

def analyzetimedivision(numbers):
    """Decode the two time-division bytes of a MIDI header.

    The top bit of the first byte selects the layout:
    0 -> [TicksPerBeat] (15-bit value), 1 -> [FramesPerSecond, TicksPerFrame].
    """
    high, low = numbers
    flag, payload = divmod(high, 128)
    if flag == 0:
        return [256 * payload + low]
    if flag == 1:
        return [payload, low]
    return None

def synthesizetimedivision(numbers):
    """Encode a TimeDivision as the two time-division bytes of a MIDI header.

    [TicksPerBeat] is written with the top bit clear;
    [FramesPerSecond, TicksPerFrame] is written with the top bit set.
    """
    count = len(numbers)
    if count == 1:
        (ticks,) = numbers
        flag = 0
        high, low = divmod(ticks, 256)
    if count == 2:
        high, low = numbers
        flag = 1
    return [flag * 128 + high, low]

def analyzeheaderdata(numbers):
    """Decode the 6-byte header payload into (formattype, numtracks, timedivision)."""
    formatbytes = numbers[0:2]
    trackcountbytes = numbers[2:4]
    divisionbytes = numbers[4:6]
    return (
        fixedlength2number(formatbytes),
        fixedlength2number(trackcountbytes),
        analyzetimedivision(divisionbytes),
    )

def synthesizeheaderdata(formattype, numtracks, timedivision):
    """Encode the header fields as the 6-byte header payload (list of byte values)."""
    formatbytes = number2fixedlength(formattype, 2)
    trackcountbytes = number2fixedlength(numtracks, 2)
    divisionbytes = synthesizetimedivision(timedivision)
    return formatbytes + trackcountbytes + divisionbytes

def analyzestatus(statusbyte):
    """Split a status byte into (eventtype, channel).

    After removing the top bit, the high three bits are the event type and
    the low four bits the channel.
    """
    eventtype, channel = divmod(statusbyte - 128, 16)
    return (eventtype, channel)

def synthesizestatus(eventtype, channel):
    """Return the status byte for eventtype/channel as a one-element list."""
    return [128 + 16 * eventtype + channel]

def synthesizeevent(event):
    """Encode one event as a list of byte values.

    A 4-item event is a ChannelEvent [DeltaTime, EventType, Channel,
    Parameters]; a 3-item event is a MetaEvent [DeltaTime, MetaType,
    Message].  Any other shape yields None, as before.

    The original return statements were split across lines, so they added
    the function objects themselves instead of calling them; the
    expressions are rejoined here.
    """
    if len(event) == 4:
        [deltatime, eventtype, channel, parameters] = event
        return (number2variablelength(deltatime)
                + synthesizestatus(eventtype, channel)
                + list(parameters))
    if len(event) == 3:
        [deltatime, metatype, message] = event
        # Message length as a variable-length quantity, then one byte per
        # character of the message.
        quantifiedmessage = (number2variablelength(len(message))
                             + [ord(x) for x in message])
        # Status (7, 15) is 0xFF, the meta-event marker.
        return (number2variablelength(deltatime)
                + synthesizestatus(7, 15)
                + [metatype]
                + quantifiedmessage)
    return None

def makechunk(identifier, numbers):
    """Build a chunk: identifier, 4-byte big-endian payload length, payload."""
    lengthfield = number2fixedlength(len(numbers), 4)
    return identifier + lengthfield + numbers

def makeheader(formattype, numtracks, timedivision):
    """Build the header chunk (identifier bytes spell 'MThd')."""
    payload = synthesizeheaderdata(formattype, numtracks, timedivision)
    identifier = list(b'MThd')  # [77, 84, 104, 100]
    return makechunk(identifier, payload)

def maketrack(events):
    """Build a track chunk (identifier bytes spell 'MTrk') from a list of events."""
    encodedevents = [synthesizeevent(event) for event in events]
    payload = concat(encodedevents)
    identifier = list(b'MTrk')  # [77, 84, 114, 107]
    return makechunk(identifier, payload)

def getchunks(numbers):
    """Split the file's byte values into the payloads of its chunks.

    Each chunk is a 4-byte identifier (not checked), a 4-byte big-endian
    length, and that many payload bytes.  Only the payloads are returned.
    """
    total = len(numbers)
    payloads = []
    cursor = 0
    while cursor < total:
        lengthstart = cursor + 4  # skip the chunk identifier
        datastart = cursor + 8    # skip the length field as well
        dataend = datastart + fixedlength2number(numbers[lengthstart:datastart])
        payloads.append(numbers[datastart:dataend])
        cursor = dataend
    return payloads

def getevent(numbers, startindex, runningstatus):
    """Read one event from a track's byte values.

    numbers is the track payload, startindex the position of the event's
    delta time, and runningstatus the status (a one-element list, or [])
    carried over from the previous event.

    Returns (nextstartindex, nextrunningstatus, event), where event is a
    ChannelEvent, a MetaEvent, or None for a skipped event (status type 7
    on a channel other than 15, e.g. system exclusive).

    Fixes over the original: the `[i]` subscripts lost from the status-byte
    test are restored (it compared the whole list with 128), and the three
    assignments that had been split from their call arguments are rejoined.

    Raises ValueError when the event has no status byte and there is no
    running status to fall back on.
    """
    (i, deltatime) = getvariablelength(numbers, startindex)
    deltatime = variablelength2number(deltatime)
    if smallbyte(numbers[i]):
        # A data byte here means the status byte was omitted: reuse the
        # running status and do not consume anything.
        (j, status) = (i, [])
    else:
        (j, status) = (i + 1, [numbers[i]])
    nextrunningstatus = status or runningstatus
    if not nextrunningstatus:
        raise ValueError('event at index %d has no status byte and no '
                         'running status' % startindex)
    (eventtype, channel) = analyzestatus(nextrunningstatus[0])
    if eventtype != 7:
        # ProgramChange (4) and ChannelAftertouch (5) take one parameter,
        # every other channel event takes two.
        numparameters = 1 if eventtype in (4, 5) else 2
        (nextstartindex, parameters) = getfixedlength(numbers, j, numparameters)
        event = [deltatime, eventtype, channel, parameters]
    elif channel == 15:
        # Status 0xFF: meta event = type byte, variable-length size, message.
        (k, metatype) = (j + 1, numbers[j])
        (m, messagelength) = getvariablelength(numbers, k)
        (nextstartindex, message) = getfixedlength(
            numbers, m, variablelength2number(messagelength))
        message = ''.join([chr(x) for x in message])
        event = [deltatime, metatype, message]
    else:
        # Other type-7 statuses: skip the length-prefixed payload.
        (k, messagelength) = getvariablelength(numbers, j)
        (nextstartindex, message) = getfixedlength(
            numbers, k, variablelength2number(messagelength))
        event = None
    return (nextstartindex, nextrunningstatus, event)

def getevents(numbers):
    """Decode a track payload (list of byte values) into a list of events.

    Events that getevent reports as None (skipped events) are left out.

    The original assignment had been split from the arguments of the
    getevent call, so it tried to unpack the function object; the call is
    rejoined here.
    """
    numbytes = len(numbers)
    index = 0
    runningstatus = []
    events = []
    while index < numbytes:
        (index, runningstatus, event) = getevent(numbers, index, runningstatus)
        if event is not None:
            events.append(event)
    return events

def parse(filedata):
    """Decode the bytes of a MIDI file into [formattype, timedivision, tracks].

    The track count stored in the header is read but not used; every chunk
    after the first is decoded as a track.
    """
    chunks = getchunks(list(filedata))
    headerchunk, trackchunks = chunks[0], chunks[1:]
    formattype, _numtracks, timedivision = analyzeheaderdata(headerchunk)
    tracks = [getevents(chunk) for chunk in trackchunks]
    return [formattype, timedivision, tracks]

def unparse(sequence):
    """Encode [formattype, timedivision, tracks] as the bytes of a MIDI file."""
    formattype, timedivision, tracks = sequence
    header = makeheader(formattype, len(tracks), timedivision)
    trackchunks = [maketrack(track) for track in tracks]
    return bytes(header + concat(trackchunks))

########################################
## from midiparser import parse, unparse

def readmidi(filepath):
    """Read the MIDI file at filepath and return it parsed by parse().

    The file is opened in a with-block so the handle is closed promptly;
    the original left it to the garbage collector.
    """
    with open(filepath, 'rb') as midifile:
        filedata = midifile.read()
    return parse(filedata)

def writemidi(sequence, filepath):
    """Encode sequence with unparse() and write it to filepath.

    The data is encoded before the file is opened, so a sequence that
    cannot be encoded no longer leaves a truncated file behind, and the
    with-block guarantees the data is flushed and the handle closed.
    """
    filedata = unparse(sequence)
    with open(filepath, 'wb') as midifile:
        midifile.write(filedata)

def replace(replacee, replacer, string):
    """Return string with every occurrence of replacee replaced by replacer."""
    pieces = string.split(replacee)
    return replacer.join(pieces)

def notename(notenumber):
    """Return the name of a note number as '<pitch>-<octave>', e.g. 60 -> 'C-5'.

    The octave is simply notenumber // 12.
    """
    names = ('C','C#','D','D#','E','F','F#','G','G#','A','A#','B')
    octave, pitchclass = divmod(notenumber, 12)
    return '-'.join([names[pitchclass], str(octave)])

def gettrackname(track):
    """Return the message of the track's first MetaEvent of type 3, or None.

    None is returned when there is no such event or when its message is
    empty.

    The original passed the message through `numbers2string`, which is not
    defined in this file; meta-event messages are already strings (see the
    module docstring), so the message is returned directly.
    """
    for event in track:
        if len(event) == 3 and event[1] == 3:
            return event[2] or None
    return None

def noteevent(event):
    """Return True for a ChannelEvent that is NoteOff, NoteOn or NoteAftertouch."""
    if len(event) != 4:
        return False
    return event[1] in range(3)

def switchevent(event):
    """Return True for a ChannelEvent that is NoteOff or NoteOn."""
    if len(event) != 4:
        return False
    return event[1] in range(2)

def firstnoteindices(track):
    """Return [index of the first note event in track], or [] if there is none.

    The original had lost its `[i]` subscripts: it tested the whole track
    instead of track[i] and returned None instead of [i].
    """
    for i, event in enumerate(track):
        if noteevent(event):
            return [i]
    return []

def lastnoteindices(track):
    """Return [index of the last note event in track], or [] if there is none.

    The original had lost its `[i]` subscripts: it tested the whole track
    instead of track[i] and returned None instead of [i].
    """
    for i in reversed(range(len(track))):
        if noteevent(track[i]):
            return [i]
    return []

def explodefile(filepath, directorypath):
    """Write each track of the MIDI file at filepath to its own MIDI file.

    For a format-1 file whose first track has no note events, that first
    track is treated as tempo settings and is prepended to every output
    file.  Each output name is derived from the track name (with '/'
    replaced by '_'), falling back to 'track_<i>'.

    Fixes over the original: the `[i]` subscripts lost from `tracks` are
    restored (the whole track list was being used for every file), and the
    statements that were split in the middle of an expression are rejoined.

    NOTE(review): `changefilepath` is not defined in the visible part of
    this file; it is assumed to build the output path from filepath,
    directorypath and the `rewrite` callback - confirm.
    """
    [formattype, timedivision, tracks] = readmidi(filepath)
    index = 1 if (formattype == 1 and not firstnoteindices(tracks[0])) else 0
    temposettings, tracks = tracks[:index], tracks[index:]
    for i, track in enumerate(tracks):
        trackname = gettrackname(track) or ('track_' + str(i))
        # Bind trackname as a default so the callback keeps this
        # iteration's value even if it is called later.
        rewrite = lambda basename, trackname=trackname: (
            basename + '_' + replace('/', '_', trackname))
        singletrackfilepath = changefilepath(filepath, directorypath, rewrite)
        singletrackfile = (formattype, timedivision, temposettings + [track])
        writemidi(singletrackfile, singletrackfilepath)

def reflectpitch(event):
    """Return event with its note number mirrored to 128 - notenumber.

    Events that are not note events are returned unchanged.

    Raises ValueError when the mirrored note falls outside range(2**7).
    This happens for note number 0, which maps to 128; the original
    returned that value silently, and 128 cannot be written as a MIDI
    data byte.
    """
    if not noteevent(event):
        return event
    [deltatime, eventtype, channel, parameters] = event
    [notenumber, velocity] = parameters
    newnotenumber = (2**7) - notenumber
    if newnotenumber not in range(2**7):
        raise ValueError('note number %r cannot be reflected into the '
                         'range 0-127' % (notenumber,))
    return [deltatime, eventtype, channel, [newnotenumber, velocity]]

def translatepitch(event, deltapitch):
    """Return event with its note number shifted by deltapitch.

    Events that are not note events are returned unchanged.  The shifted
    note number is asserted to lie in range(2**7).
    """
    if noteevent(event):
        deltatime, eventtype, channel, (notenumber, velocity) = event
        shifted = notenumber + deltapitch
        assert shifted in range(2**7)
        return [deltatime, eventtype, channel, [shifted, velocity]]
    return event

def switch(event):
    """Turn a note start into a note stop and vice versa.

    NoteOff events and NoteOn events with velocity 0 become NoteOn with
    velocity 64; every other NoteOn becomes NoteOn with velocity 0.
    Events that are neither NoteOff nor NoteOn are returned unchanged.
    """
    if not switchevent(event):
        return event
    deltatime, eventtype, channel, (notenumber, velocity) = event
    wasstop = eventtype == 0 or velocity == 0
    newvelocity = 64 if wasstop else 0
    # The result is always a NoteOn (type 1); velocity 0 stands for "off".
    return [deltatime, 1, channel, [notenumber, newvelocity]]

def invert(track):
    """Return a new track with reflectpitch applied to every event."""
    return list(map(reflectpitch, track))

def transpose(track, deltapitch):
    """Return a new track with every event passed through translatepitch."""
    shifted = []
    for event in track:
        shifted.append(translatepitch(event, deltapitch))
    return shifted

def retrograde(track):
    """Return track with the span from its first to its last note event reversed.

    Events before the first and after the last note event keep their
    places.  Inside the span the events are reversed and passed through
    switch(), and delta times are shifted by one position so the gaps
    between neighbouring events are preserved in the new order.

    Fixes over the original: the three-way slice assignment that was split
    across lines is rejoined, and the `[i]` subscripts lost from the
    delta-time loop are restored.  A track without note events is now
    returned as an unchanged copy instead of raising IndexError, and only
    element 0 of each event is rewritten, so 3-item meta events inside the
    span no longer break the 4-way unpacking.
    """
    firstindices = firstnoteindices(track)
    if not firstindices:
        return list(track)
    prefixindex = firstindices[0]
    suffixindex = lastnoteindices(track)[0] + 1
    prefix = track[:prefixindex]
    noteevents = track[prefixindex:suffixindex]
    suffix = track[suffixindex:]
    newnoteevents = []
    nextdeltatime = noteevents[-1][0]
    for event in reversed(noteevents):
        switched = switch(event)
        # Each event takes the delta time its predecessor in the new order
        # used to have; build a new list so the input is not mutated.
        newnoteevents.append([nextdeltatime] + list(switched[1:]))
        nextdeltatime = switched[0]
    return prefix + newnoteevents + suffix

def sequences(length, elements):
    """Return every list of the given length drawn from elements, with repetition.

    Results are ordered with the first position varying slowest.
    """
    if length == 0:
        return [[]]
    combined = []
    for head in elements:
        for tail in sequences(length - 1, elements):
            combined.append([head] + tail)
    return combined

def toggle(notenumber):
    """Return [NoteOn, NoteOff] for notenumber on channel 0.

    The NoteOn has delta time 0 and velocity 127; the NoteOff follows 300
    ticks later with velocity 0.
    """
    channel = 0
    start = [0, 1, channel, [notenumber, 127]]
    stop = [300, 0, channel, [notenumber, 0]]
    return [start, stop]

def eartrainer(notenumbers):
    """Return a format-0 Sequence (120 ticks per beat) playing notenumbers in turn.

    Each note contributes the on/off pair from toggle(); the track is
    closed with an end-of-track MetaEvent (type 47).

    Fixes over the original: `+ [endoftrack]` had been split onto its own
    line, so the marker was never appended and the stray line raised
    TypeError.  The flattening no longer uses reduce(), which failed on an
    empty notenumbers.  The end-of-track message is now the empty string
    rather than an empty list, matching the documented MetaEvent format;
    the encoded bytes are the same.
    """
    endoftrack = [0, 47, '']
    track = [event for notenumber in notenumbers for event in toggle(notenumber)]
    track.append(endoftrack)
    return [0, [120], [track]]

def makeflashcards(length, lowest, highest):
    """Write one MIDI question file and one text answer file per note sequence.

    Every sequence of `length` notes drawn from lowest..highest (inclusive)
    is generated and shuffled.  Sequence i is written to
    'questions/sequence_<i>.mid' and its note names to
    'answers/sequence_<i>.txt'.  Both directories are created first, so
    mkdir raises if either already exists.

    Fixes over the original: the `[i]` subscripts lost from `notesequences`
    are restored (every file was being built from the whole list of
    sequences), and the answer file is closed with a with-block.
    """
    from os import mkdir
    from random import shuffle
    mkdir('questions')
    mkdir('answers')
    notesequences = sequences(length, range(lowest, highest + 1))
    shuffle(notesequences)
    for i, notesequence in enumerate(notesequences):
        writemidi(eartrainer(notesequence),
                  'questions/sequence_' + str(i) + '.mid')
        with open('answers/sequence_' + str(i) + '.txt', 'w') as answerfile:
            answerfile.write(' '.join([notename(x) for x in notesequence]))

def noemptytracks(mididata):
    """Return mididata without the tracks that contain no note events.

    For a format-1 file whose first track has no note events, that first
    track is treated as tempo settings and is kept.

    Fixes over the original: `or 0` had been split onto its own line,
    which is a syntax error.  An empty track list no longer raises
    IndexError.
    """
    [formattype, timedivision, tracks] = mididata
    hastempotrack = (formattype == 1 and bool(tracks)
                     and not firstnoteindices(tracks[0]))
    index = 1 if hastempotrack else 0
    temposettings, tracks = tracks[:index], tracks[index:]
    newtracks = temposettings + [track for track in tracks
                                 if firstnoteindices(track)]
    return [formattype, timedivision, newtracks]

def nocountin(mididata):
    """Return mididata with the shared lead-in before the first notes removed.

    The smallest delta time among the first note events of all tracks is
    subtracted from the delta time of each track's first note event.  Only
    those delta times are adjusted; events before the first note are left
    as they are.

    Fixes over the original: the `[i]` subscripts lost from `tracks` and
    `newtracks` are restored (it indexed the track list with event
    indices).  The events are copied, so the caller's data is no longer
    modified through the shallow `tracks[:]` copy.  Data without any note
    event is returned as an unchanged copy instead of failing in min().
    """
    [formattype, timedivision, tracks] = mididata
    # (track index, event index) of the first note event of each track.
    firstnotes = [(i, j) for i, track in enumerate(tracks)
                  for j in firstnoteindices(track)]
    newtracks = [[list(event) for event in track] for track in tracks]
    if firstnotes:
        starttime = min(tracks[i][j][0] for (i, j) in firstnotes)
        for (i, j) in firstnotes:
            newtracks[i][j][0] = tracks[i][j][0] - starttime
    return [formattype, timedivision, newtracks]

def processfiles(directorypath, function):
    """Apply function to every MIDI file in directorypath.

    Each entry of the directory is read with readmidi, passed through
    function, and written under the same name into a new 'preprocessed'
    subdirectory.
    """
    from os import listdir, mkdir
    # The listing is taken before the subdirectory is created, so the
    # subdirectory itself is not among the entries.
    filenames = listdir(directorypath)
    subdirectorypath = directorypath + '/preprocessed'
    mkdir(subdirectorypath)
    for filename in filenames:
        source = '/'.join([directorypath, filename])
        target = '/'.join([subdirectorypath, filename])
        mididata = readmidi(source)
        writemidi(function(mididata), target)
 
Ad

Advertisements


Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Top