The script uses three programs: rtmpdump, ffmpeg and lame. rtmpdump is a utility for downloading a stream over the RTMP protocol. It can be easily compiled; it depends on libssl and zlib. ffmpeg and lame should be available for any Linux distribution; I compiled them for Mac.
The first step is to download the contents with rtmpdump. It's not a good idea to download everything in one go; it might take a lot of time. I've implemented a multithread mechanism instead. The number of threads and the number of seconds to download can be configured. Each downloaded audio chunk is stored in a file.
The second step is to convert all the chunks to raw PCM format and concatenate them into a single file. The ffmpeg program is used to do so.
Finally, the raw file is converted to mp3 using lame.
The script uses a considerable amount of disk space for the temporary files. I think the script could be used, with a little modification, to download video streams, but I haven't tried that.
Here is the code:
#!/usr/bin/env python
import sys
import subprocess
import os
import threading
def executeCommand(cmd):
    """Run an external program and capture everything it prints.

    cmd: the argument list handed to subprocess.Popen (program name first).

    Returns the (stdout, stderr) tuple produced by Popen.communicate().
    The call blocks until the program has exited.
    """
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    # communicate() drains both pipes while waiting, so a chatty child
    # cannot block on a full pipe buffer.
    return process.communicate()
class ChunkDownloaderThread(threading.Thread):
    """Worker thread that keeps fetching chunks until none are left.

    The thread repeatedly asks its downloader for the next command via
    getNextThreadCmd(), runs it, and prints the index of the finished chunk.
    """

    def __init__(self, downloader):
        """downloader: the object providing getNextThreadCmd()."""
        threading.Thread.__init__(self)
        self._downloader = downloader

    def run(self):
        """Execute download commands until the downloader returns None."""
        while True:
            job = self._downloader.getNextThreadCmd()
            if job is None:
                # No chunks left to download: let the thread finish.
                break
            command, chunkIndex = job[0], job[1]
            executeCommand(command)
            # Progress report: the index of the chunk just downloaded.
            sys.stdout.write(str(chunkIndex) + ' ')
            sys.stdout.flush()
class Downloader(object):
    """Download an RTMP stream in chunks and turn it into an mp3 file.

    The work is done by three external programs: rtmpdump fetches the
    chunks (several at a time, one per worker thread), ffmpeg decodes each
    chunk to raw PCM which is appended to a single raw file, and lame
    encodes that raw file to mp3. All temporary files are created in the
    current working directory.
    """

    _rtmpDumpProgram = 'rtmpdump'
    _ffmpegProgram = 'ffmpeg'
    _lameProgram = 'lame'
    _tmpFile = 'tempFile'
    _tmpExtension = '.flv'
    _rawExtension = '.raw'

    def __init__(self, chunkSize, noOfThreads):
        """ The class constructor.
        chunkSize: the size in seconds of each file chunk.
        noOfThreads: the number of simultaneous threads to use. """
        self._chunkSize = chunkSize
        self._noOfThreads = noOfThreads
        # Download state shared with the worker threads. It is rebuilt by
        # downloadFile(); initialising it here keeps getNextThreadCmd()
        # callable (returning None) before any download has started.
        self.chunkList = []
        self.totalChunks = 0
        self.currentChunk = 0
        self.url = None
        self.theLock = threading.Lock()

    def cleanTempFiles(self):
        """Delete the temporary files left in the working directory.

        Removes the un-numbered temporary .flv and .raw files if present,
        then the numbered chunk files starting at index 0 and stopping at
        the first index whose file does not exist.
        """
        for extension in (self._tmpExtension, self._rawExtension):
            fileName = self._tmpFile + extension
            if os.path.isfile(fileName):
                os.remove(fileName)
        index = 0
        while True:
            fileName = self._tmpFile + str(index) + self._tmpExtension
            if not os.path.isfile(fileName):
                break
            os.remove(fileName)
            index += 1

    def prepareDownloadCmd(self, url, destinationFile):
        """Return the basic rtmpdump argument list for url -> destinationFile."""
        return [self._rtmpDumpProgram, '-r', url, '-o', destinationFile]

    @staticmethod
    def _parseDuration(output):
        """Extract the stream duration in seconds from rtmpdump's log text.

        output: the captured log, as text or as bytes.

        Returns the number following 'duration' on the first 'INFO:' line
        where it parses as a float, or 0.0 when no such line is found.
        """
        if isinstance(output, bytes):
            # Popen.communicate() yields bytes on Python 3; splitting those
            # with a str separator would raise TypeError.
            output = output.decode('utf-8', 'replace')
        infoStr = 'INFO:'
        durationStr = 'duration'
        for line in output.split('\n'):
            if line.find(infoStr) == -1:
                continue
            pos = line.find(durationStr)
            if pos == -1:
                continue
            try:
                return float(line[pos + len(durationStr):].strip())
            except ValueError:
                # 'duration' appeared without a number after it; keep looking.
                continue
        return 0.0

    def findDuration(self, url):
        """Return the duration of the stream at url in seconds (0.0 if unknown).

        Runs rtmpdump limited to the first second of the stream and reads
        the duration from the log it writes to stderr. The temporary
        download is removed afterwards.
        """
        fileName = self._tmpFile + self._tmpExtension
        cmd = self.prepareDownloadCmd(url, fileName)
        # Download just one second
        cmd += ['-B', '1']
        try:
            # Index 1 of the result is the captured stderr.
            return self._parseDuration(executeCommand(cmd)[1])
        finally:
            self.cleanTempFiles()

    def downloadChunk(self, url, tempFile, firstSecond=0.0, lastSecond=0.0):
        """Return the rtmpdump argument list that downloads one chunk.

        url: the stream to download.
        tempFile: the file the chunk is written to.
        firstSecond: start position in seconds; 0.0 adds no '-A' option.
        lastSecond: stop position in seconds; 0.0 adds no '-B' option.

        The command is only built here, not executed.
        """
        cmd = self.prepareDownloadCmd(url, tempFile)
        if firstSecond != 0.0:
            cmd += ['-A', str(firstSecond)]
        if lastSecond != 0.0:
            cmd += ['-B', str(lastSecond)]
        return cmd

    def getNextThreadCmd(self):
        """Hand out the next pending chunk to a worker thread.

        Returns [cmd, index], where cmd is the rtmpdump argument list and
        index the chunk number, or None once every chunk has been handed
        out. Safe to call from several threads at once.
        """
        # The 'with' block releases the lock even if building the command
        # raises, so the remaining workers cannot deadlock.
        with self.theLock:
            if self.currentChunk >= self.totalChunks:
                return None
            index = self.currentChunk
            begin, end, fileName = self.chunkList[index]
            self.currentChunk += 1
            return [self.downloadChunk(self.url, fileName, begin, end), index]

    def _buildChunkList(self, duration):
        """Split duration seconds into [begin, end, fileName] entries.

        The last entry gets end == 0.0, which downloadChunk() treats as
        "no stop position". Raises ValueError for a non-positive chunk
        size, which would otherwise loop forever.
        """
        chSize = float(self._chunkSize)
        if chSize <= 0.0:
            raise ValueError('chunkSize must be greater than zero')
        chunks = []
        begin = 0.0
        while begin < duration:
            end = begin + chSize
            if end >= duration:
                end = 0.0
            fileName = self._tmpFile + str(len(chunks)) + self._tmpExtension
            chunks.append([begin, end, fileName])
            begin += chSize
        return chunks

    def downloadFile(self, url):
        """Download the whole stream at url as numbered chunk files.

        Starts noOfThreads worker threads, waits for all of them and
        returns the number of chunks the stream was split into.
        """
        duration = self.findDuration(url)
        self.chunkList = self._buildChunkList(duration)
        self.totalChunks = len(self.chunkList)
        self.currentChunk = 0
        self.url = url
        self.theLock = threading.Lock()
        sys.stdout.write('Total ' + str(self.totalChunks) + '\n')
        sys.stdout.flush()
        threads = [ChunkDownloaderThread(self)
                   for _ in range(self._noOfThreads)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        sys.stdout.write('\n')
        sys.stdout.flush()
        return self.totalChunks

    def concatenateFile(self, totalChunks):
        """Decode chunks 0..totalChunks-1 with ffmpeg into one raw file.

        Each chunk file is deleted as soon as its audio has been appended.
        """
        # Flv to raw command; the trailing '-' makes ffmpeg write the
        # decoded audio to stdout, which executeCommand captures.
        cmd = [self._ffmpegProgram, '-i']
        completeCmd = ['-vn', '-f', 'u16le', '-acodec', 'pcm_s16le',
                       '-ac', '2', '-ab', '128k', '-ar', '44100', '-']
        tempRawFile = self._tmpFile + self._rawExtension
        sys.stdout.write('Concatenate: \n')
        sys.stdout.flush()
        # 'with' closes the raw file even when a chunk fails part-way.
        with open(tempRawFile, "wb") as rawFile:
            for i in range(totalChunks):
                flvFile = self._tmpFile + str(i) + self._tmpExtension
                output = executeCommand(cmd + [flvFile] + completeCmd)
                rawFile.write(output[0])
                sys.stdout.write(str(i) + ' ')
                sys.stdout.flush()
                # Delete the chunk to save disk space
                os.remove(flvFile)
            sys.stdout.write('\n')
            sys.stdout.flush()

    def convertToMp3(self, destination):
        """Encode the temporary raw file to destination using lame."""
        tempRawFile = self._tmpFile + self._rawExtension
        cmd = [self._lameProgram, '-r', '-s', '44.1', '--preset', 'cd',
               tempRawFile, destination]
        sys.stdout.write('Converting to mp3\n')
        sys.stdout.flush()
        executeCommand(cmd)
        sys.stdout.write('Done\n')
        sys.stdout.flush()

    def downloadAndConvertFile(self, url, destination):
        """Download the stream at url and save it as an mp3 at destination."""
        try:
            totalChunks = self.downloadFile(url)
            self.concatenateFile(totalChunks)
            self.convertToMp3(destination)
        finally:
            # The temporary files are large; remove them even when a step
            # above fails.
            self.cleanTempFiles()
if __name__ == '__main__':
    # Two arguments are required: the stream URL and the mp3 file to write.
    if len(sys.argv) < 3:
        sys.stderr.write('Usage: ' + sys.argv[0] +
                         ' <stream-url> <destination-mp3>\n')
        sys.exit(1)
    # 60-second chunks, downloaded by 15 threads in parallel.
    D = Downloader(60, 15)
    D.downloadAndConvertFile(sys.argv[1], sys.argv[2])
No hay comentarios:
Publicar un comentario