Rodolfo technical: enero 2012

I've created this Python script to download a Flash audio stream and then convert it to MP3. My idea was to download a radio program so that I can listen to it later on a portable device that has no network connection. This script can't be used for live streams.

The script uses three programs: rtmpdump, ffmpeg and lame. rtmpdump is a utility for downloading a stream over the RTMP protocol. It can be easily compiled; it depends on libssl and zlib. ffmpeg and lame should be available for any Linux distribution; I compiled them for Mac.

The first step is to download the contents with rtmpdump. It's not a good idea to download everything in one go; it might take a lot of time. I've implemented a multithreaded mechanism instead. The number of threads and the number of seconds per chunk can be configured. Each downloaded audio chunk is stored in its own file.

The second step is to convert all the chunks to raw PCM format and concatenate them into a single file. The ffmpeg program is used to do so.

Finally, the raw file is converted to mp3 using lame.

The script uses a considerable amount of disk space for the temporary files. I think the script could be used, with a little modification, to download video streams, but I haven't tried that.

Here is the code:


#!/usr/bin/env python

import sys
import subprocess
import os
import threading

def executeCommand (cmd):
    """ Run an external program and capture everything it prints.
        cmd: the argument list handed to subprocess.Popen.
        Returns the (stdout, stderr) pair given back by communicate(); the
        call blocks until the child process has finished. """
    pipe = subprocess.PIPE
    process = subprocess.Popen (cmd, stdout = pipe, stderr = pipe)
    return process.communicate ()

class ChunkDownloaderThread (threading.Thread):
    """ Worker thread that keeps executing download commands until the
        downloader has none left.
        downloader: an object exposing getNextThreadCmd(), which returns
        either None or a [command, chunkIndex] pair. """

    def __init__ (self, downloader):
        threading.Thread.__init__(self)
        self._downloader = downloader

    def run (self):
        """ Ask the downloader for commands one at a time, run each of them
            and print the index of the chunk once its command has ended. """
        while True:
            job = self._downloader.getNextThreadCmd ()
            # None is a singleton: test identity, not equality.
            if job is None:
                return
            executeCommand (job[0])
            # Progress report: the index of the chunk just processed.
            sys.stdout.write ( str(job[1]) + ' ')
            sys.stdout.flush ()

class Downloader (object):
    """ Downloads a recorded rtmp stream in chunks with rtmpdump, converts
        every chunk to raw pcm with ffmpeg and encodes the result with lame.
        All the temporary files are named after _tmpFile and are created
        relative to the current working directory. """
    _rtmpDumpProgram = 'rtmpdump'
    _ffmpegProgram = 'ffmpeg'
    _lameProgram= 'lame'
    _tmpFile = 'tempFile'
    _tmpExtension = '.flv'
    _rawExtension = '.raw'

    def __init__ (self, chunkSize, noOfThreads):
        """ The class constructor.
            chunkSize: the size in seconds of each file chunk.
            noOfThreads: the number of simultaneous threads to use. """
        self._chunkSize = chunkSize
        self._noOfThreads = noOfThreads

    def cleanTempFiles (self):
        """ Remove the probe file, the raw pcm file and every numbered chunk
            file (<_tmpFile><digits><_tmpExtension>) that is still on disk.
            Chunks are looked up by name pattern, so a missing index in the
            middle of the sequence does not hide the chunks after it. """
        for extension in (self._tmpExtension, self._rawExtension):
            fileName = self._tmpFile + extension
            if os.path.isfile (fileName):
                os.remove (fileName)

        directory, prefix = os.path.split (self._tmpFile)
        directory = directory or os.curdir
        if not os.path.isdir (directory):
            return
        suffix = self._tmpExtension
        for name in os.listdir (directory):
            if not (name.startswith (prefix) and name.endswith (suffix)):
                continue
            # Only names whose middle part is a chunk index are ours.
            index = name[len (prefix):len (name) - len (suffix)]
            if index.isdigit ():
                path = os.path.join (directory, name)
                if os.path.isfile (path):
                    os.remove (path)

    def prepareDownloadCmd (self, url, destinationFile):
        """ Return the rtmpdump argument list that reads url (-r) and writes
            the stream to destinationFile (-o). """
        return [self._rtmpDumpProgram, '-r', url, '-o', destinationFile]

    @staticmethod
    def _parseDuration (output):
        """ Extract the duration from the text printed by rtmpdump.
            output: the captured text, either str or bytes.
            Returns the number that follows the word 'duration' on the first
            'INFO:' line where it can be parsed as a float, or 0.0 when there
            is no such line. """
        if not isinstance (output, str):
            # Python 3 hands back bytes from the pipe; decode before parsing.
            output = output.decode ('utf-8', 'replace')
        durationStr = 'duration'
        infoStr = 'INFO:'
        for line in output.split ('\n'):
            if infoStr not in line:
                continue
            pos = line.find (durationStr)
            if pos == -1:
                continue
            try:
                return float (line[pos + len (durationStr):].strip ())
            except ValueError:
                # Not a number after the keyword: keep looking.
                continue
        return 0.0

    def findDuration (self, url):
        """ Return the duration reported by rtmpdump for url, or 0.0 when it
            can not be found. A one second download is enough to make
            rtmpdump print the stream information on stderr. """
        fileName = self._tmpFile + self._tmpExtension
        cmd = self.prepareDownloadCmd (url, fileName)

        # Download just one second
        cmd += ['-B', '1']
        duration = self._parseDuration (executeCommand (cmd)[1])
        self.cleanTempFiles ()
        return duration

    def downloadChunk (self, url, tempFile, firstSecond=0.0, lastSecond=0.0):
        """ Return the rtmpdump argument list that stores a piece of url in
            tempFile.
            firstSecond: start position (-A); 0.0 means no start option.
            lastSecond: stop position (-B); 0.0 means no stop option. """
        cmd = self.prepareDownloadCmd (url, tempFile)
        if firstSecond != 0.0:
            cmd += ['-A', str(firstSecond)]
        if lastSecond != 0.0:
            cmd += ['-B', str(lastSecond)]
        return cmd

    def getNextThreadCmd (self):
        """ Hand out the next pending chunk to a worker thread.
            Returns [command, chunkIndex], or None when every chunk has
            already been handed out. """
        # The with statement releases the lock even if building the
        # command raises.
        with self.theLock:
            if self.currentChunk >= self.totalChunks:
                return None
            auxIndex = self.currentChunk
            chunk = self.chunkList[auxIndex]
            cmd = self.downloadChunk (self.url, chunk[2], chunk[0], chunk[1])
            self.currentChunk += 1
            return [cmd, auxIndex]

    def _buildChunkList (self, duration):
        """ Split duration seconds into [begin, end, fileName] entries of
            _chunkSize seconds each. The last entry has end 0.0, so that
            no stop option is passed for it. """
        chSize = float (self._chunkSize)
        chunkList = []
        index = 0
        while index * chSize < duration:
            begin = index * chSize
            end = begin + chSize
            if end >= duration:
                end = 0.0
            fileName = self._tmpFile + str(index) + self._tmpExtension
            chunkList.append ([begin, end, fileName])
            index += 1
        return chunkList

    def downloadFile (self, url):
        """ Download the whole stream as numbered chunk files, using
            noOfThreads worker threads.
            Returns the number of chunks.
            Raises ValueError when chunkSize or noOfThreads is not
            positive. """
        # A non positive chunk size would make the planning loop endless.
        if float (self._chunkSize) <= 0.0:
            raise ValueError ('chunkSize must be greater than zero')
        if self._noOfThreads < 1:
            raise ValueError ('noOfThreads must be at least one')

        duration = self.findDuration (url)

        self.chunkList = self._buildChunkList (duration)
        self.totalChunks = len (self.chunkList)
        self.currentChunk = 0
        self.url = url
        self.theLock = threading.Lock()

        sys.stdout.write ('Total ' + str(self.totalChunks) + '\n')
        sys.stdout.flush ()

        threads = [ChunkDownloaderThread (self)
                   for _ in range (self._noOfThreads)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        sys.stdout.write ('\n')
        sys.stdout.flush ()
        return self.totalChunks

    def concatenateFile (self, totalChunks):
        """ Convert the chunk files 0 .. totalChunks - 1 to raw pcm and
            append them, in order, to the temporary raw file. Every chunk is
            deleted once it has been converted. A chunk that is not on disk
            is reported on stderr and skipped. """
        # Flv to raw command
        cmd = [self._ffmpegProgram, '-i']
        completeCmd = ['-vn', '-f', 'u16le', '-acodec', 'pcm_s16le',
            '-ac', '2', '-ab', '128k', '-ar', '44100', '-']
        tempRawFile = self._tmpFile + self._rawExtension
        sys.stdout.write ('Concatenate: \n')
        sys.stdout.flush ()
        # The with statement closes the raw file even if a conversion fails.
        with open (tempRawFile, "wb") as f:
            for i in range (totalChunks):
                flvFile = self._tmpFile + str(i) + self._tmpExtension
                if not os.path.isfile (flvFile):
                    sys.stderr.write ('Missing chunk: ' + flvFile + '\n')
                    continue

                # ffmpeg writes the pcm data to its stdout ('-').
                output = executeCommand (cmd + [flvFile] + completeCmd)
                f.write (output[0])
                sys.stdout.write (str (i) + ' ')
                sys.stdout.flush ()
                # Delete the chunk to save disk space
                os.remove (flvFile)
        sys.stdout.write ('\n')
        sys.stdout.flush ()

    def convertToMp3 (self, destination):
        """ Encode the temporary raw file into destination with lame. """
        tempRawFile = self._tmpFile + self._rawExtension
        cmd = [self._lameProgram, '-r', '-s', '44.1', '--preset', 'cd',
            tempRawFile, destination]
        sys.stdout.write ('Converting to mp3\n')
        sys.stdout.flush ()
        executeCommand (cmd)
        sys.stdout.write ('Done\n')
        sys.stdout.flush ()

    def downloadAndConvertFile (self, url, destination):
        """ Run the complete process: download the chunks, concatenate them
            as raw pcm, encode the mp3 and remove the temporary files. """
        totalChunks = self.downloadFile (url)
        self.concatenateFile (totalChunks)
        self.convertToMp3 (destination)
        self.cleanTempFiles ()


if __name__ == '__main__':
    # Two arguments are required: the stream url and the mp3 file to create.
    if len (sys.argv) < 3:
        sys.stderr.write ('Usage: ' + sys.argv[0] +
            ' <stream url> <destination mp3>\n')
        sys.exit (2)

    # Chunks of 60 seconds, downloaded by 15 simultaneous threads.
    D = Downloader(60, 15)

    D.downloadAndConvertFile (sys.argv[1], sys.argv[2])

Rodolfo technical

domingo, 22 de enero de 2012

Python's urllib2 and podomatic

sábado, 21 de enero de 2012

Downloading a flash stream audio and convert it to a mp3

Archivo del blog