Converts PDF/DAT/TXT to mp3 (abook).
Posted: Sat Aug 21, 2010 2:44 pm
If, like me, you like to listen to conversation or speech while doing something else (such as coding), or you would rather listen to a book, article, or report than read it, you can easily do so in any *nix (e.g. Ubuntu) environment.
Just copy this Python script into a file named "pdf2mp3.py", make it executable, and move it to "/usr/bin" for system-wide use.
On an Ubuntu system, be sure to install the required packages (see the apt-get line at the end of this post).
Usage: "pdf2mp3 -v en -f yourfilename.pdf -o yourfilename.mp3" (use 'en' for the English voice or 'de' for the German voice; type "espeak --voices" to get the list of voices available on your system)
I have tested this script, works like a charm.
Cheers!
Just copy this Python script into a file named "pdf2mp3.py", make it executable, and move it to "/usr/bin" for system-wide use.
Code: Select all
#!/usr/bin/python
# ###################################################
# pdf2mp3.py - little script/program to convert a
# pdf-file or ascii-file (.dat, .txt) into a mp3 audio or wav file
#
# Copyright (C) 2010 Hannes Rennau
# hannes@bolding-burchard.com
#
# Enhanced txt/pdf to mp3 conversion by Andy Holst
# andy.holst85@gmail.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
# ###################################################
# LIST OF PACKAGES NEEDED:
# you need to install the following packages:
# sudo apt-get install python poppler-utils festival festvox-rablpc16k lame espeak wavbreaker
# HOW TO USE:
# 1.create a file with the name pdf2mp3 and copy the content of
# the whole text in there
# 2.make the file an executable via:
# >>> chmod +x pdf2mp3
# 3.copy file to /usr/bin to make usage of program possible from everywhere on your computer:
# >>> sudo cp pdf2mp3 /usr/bin/
# 4.after that get help calling:
# pdf2mp3 -h
#
# 5.EXAMPLE:
# you want to convert yourfilename.pdf into a mp3 file, then just type:
# pdf2mp3 -v en -f yourfilename.pdf -o yourfilename.mp3 (for the english voice 'en',
# for german voice 'de', type:
# espeak --voices to get list
# of voices available on your system)
#
#
import os
import string
import subprocess
import sys
def main():
    """Parse the command line and run the requested text-to-speech conversion.

    Reads three required options from ``sys.argv``: ``-v`` (espeak voice
    name), ``-f`` (input file ending in .pdf, .txt or .dat) and ``-o``
    (output file ending in .wav or .mp3).  A PDF input is first converted to
    a ``.txt`` file with the same stem; the text is then spoken into the
    requested audio format.

    Returns:
        0 on success, 2 when a required option is missing, and 1 for any
        other problem (unsupported extension, missing input file, failed
        PDF-to-text conversion).
    """
    import optparse

    parser = optparse.OptionParser(
        usage='%prog -v [de,en,...] -f filename[.pdf|.txt|.dat] -o [wav|mp3]',
        description='This script convertes ASCII files (basically those '
                    'files with extension .txt or .dat) or pdf '
                    'files into an mp3 (or wav) audio file.',
        version=r'$v0.1$')
    parser.add_option(
        '-v', '--voice', type='string', metavar='VOICENAME',
        help='name of the voice to be used. type: ***espeak --voices*** '
             'to get list of available voices on your system.')
    parser.add_option(
        '-f', '--file', type='string', metavar='SOURCEFILENAME',
        help='input path of file to read (and late on convert to audio '
             'file). This can be a pdf or ascii (.txt or .dat) file. '
             'extension must be given!')
    parser.add_option(
        '-o', '--output', type='string', metavar='OUTPUTFILENAME',
        help='Output filename (with extension .wav or .mp3 that the '
             'program knows which audio format you want.)')
    options, _args = parser.parse_args()

    # print() with one string argument behaves identically on Python 2 and 3.
    if options.voice is None:
        print('no voice name given, use -v voicename [type ***espeak --voices*** for list of available voices]')
        return 2
    if options.file is None:
        print('no input file name given [please use: -f filename]')
        return 2
    if options.output is None:
        print('no output file name given [please use: -o outputfilename.[wav|mp3]')
        return 2

    filename_inp = options.file
    filename_out = options.output

    # Validate with guard clauses so that each problem reports only its own
    # message and yields a non-zero exit status.
    want_wav = filename_out.endswith('.wav')
    if not want_wav and not filename_out.endswith('.mp3'):
        print('please decide whether you want wav or mp3 format by typing -o filename.wav or -o filename.mp3')
        return 1
    if not os.path.isfile(filename_inp):
        print('*** input file does not exist ***')
        return 1
    if not filename_inp.endswith(('.dat', '.txt', '.pdf')):
        print('*** input file does not have extension (.txt, .dat, .pdf) ***')
        return 1

    text_file = filename_inp
    if filename_inp.endswith('.pdf'):
        print('converting pdf file: ' + filename_inp + ' to ASCII')
        pdf_convert_to_ascii(filename_inp)
        text_file = filename_inp[:-4] + '.txt'
        if not os.path.isfile(text_file):
            # Without the text file the speech step would have nothing to read.
            print('*** pdf conversion failed: ' + text_file + ' was not created ***')
            return 1

    if want_wav:
        convert_to_wav(text_file, filename_out, options.voice)
        join_wav_files(filename_out)
    else:
        convert_to_mp3(text_file, filename_out, options.voice)
    return 0
def pdf_convert_to_ascii(input_pdf_file):
    """Extract the text of a PDF into a ``.txt`` file with the same stem.

    Runs ``pdftotext`` on *input_pdf_file*; the output path is the input
    path with its last four characters replaced by ``.txt``.

    The command is started from an argument list rather than a shell string,
    so paths containing spaces or shell metacharacters are passed through
    unchanged, and the call blocks until ``pdftotext`` has exited.

    Raises:
        OSError: if the ``pdftotext`` executable cannot be started.
    """
    text_file = input_pdf_file[:-4] + '.txt'
    subprocess.call(['pdftotext', input_pdf_file, text_file])
def convert_to_wav(input_ascii_file, output_wav_file, language):
    """Speak a text file into a WAV file using ``sed`` and ``espeak``.

    The text is filtered through ``sed`` so that only ASCII letters, spaces
    and the characters ``. , ! ?`` remain (digits and non-ASCII letters are
    dropped), then piped into ``espeak -v<language> -w <wav>``.  The WAV path
    is *output_wav_file* with its last four characters replaced by ``.wav``.

    Both processes are started from argument lists (no shell), so file names
    with spaces or shell metacharacters are safe, and the function returns
    only after both have exited.

    Raises:
        OSError: if ``sed`` or ``espeak`` cannot be started.
    """
    wav_path = output_wav_file[:-4] + '.wav'
    sed = subprocess.Popen(
        ['sed', '-e', 's/[^a-zA-Z .,!?]//g', '--', input_ascii_file],
        stdout=subprocess.PIPE)
    espeak = subprocess.Popen(
        ['espeak', '-v' + language, '-w', wav_path],
        stdin=sed.stdout)
    # Drop our copy of the pipe so sed gets SIGPIPE if espeak exits early.
    sed.stdout.close()
    espeak.wait()
    sed.wait()
def convert_to_mp3(input_ascii_file, output_wav_file, language):
    """Speak a text file straight into an MP3 file.

    Builds the pipeline ``sed | espeak --stdout | lame``: the text is
    filtered so that only ASCII letters, spaces and ``. , ! ?`` remain,
    spoken by ``espeak -v<language>`` to its standard output, and encoded by
    ``lame`` reading from standard input.  Despite its name,
    *output_wav_file* is the requested output path; the MP3 is written to
    that path with its last four characters replaced by ``.mp3``.

    All processes are started from argument lists (no shell), so file names
    with spaces or shell metacharacters are safe, and the function returns
    only after every stage has exited.

    Raises:
        OSError: if ``sed``, ``espeak`` or ``lame`` cannot be started.
    """
    mp3_path = output_wav_file[:-4] + '.mp3'
    sed = subprocess.Popen(
        ['sed', '-e', 's/[^a-zA-Z .,!?]//g', '--', input_ascii_file],
        stdout=subprocess.PIPE)
    espeak = subprocess.Popen(
        ['espeak', '-v' + language, '--stdout'],
        stdin=sed.stdout, stdout=subprocess.PIPE)
    # Drop our copies of the pipe ends so an upstream stage gets SIGPIPE
    # when the stage reading from it exits early.
    sed.stdout.close()
    lame = subprocess.Popen(['lame', '-', mp3_path], stdin=espeak.stdout)
    espeak.stdout.close()
    lame.wait()
    espeak.wait()
    sed.wait()
def convert_wav_2_mp3(input_wav_file):
    """Encode a WAV file to MP3 with ``lame`` and then delete the WAV.

    Both paths are derived from *input_wav_file* by replacing its last four
    characters with ``.wav`` and ``.mp3`` respectively.  The WAV is removed
    only when ``lame`` exits successfully, so a failed encode does not
    destroy the source audio.

    Raises:
        OSError: if the ``lame`` executable cannot be started.
    """
    stem = input_wav_file[:-4]
    wav_path = stem + '.wav'
    mp3_path = stem + '.mp3'
    if subprocess.call(['lame', '-f', wav_path, mp3_path]) != 0:
        return  # keep the WAV: the MP3 was not produced
    try:
        os.remove(wav_path)
    except OSError:
        # Same tolerance as ``rm -f``: nothing to do if it cannot be removed.
        pass
def nr_wav_files(valid_path, filename_out):
    """Count the numbered WAV pieces that belong to *filename_out*.

    A piece is a file located directly in *valid_path* whose name starts
    with the output name's stem (its last four characters removed) followed
    by ``.wav_``, e.g. ``book.wav_01`` for ``book.wav``.  The original
    comment states that espeak may split its output into several such files.

    Only names that *start* with that prefix are counted, and
    subdirectories are not searched: ``join_wav_files`` addresses the pieces
    by exactly these names relative to the searched directory, so anything
    else would be counted but could never be merged.

    Returns:
        The number of matching files; 0 if *valid_path* cannot be listed.
    """
    prefix = filename_out[:-4] + '.wav_'
    top_level_files = []
    for _root, _dirs, files in os.walk(valid_path):
        top_level_files = files
        break  # the first entry yielded by os.walk is valid_path itself
    return sum(1 for name in top_level_files if name.startswith(prefix))
def join_wav_files(filename_out):
    """Merge the WAV pieces of *filename_out* into a single WAV file.

    The list of inputs is the main ``<stem>.wav`` file followed by the
    numbered pieces ``<stem>.wav_01``, ``<stem>.wav_02``, ... where the
    number of pieces considered is ``nr_wav_files('.', filename_out) - 1``
    (count kept as in the original; NOTE(review): confirm against the piece
    names espeak actually produces).  The files are merged with ``wavmerge``
    into a temporary file which then replaces ``<stem>.wav``.

    Differences from a plain shell pipeline, all for safety:
      * commands are run from argument lists, so unusual file names work;
      * the temporary file is named after the output instead of a fixed
        ``merged.wav`` that could overwrite an unrelated file;
      * pieces are deleted only after a successful merge, and only the
        pieces of this output (names starting with ``<stem>.wav_``) rather
        than every ``*.wav_*`` file in the working directory.
    """
    stem = filename_out[:-4]
    target = stem + '.wav'
    piece_prefix = stem + '.wav_'
    merged = stem + '_merged.wav'

    inputs = [target]
    for number in range(1, nr_wav_files('.', filename_out)):
        inputs.append('%s%02d' % (piece_prefix, number))

    try:
        status = subprocess.call(['wavmerge', '-o', merged] + inputs)
    except OSError:
        status = 1  # wavmerge is missing or not executable
    if status != 0:
        # Merging is post-processing: keep whatever espeak produced.
        sys.stderr.write('wavmerge failed; leaving WAV files untouched\n')
        return

    for name in os.listdir('.'):
        if name.startswith(piece_prefix):
            try:
                os.remove(name)
            except OSError:
                pass  # same tolerance as ``rm -f``
    subprocess.call(['mv', '--', merged, target])
# Script entry point: run the converter and hand its result to the shell
# as the process exit status.
if __name__ == '__main__':
    sys.exit(main())
Code: Select all
sudo apt-get install python poppler-utils festival festvox-rablpc16k lame espeak wavbreaker
I have tested this script, works like a charm.
Cheers!