albumcutter - Unnamed repository; edit this file 'description' to name the repository.

commit 92ece2d65d95880986d481d296360a6ba6a0e602
parent a870530c5ca0747477fa311a151a20f397632c49
Author: Paul Longtine <paullongtine@gmail.com>
Date:   Wed Nov  4 14:16:58 2015

inital commit

Diffstat:
 bin/ac             |  32 +++++++++++++-
 src/albumcutter.py | 134 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 166 insertions(+), 0 deletions(-)
diff --git a/bin/ac b/bin/ac
@@ -0,0 +1,32 @@
+#!/usr/bin/env python2
+import pyperclip
+from albumcutter import AlbumCutter
+from optparse import OptionParser
+
+def main():
+	"""Entry point: take the URL from argv, confirm the tracklist, cut the album."""
+	options, args = parsearg()
+	# The video URL is the one required positional argument.
+	if not args:
+		print("Missing URL.")
+		return
+
+	url = args[0]
+	tracklist = get_tracklist()
+	AlbumCutter(url, tracklist, options.dir)
+
+def parsearg():
+	"""Parse the command line and return the (options, args) pair.
+
+	The only switch is -d, the destination directory (defaults to "album").
+	"""
+	cli = OptionParser()
+	cli.add_option("-d", dest="dir", default="album",
+	               help="Destination album", metavar="<dir>")
+
+	# parse_args() reads the process arguments and hands back
+	# (options, positional arguments).
+	parsed = cli.parse_args()
+	return parsed
+
+def get_tracklist():
+	"""Ask the user to confirm the tracklist currently held in the clipboard.
+
+	The clipboard is re-read and shown again every time the user answers 'n',
+	so a different tracklist can be copied between prompts. Any other answer
+	accepts.
+
+	Returns the exact text that was displayed and confirmed.
+	"""
+	while True:
+		# Snapshot the clipboard once per round so that what is returned is
+		# what the user actually saw, even if the clipboard changes afterwards.
+		tracklist = pyperclip.paste()
+		print("Using tracklist in clipboard:")
+		print(tracklist)
+		if raw_input("\n[y/n]:") != 'n':
+			return tracklist
+
+# Only run the command-line tool when executed as a script, not on import.
+if __name__ == "__main__":
+	main()
diff --git a/src/albumcutter.py b/src/albumcutter.py
@@ -0,0 +1,134 @@
+"""
+ AlbumCutter is a program that uses `youtube-dl` and `pydub` to download albums
+ off of youtube and break them into individual files under a specified directory
+
+ This class provides the functions to
+  * Obtain the file
+  * Parse track listing
+  * From the track listing, splice the audio file into individual tracks and
+    export them into the filesystem
+
+ AlbumCutter is initialized with the URL of the youtube video, followed by a
+ string containing the tracklist (delimited by newlines), along with the output
+ directory.
+
+ Copyright (c) Paul Longtine <paul@nanner.co>
+"""
+import re, os
+from subprocess import call, Popen, PIPE
+from pydub import AudioSegment
+
+class AlbumCutter:
+	"""Downloads one long audio file and cuts it into per-track mp3 files.
+
+	Constructing an instance runs the whole pipeline: create the output
+	directory, download the audio with `youtube-dl`, parse the tracklist and
+	export every track. A failure before the export step is printed and
+	aborts the run; a track that fails to export is reported and skipped.
+
+	Attributes:
+	  url       -- address handed to `youtube-dl`
+	  tracklist -- raw tracklist text, one track per line
+	  output    -- directory that receives the download and the tracks
+	  fname     -- path of the downloaded file (None until it was loaded)
+	  audio     -- pydub AudioSegment of the download (None until loaded)
+	  tracks    -- parsed track dicts (None until process_tracklist ran)
+	"""
+
+	# Matches [hours:]minutes:seconds, e.g. "3:15" or "1:02:03".
+	_TIMESTAMP = re.compile(r'((?P<hr>\d+):)?((?P<min>\d+):)(?P<sec>\d+)')
+
+	def __init__( self, url, tracklist, output ):
+		self.tracklist = tracklist
+		self.url       = url
+		self.output    = output
+		self.fname     = None
+		self.audio     = None
+		self.tracks    = None
+
+		# Refuse to write into an existing directory
+		if os.path.exists(output):
+			print("ERROR: Directory exists, aborting")
+			return
+
+		try:
+			os.mkdir(output)
+		except OSError:
+			print("ERROR: Could not make directory ({})".format(output))
+			return
+
+		# Deliberately not wrapped in `assert`: assertions (and the call
+		# inside them) are skipped entirely when python runs with -O.
+		if not self.get_audio(self.url):
+			print("ERROR: Could not obtain audio, aborting")
+			return
+
+		self.process_tracklist(self.tracklist)
+		self.export(self.output)
+
+	# Builds the output template handed to youtube-dl:
+	# $(output)/$(VIDEO ID).$(EXTENSION)
+	def _output_template( self ):
+		return '{}/%(id)s.%(ext)s'.format(self.output)
+
+	# fetches audio from URL and loads it into self.audio.
+	# Returns True when the audio was downloaded and loaded, False otherwise.
+	def get_audio( self, url ):
+		template = '-o' + self._output_template()
+
+		print("Downloading audio...")
+		# youtube-dl -q -x -o$(output)/$(VIDEO ID).$(EXTENSION)
+		# call() returns the exit status: 0 is success, anything else failed.
+		if call(['youtube-dl', '-q', '-x', template, url]) != 0:
+			return False
+
+		# This second call finds the filename of the audio just downloaded.
+		# It's not elegant at all, but it was the best solution available.
+		# NOTE(review): with -x the extracted file may end up with a different
+		# extension than the one reported here -- confirm against youtube-dl.
+		p = Popen(['youtube-dl', '--get-filename', '--skip-download',
+		           '-x', template, url],
+		          stdin=PIPE, stdout=PIPE, stderr=PIPE,
+		          universal_newlines=True)
+		output, err = p.communicate()
+
+		# Show whatever was written to stderr, but judge success by the exit
+		# status: stderr may carry mere warnings.
+		if err:
+			print(err)
+		if p.returncode != 0:
+			return False
+
+		# removes the trailing newline; also safe when nothing was printed
+		output = output.rstrip("\r\n")
+		if not output:
+			print("ERROR: Could not determine downloaded filename")
+			return False
+
+		# Loads file into pydub so it can be sliced later
+		print("Trying to load file ({})".format(output))
+		#TODO actually figure out proper encoding mnemonic for this function
+		#     Currently, it just assumes the extension IS the mnemonic...
+		#                ...which is bad. We don't want that.
+		# splitext only looks at the last dot of the basename, so dots in the
+		# directory name or in the video id cannot confuse it.
+		extension = os.path.splitext(output)[1].lstrip(".")
+		try:
+			self.audio = AudioSegment.from_file(output, extension)
+		except Exception as e:
+			print("ERROR: Could not load audio\n{}".format(e))
+			return False
+
+		print("Loaded audio")
+
+		self.fname = output
+		return True
+
+	# gives meaning to tracklist using format:
+	# [ { 'start': <starting time>, 'end': <ending time>, 'title': <title> }, ]
+	# Times are in milliseconds; 'end' of the last track is -1.
+	# Lines without a timestamp (blank lines, headings, ...) are skipped.
+	def process_tracklist( self, tracklist):
+		tracks = []
+		for track in tracklist.split("\n"):
+			times = self._TIMESTAMP.search(track)
+			# No timestamp on this line: this is not a track listing
+			if times is None:
+				continue
+			# Converts hour, minute and second into milliseconds
+			ms = self.to_ms(times.group('hr'),
+			                times.group('min'),
+			                times.group('sec'))
+			if ms is None:
+				continue
+			# Remove timestamp and prettify the title
+			title = self._TIMESTAMP.sub('', track)
+			title = re.sub('[^-a-zA-Z0-9_.() ]+', '', title)
+			title = title.strip()
+			# The previous track ends where this one starts
+			if tracks:
+				tracks[-1]["end"] = ms
+			tracks.append({"start":ms,
+			               "end":-1,
+			               "title":title})
+		self.tracks = tracks
+
+	# Converts hours, minutes, and seconds into ms and adds them into one number
+	# Missing hours/minutes (None) count as 0. Returns None when the seconds
+	# cannot be read as an integer.
+	def to_ms( self, hour, minute, second ):
+		hour   = 0 if hour is None else int(hour)
+		minute = 0 if minute is None else int(minute)
+		try:
+			second = int(second)
+		except (TypeError, ValueError):
+			return None
+		#             ms in hour        ms in minute        ms in second
+		return (hour * 3600000) + (minute * 60000) + (second * 1000)
+
+	# Exports tracks into specified directory as "<title>.mp3"
+	# Raises AssertionError when no tracklist has been processed yet.
+	def export( self, directory ):
+		# Explicit raise instead of `assert` so the check survives python -O
+		if self.tracks is None:
+			raise AssertionError("process_tracklist() must run before export()")
+		# Loop through tracks and process them
+		for track in self.tracks:
+			print("Processing track: '{}'".format(track['title']))
+			# A negative end marks the last track: slice to the very end, since
+			# -1 as a stop index would cut off the final millisecond.
+			end = track['end'] if track['end'] >= 0 else None
+			t = self.audio[track['start']:end]
+			path = os.path.join(directory, "{}.mp3".format(track['title']))
+			try:
+				t.export(path, format="mp3")
+			except Exception as e:
+				# Best effort: report and carry on with the remaining tracks
+				print("ERROR: Could not export ({})".format(path))
+				print(e)
+
+	def __del__( self ):
+		# Release the decoded audio. Plain assignment cannot fail, even when
+		# the attribute was never set.
+		self.audio = None
	albumcutter Unnamed repository; edit this file 'description' to name the repository.
	Log \| Files \| Refs