Bresenham's Line Algorithm in MIPS Assembly

This is my first time implementing this algorithm, and it was definitely interesting to write in assembly. It did take me ~3 hours, but I was watching the Lakers get bullied at the same time. Anyways, I'm putting this here so I can be reminded of it later.
P.S. Markdown does not support assembly syntax highlighting; sorry.

	# Draw a line to the screen using Bresenham's line algorithm
	# (https://en.wikipedia.org/wiki/Bresenham%27s_line_algorithm)
	#
	# Arguments:
	#   $a0, $a1 = x0, y0
	#   $a2, $a3 = x1, y1
	#
	# Each pixel is plotted by calling draw_pixel with the pixel's byte
	# offset in $a0, computed as (y * 512 + x) * 4. The offset math
	# hard-codes 512 pixels per row and 4 bytes per pixel, so stepping
	# one row is 2048 bytes and stepping one column is 4 bytes.
	#
	# The endpoints are swapped if needed so the major axis always
	# increases. One pixel is plotted per major-axis step, starting at
	# the first ordered point; the loop runs dx (or dy) times, so the
	# last ordered point itself is NOT plotted, and a zero-length line
	# plots nothing.
	#
	# Register use:
	#   $s0 = dx, $s1 = dy
	#   $s2 = minor-axis step in bytes (+/-2048 or +/-4)
	#   $s3 = D (Bresenham decision variable)
	#   $s4 = loop counter
	#   $s5 = byte offset of the current pixel (kept here rather than in
	#         $a0 so it survives the call to draw_pixel)
	# $s0-$s5 and $ra are saved and restored. $t0, $t1 and $a0-$a3 may
	# be overwritten.
draw_line:
	# Allocate the frame before storing so nothing live sits below $sp.
	# 28 bytes are used; the frame is padded to 32 to keep $sp 8-byte
	# aligned across the nested call.
	addi $sp, $sp, -32
	sw $ra, 0($sp)
	sw $s5, 4($sp)
	sw $s4, 8($sp)
	sw $s3, 12($sp)
	sw $s2, 16($sp)
	sw $s1, 20($sp)
	sw $s0, 24($sp)

	sub $s0, $a2, $a0 # dx = x1 - x0
	sub $s1, $a3, $a1 # dy = y1 - y0

	# If |dy| >= |dx| the line is steep: step along y instead of x
	abs $t0, $s0
	abs $t1, $s1
	bge $t1, $t0, line_y

	# x-major: if x1 < x0, swap the points so x increases
	bge $a2, $a0, line_x_body
	move $t0, $a0 # swap x0 and x1
	move $a0, $a2
	move $a2, $t0

	move $t0, $a1 # swap y0 and y1
	move $a1, $a3
	move $a3, $t0

	# Swapping the points flips the signs of dx and dy
	subu $s0, $0, $s0
	subu $s1, $0, $s1

	line_x_body:
	# Byte offset of the first point: (y0 * 512 + x0) * 4
	sll $t0, $a1, 9
	add $t0, $a0, $t0
	sll $s5, $t0, 2

	# If dy < 0, step y upward (negative row stride) and use |dy|
	li $s2, 2048 # yi: one row down, in bytes
	bgez $s1, line_y_inc_tail
	li $s2, -2048
	subu $s1, $0, $s1

	line_y_inc_tail:
	# D = (2 * dy) - dx
	sll $s3, $s1, 1
	sub $s3, $s3, $s0

	# Plot dx pixels, advancing x by one pixel each iteration
	li $s4, 0
	line_x_loop:
	beq $s4, $s0, line_tail
	move $a0, $s5 # draw_pixel takes the byte offset in $a0
	jal draw_pixel

	# If D > 0, add increment to y; update D accordingly
	blez $s3, line_x_D_le
	add $s5, $s5, $s2 # y = y + yi

	# D = D + (2 * (dy - dx))
	sub $t0, $s1, $s0
	sll $t0, $t0, 1
	add $s3, $s3, $t0
	j line_x_D_tail

	line_x_D_le:
	# D <= 0; D = D + 2*dy
	sll $t0, $s1, 1
	add $s3, $s3, $t0

	line_x_D_tail:
	addi $s5, $s5, 4 # x = x + 1 (4 bytes per pixel)
	addi $s4, $s4, 1
	j line_x_loop

	line_y:
	# y-major: if y1 < y0, swap the points so y increases
	bge $a3, $a1, line_y_body
	move $t0, $a0 # swap x0 and x1
	move $a0, $a2
	move $a2, $t0

	move $t0, $a1 # swap y0 and y1
	move $a1, $a3
	move $a3, $t0

	# Swapping the points flips the signs of dx and dy
	subu $s0, $0, $s0
	subu $s1, $0, $s1

	line_y_body:
	# Byte offset of the first point: (y0 * 512 + x0) * 4
	sll $t0, $a1, 9
	add $t0, $a0, $t0
	sll $s5, $t0, 2

	# If dx < 0, step x leftward and use |dx|.
	# This must test dx ($s0): dy ($s1) is never negative here, so
	# testing it would leave leftward-leaning steep lines vertical.
	li $s2, 4 # xi: one pixel right, in bytes
	bgez $s0, line_x_inc_tail
	li $s2, -4
	subu $s0, $0, $s0

	line_x_inc_tail:
	# D = (2 * dx) - dy
	sll $s3, $s0, 1
	sub $s3, $s3, $s1

	# Plot dy pixels, advancing y by one row each iteration
	li $s4, 0
	line_y_loop:
	beq $s4, $s1, line_tail
	move $a0, $s5 # draw_pixel takes the byte offset in $a0
	jal draw_pixel

	# If D > 0, add increment to x; update D accordingly
	blez $s3, line_y_D_le
	add $s5, $s5, $s2 # x = x + xi

	# D = D + (2 * (dx - dy))
	sub $t0, $s0, $s1
	sll $t0, $t0, 1
	add $s3, $s3, $t0
	j line_y_D_tail

	line_y_D_le:
	# D <= 0; D = D + 2*dx
	sll $t0, $s0, 1
	add $s3, $s3, $t0

	line_y_D_tail:
	addi $s5, $s5, 2048 # y = y + 1 (2048 bytes per row)
	addi $s4, $s4, 1
	j line_y_loop

	line_tail:
	# Restore saved registers, release the frame and return
	lw $ra, 0($sp)
	lw $s5, 4($sp)
	lw $s4, 8($sp)
	lw $s3, 12($sp)
	lw $s2, 16($sp)
	lw $s1, 20($sp)
	lw $s0, 24($sp)
	addi $sp, $sp, 32
	jr $ra