Huffman encoder in 8086 ASM

While working on my 8086 emulator, I figured I might write something nice in 8086 assembly as well. This is what I came up with. It’s probably not useful to anyone, anyway, here it is:

The expected tree for the test string “1234565″:

; assemble with "nasm -f bin huffman.asm -o huffman.com
[bits 16]
[org 100h]

; initialize frequency array
xor ax, ax
lea di, [freq]
mov cx, 100h
rep stosw

; initialize tree
lea di, [tree]
mov cx, 400h
rep stosw

; determine frequency of each character
lea si, [data]
mov cx, 0007h
freq_loop:
lodsb
mov bx, ax
; no SIB byte… ftl :/
shl bx, 01h
inc word [bx+freq]
shr bx, 01h
loop freq_loop

; for each character with a frequency != 0 create a leaf node and add it to the queue
lea bp, [tree]
xor bx, bx
mov dx, bx
ln_addq_loop:
mov ax, [bx+freq]
test ax, ax
jz ln_addq_nofreq

shr bx, 01h
mov word [bp], bx ; character 00h through ffh (one byte added for padding)
mov word [bp+02h], ax ; weight
mov word [bp+04h], 0000h ; left node
mov word [bp+06h], 0000h ; right node
shl bx, 01h

call pq_push
add bp, 08h

ln_addq_nofreq:
add bx, 02h
cmp bx, 0200h
jl ln_addq_loop

tree_create_loop:
cmp word [queue_size], 0001h
je tree_create_exit_loop

mov word [bp], 0000h ; character (not used in internal nodes)
mov word [bp+02h], 0000h ; weight

call pq_pop
mov word [bp+04h], ax ; left node
mov bx, ax
mov bx, [bx+02h]
add [bp+02h], bx ; add to weight

call pq_pop
mov word [bp+06h], ax ; right node
mov bx, ax
mov bx, [bx+02h]
add [bp+02h], bx ; add to weight

call pq_push
add bp, 08h

jmp tree_create_loop
tree_create_exit_loop:

xor cx, cx
xor dx, dx
push word [pqueue]
call build_alpha

mov cx, 0007h
lea si, [data]
print_bstream:
mov bp, [pqueue]
lodsb
mov ah, 02h
mov dl, al
int 21h
push ax
mov dl, 20h
int 21h
pop ax
xor ah, ah
shl ax, 1
lea bx, [alpha]
add bx, ax
mov bx, [bx]
bstream_byte_loop:
xor dx, dx
or dx, [bp+04h]
or dx, [bp+06h]
test dx, dx
jz bstream_byte_loop_end

mov dl, 30h
test bl, 01h
jnz bs_right

bs_left:
mov bp, [bp+04h]
jmp bs_next

bs_right:
inc dl
mov bp, [bp+06h]

bs_next:
shr bl, 01h
mov ah, 02h
int 21h
jmp bstream_byte_loop
bstream_byte_loop_end:
mov ah, 09h
lea dx, [nl]
int 21h
loop print_bstream

xor ah, ah
int 21h

; node address should be stored in DS:BP
pq_push:
push bx
mov bx, [queue_size]
shl bx, 1
lea bx, [bx+pqueue]
mov [bx], bp
inc word [queue_size]
call pq_sort
pop bx
ret

; node address will be returned in DS:AX
pq_pop:
push bx
mov bx, [queue_size]
test bx, bx
jz pop_nel

dec bx
mov [queue_size], bx
shl bx, 01h
mov ax, [bx+pqueue]

pop_nel:
pop bx
ret

; bubble-sort lulz
pq_sort:
cmp word [queue_size], 0002h
jl nel

pusha

pq_sort_ml:
xor dx, dx
lea bx, [pqueue]
mov cx, [queue_size]
dec cx
pq_sort_il:
mov bp, [bx]
mov ax, [bp+02h]

mov bp, [bx+02h]
mov bp, [bp+02h]

cmp ax, bp
jnl noswap

mov ax, [bx]
mov bp, [bx+02h]
mov [bx], bp
mov [bx+02h], ax
mov dx, 01h

noswap:
add bx, 02h
loop pq_sort_il

test dx, dx
jnz pq_sort_ml

popa

nel:
ret

build_alpha:
mov bx, sp
mov bx, [bx+02h]

ba_recurseLeft:
mov ax, [bx+04h]
test ax, ax
jz ba_recurseRight

ba_recursLeft:
pusha
inc cx
shr dx, 01h
push ax
call build_alpha
pop ax
popa

ba_recurseRight:
mov ax, [bx+06h]
test ax, ax
jz ba_finish

pusha
inc cx
shr dx, 01h
or dx, 80h
push ax
call build_alpha
pop ax
popa

jmp ba_end

ba_finish:
mov ax, [bx+04h]
test ax, ax
jnz ba_end

mov bx, [bx]
mov ax, 0008h
sub ax, cx
xchg ax, cx
shr dx, cl
shl bx, 01h
mov [bx+alpha], dx
shr bx, 01h

ba_end:
ret

data db ’1′, ’2′, ’3′, ’4′, ’5′, ’6′, ’5′
queue_size dw 0000h
nl db 0dh, 0ah, ‘$’

[section .bss]
alpha:
freq resw 100h
tree resd 200h
pqueue resw 100h

Comments (0)

› No comments yet.

Leave a Reply

Allowed Tags - You may use these HTML tags and attributes in your comment.

<a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>

Pingbacks (0)

› No pingbacks yet.