x86 machine-code - 585 bytes, 468 with bonus
Disappointed with how large my last entry was, I decided to try something very different this time. Drawing on insertusernamehere's idea of separating the city names from the state names, thus avoiding unnecessary logic and unneeded terminators, I still thought I ought to be able to make the program smaller than the raw strings are. UPX wouldn't help me to cheat, complaining that the program was already too small. Thinking about compression, I tried to compress the 662-byte text output with WinRAR but still only got it down to 543 bytes - and that was without anything to decompress it with. It still seemed far too large, given that it was just the result, without any code.
Then I realized - I'm only using 26 chars for the letters and another 2 for the spaces and the commas. Hmm, that fits into 32, which needs just 5 bits. So, I wrote a quick JavaScript program to encode the strings, assigning a-z to 0-25, while space and comma got 26 and 27. To keep things simple, every character is encoded in 5 bits, whether it needs this many or not. From there, I just stuck all the bits together and broke them back into byte-sized chunks. This allowed me to pack the 563 bytes of strings into 353 bytes - a saving of 37.5% or some 210 bytes. I didn't quite manage to squeeze the program and data into the same space as just the unpacked data, but I came close enough to be happy.
Hxd view of binary:
68 3F 00 68 E8 01 68 4F 03 E8 1C 00 68 22 01 68 27 02 68 B3 03 E8 10 00 - h?.hè.hO.è..h".h'.h³.è..
BE 83 05 C6 04 00 68 4F 03 68 B3 03 E8 62 00 C3 55 89 E5 81 C5 04 00 8B - ¾ƒ.Æ..hO.h³.èb.ÃU‰å.Å..‹
76 02 8B 7E 00 B6 05 30 DB AC B2 08 D0 D0 D0 D3 FE CA FE CE 75 1E 80 FB - v.‹~.¶.0Û¬².ÐÐÐÓþÊþÎu.€û
1A 75 05 B3 20 E9 0D 00 80 FB 1B 75 05 B3 2C E9 03 00 80 C3 61 88 1D 47 - .u.³ é..€û.u.³,é..€Ãaˆ.G
B6 05 30 DB 08 D2 75 D4 FF 4E 04 75 CC 5D C2 06 00 53 B3 62 FE CB 88 DF - ¶.0Û.ÒuÔÿN.uÌ]Â..S³bþˈß
80 C7 19 38 D8 72 08 38 F8 77 04 B3 20 28 D8 5B C3 55 89 E5 81 C5 04 00 - €Ç.8Ør.8øw.³ (Ø[ÃU‰å.Å..
8B 76 00 31 C0 88 C2 89 C1 AC A8 FF 74 46 80 FA 20 74 35 08 D2 74 31 3C - ‹v.1ÀˆÂ‰Á¬¨ÿtF€ú t5.Òt1<
2C 75 30 B4 0E CD 10 89 CB 01 DB 03 5E 02 8A 07 E8 B6 FF CD 10 43 8A 07 - ,u0´.Í.‰Ë.Û.^.Š.è¶ÿÍ.CŠ.
E8 AE FF CD 10 B0 0D CD 10 B0 0A CD 10 C6 06 4C 03 00 30 D2 41 E9 C1 FF - è®ÿÍ.°.Í.°.Í.Æ.L..0ÒAéÁÿ
E8 96 FF B4 0E CD 10 88 C2 E9 B5 FF 5D C2 04 00 58 10 D7 1C 0B 64 C4 E4 - è–ÿ´.Í.ˆÂéµÿ]Â..X.×..dÄä
0E 77 60 1B 82 AD AC 9B 5A 96 3A A0 90 DE 06 12 28 19 1A 7A CC 53 54 98 - .w`.‚.¬›Z–: .Þ..(..zÌST˜
D0 29 A4 68 AC 8B 00 19 62 0E 86 49 0B 90 98 3B 62 93 30 1A 35 61 D1 04 - Ð)¤h¬‹..b.†I..˜;b“0.5aÑ.
50 01 01 CA B5 5B 50 08 26 E6 EA 2E A1 89 B4 34 68 03 40 F7 2D 12 D8 9C - P..ʵ[P.&æê.¡‰´4h.@÷-.Øœ
BA 30 34 96 D8 E6 CC CE 61 23 8D 9C 8B 23 41 B1 91 B5 24 76 17 22 44 D8 - º04–ØæÌÎa#.œ‹#A±‘µ$v."DØ
29 29 A1 BB 0B A5 37 37 60 58 40 DC 6E 60 5A C0 70 4A 44 26 E4 06 CC 1A - ))¡».¥77`X@Ün`ZÀpJD&ä.Ì.
29 36 D0 48 F5 42 D6 4D CE 24 6C DC DD A4 85 29 23 27 37 71 40 8E C7 34 - )6ÐHõBÖMÎ$lÜݤ…)#'7q@ŽÇ4
7B 7A 09 18 93 67 04 62 89 06 91 36 C1 43 52 53 06 DF 17 55 03 23 44 4D - {z..“g.b‰.‘6ÁCRS.ß.U.#DM
8D D5 24 76 27 34 4E 88 F6 C7 36 6F 22 D0 48 EC E0 8C CA E8 8F 73 73 C8 - .Õ$v'4NˆöÇ6o"ÐHìàŒÊè.ssÈ
A0 6E 40 43 67 A7 82 8B DA 68 D2 02 9B 5A 1A 27 2D BB 88 16 44 18 FB 60 - n@Cg§‚‹ÚhÒ.›Z.'-»ˆ.D.û`
06 89 39 BB 72 F0 C7 A0 1B 79 DC 46 A2 FB 58 1B 24 34 DB 3B 9A E5 D1 74 - .‰9»rðÇ .yÜF¢ûX.$4Û;šåÑt
DA 40 25 49 CD DC 9F 14 34 C5 41 16 3D 89 CB A3 02 80 6C 0D 68 1E E5 A2 - Ú@%IÍÜŸ.4ÅA.=‰Ë£.€l.h.å¢
5B 11 C9 82 35 A4 DC 80 B9 E9 60 51 34 24 4F 1B 04 D6 06 CC 1B 0A 24 C0 - [.É‚5¤Ü€¹é`Q4$O..Ö.Ì..$À
44 4A D9 62 06 A8 AE 8C F7 20 2C 8C DA D1 39 AC 9A 8B 84 AD 8C 92 D3 1C - DJÙb.¨®Œ÷ ,ŒÚÑ9¬š‹„.Œ’Ó.
86 92 5B 90 05 10 30 8D 9B B6 E5 2C 07 73 01 A1 22 78 D8 8E 08 AC 92 9B - †’[...0.›¶å,.s.¡"xØŽ.¬’›
9B B1 02 32 73 74 24 4F 1B - ›±.2st$O.
Source-code:
[section .text]
[bits 16]
[org 0x100]
; Packed table sizes in bytes. Each character occupies 5 bits, so
; packed size = ceil(unpacked_chars * 5 / 8).
STATES_PACKED_BYTES     equ 63
CAPITALS_PACKED_BYTES   equ 290

;-----------------------------------------------------------------------
; entry_point - program start
; Unpacks both 5-bit tables into their work buffers, null-terminates the
; capitals buffer, prints every "City,ST" line and returns.
; Arguments are pushed last-to-first; each callee pops its own arguments
; (ret 6 / ret 4).
;-----------------------------------------------------------------------
entry_point:
        ; unpack_bytes(states_unpacked, states_packed, STATES_PACKED_BYTES)
        push    word STATES_PACKED_BYTES
        push    word states_packed
        push    word states_unpacked
        call    unpack_bytes

        ; unpack_bytes(capitals_unpacked, capitals_packed, CAPITALS_PACKED_BYTES)
        push    word CAPITALS_PACKED_BYTES
        push    word capitals_packed
        push    word capitals_unpacked
        call    unpack_bytes

        ; output_strings stops at the first zero byte, so place one
        ; directly after the unpacked capitals.
        mov     si, nullTerminator
        mov     byte [si], 0

        ; output_strings(capitals_unpacked, states_unpacked)
        push    word states_unpacked
        push    word capitals_unpacked
        call    output_strings

        ; Plain near return ends the program; "int 0x20" is the
        ; alternative the original listing left commented out.
        ret
UNPACK_BITS_PER_CODE    equ 5           ; every character is stored in 5 bits
UNPACK_BITS_PER_BYTE    equ 8
UNPACK_CODE_SPACE       equ 26          ; code 26 decodes to ' '
UNPACK_CODE_COMMA       equ 27          ; code 27 decodes to ','

;-----------------------------------------------------------------------
; void unpack_bytes(char *unpackedDest, char *packedInput, int packed_length)
;
; Expands a stream of 5-bit codes (taken most significant bit first) into
; one output byte per code:
;       0..25 -> 'a'..'z' (lowercase)
;       26    -> ' '
;       27    -> ','
;       any other code c -> 'a' + c
; Trailing bits that do not complete a 5-bit code are discarded.
;
; Stack in : [sp+2] unpackedDest, [sp+4] packedInput, [sp+6] packed_length
;            (pushed in reverse order; removed by this routine via ret 6)
; Clobbers : al, bl, dx, si, di, flags; the stacked packed_length is
;            counted down in place.
; Note     : packed_length is tested only after a byte has been consumed,
;            so it must be at least 1.
;-----------------------------------------------------------------------
unpack_bytes:
        push    bp
        mov     bp, sp
        add     bp, 4                           ; step over saved bp + return address: bp -> first argument
        mov     si, [bp + 2]                    ; si = read cursor in the packed input
        mov     di, [bp + 0]                    ; di = write cursor in the output buffer
        mov     dh, UNPACK_BITS_PER_CODE        ; dh = bits still missing from the current code
        xor     bl, bl                          ; bl = code being assembled
.next_packed_byte:
        lodsb                                   ; al = next packed byte
        mov     dl, UNPACK_BITS_PER_BYTE        ; dl = bits of al not yet consumed
.next_bit:
        rcl     al, 1                           ; top bit of al -> carry
        rcl     bl, 1                           ; carry -> bottom bit of the code
        dec     dl
        dec     dh
        jnz     .more_bits_in_byte              ; code not complete yet

        ; bl now holds a full 5-bit code: translate it to a character
        cmp     bl, UNPACK_CODE_SPACE
        jne     .try_comma
        mov     bl, ' '
        jmp     .emit
.try_comma:
        cmp     bl, UNPACK_CODE_COMMA
        jne     .is_letter
        mov     bl, ','
        jmp     .emit
.is_letter:
        add     bl, 'a'                         ; 0..25 -> 'a'..'z'
.emit:
        mov     [di], bl
        inc     di
        mov     dh, UNPACK_BITS_PER_CODE        ; start assembling the next code
        xor     bl, bl
.more_bits_in_byte:
        or      dl, dl                          ; any bits left in this packed byte?
        jnz     .next_bit
        dec     word [bp + 4]                   ; one packed byte fewer to process
        jnz     .next_packed_byte
        pop     bp
        ret     6                               ; drop the three word arguments
;-----------------------------------------------------------------------
; toupper - upper-case one ASCII character
; In   : al = character
; Out  : al = 'A'..'Z' if the input was 'a'..'z', otherwise unchanged
; Clobb: flags only. The bounds are compared as immediates, so bx is not
;        touched and needs no save/restore.
; Note : this encoding is shorter than the routine in the hex dump
;        earlier in this file; the dump and its byte count describe the
;        older version that built 'a' and 'z' in bl/bh at run time.
;-----------------------------------------------------------------------
toupper:
        cmp     al, 'a'
        jb      .done                   ; below 'a': not a lowercase letter
        cmp     al, 'z'
        ja      .done                   ; above 'z': not a lowercase letter
        sub     al, 'a' - 'A'           ; 32: distance between the two cases
.done:
        ret
;-----------------------------------------------------------------------
; void output_strings(char *cities, char *states)
;
; Prints one line per city: the city name with the first letter of each
; word upper-cased, a comma, the matching two-letter state code in upper
; case, then CR LF.
;
; cities : zero-terminated text in which every city name is followed by ','
; states : two bytes per state, in the same order as the cities
;
; Stack in : [sp+2] cities, [sp+4] states (removed by this routine, ret 4)
; Clobbers : ax, bx, cx, dl, si, di, flags
; Output   : int 0x10 / AH=0Eh (teletype). That call reads the display
;            page from BH and the graphics-mode colour from BL, so BX is
;            set once and not reused as a pointer; the state table is
;            indexed through DI instead.
; Note     : the leftover store to lastChar (a variable no instruction
;            reads) has been dropped, so this routine no longer matches
;            the hex dump and byte count given earlier in this file.
;-----------------------------------------------------------------------
output_strings:
        push    bp
        mov     bp, sp
        add     bp, 4                   ; step over saved bp + return address: bp -> first argument
        mov     si, [bp + 0]            ; si = read cursor in the city text
        xor     ax, ax
        mov     dl, al                  ; dl = previous character printed; 0 = at the start of a city
        mov     cx, ax                  ; cx = index of the current city/state
        mov     bx, 0x0007              ; BH = page 0 (assumes the active page is 0), BL = light grey
.getOutputChar:
        lodsb
        test    al, al
        jz      .outputDone             ; zero byte terminates the city text
        cmp     dl, ' '
        je      .make_ucase             ; first letter after a space
        test    dl, dl
        jz      .make_ucase             ; first letter of a city
        cmp     al, ','
        jne     .printChar

        ; End of a city name: print the comma, the state code and CR LF.
        mov     ah, 0x0e
        int     0x10                    ; print ','
        mov     di, cx
        add     di, di                  ; two bytes per state
        add     di, [bp + 2]            ; di -> states[cx]
        mov     al, [di]
        call    toupper
        int     0x10                    ; ah is still 0x0e
        inc     di
        mov     al, [di]
        call    toupper
        int     0x10
        mov     al, 0x0d
        int     0x10
        mov     al, 0x0a
        int     0x10
        xor     dl, dl                  ; next character starts a new city
        inc     cx                      ; advance to the next state
        jmp     .getOutputChar
.make_ucase:
        call    toupper
.printChar:
        mov     ah, 0x0e
        int     0x10
        mov     dl, al                  ; remember what was just printed
        jmp     .getOutputChar
.outputDone:
        pop     bp
        ret     4                       ; drop the two word arguments
[section .data]
; Packed two-letter state codes: 50 states x 2 letters = 100 characters,
; 5 bits per character, concatenated most-significant-bit first (the order
; in which unpack_bytes shifts them out).
; 100 * 5 = 500 bits -> 63 bytes, the final 4 bits being padding.
; 63 packed bytes, 100 unpacked (saved 37)
states_packed:
db 01011000b, 00010000b, 11010111b, 00011100b, 00001011b, 01100100b, 11000100b, 11100100b
db 00001110b, 01110111b, 01100000b, 00011011b, 10000010b, 10101101b, 10101100b, 10011011b
db 01011010b, 10010110b, 00111010b, 10100000b, 10010000b, 11011110b, 00000110b, 00010010b
db 00101000b, 00011001b, 00011010b, 01111010b, 11001100b, 01010011b, 01010100b, 10011000b
db 11010000b, 00101001b, 10100100b, 01101000b, 10101100b, 10001011b, 00000000b, 00011001b
db 01100010b, 00001110b, 10000110b, 01001001b, 00001011b, 10010000b, 10011000b, 00111011b
db 01100010b, 10010011b, 00110000b, 00011010b, 00110101b, 01100001b, 11010001b, 00000100b
db 01010000b, 00000001b, 00000001b, 11001010b, 10110101b, 01011011b, 01010000b
; Packed capital-city names, 5 bits per character, most-significant-bit
; first. output_strings treats each ',' in the unpacked text as the end of
; a city name.
; 290 bytes = 2320 bits = exactly 464 five-bit codes, so unpack_bytes
; writes 464 characters (the size reserved for capitals_unpacked).
; Original size note: 290 packed bytes, 463 unpacked (saved 173)
capitals_packed:
db 00001000b, 00100110b, 11100110b, 11101010b, 00101110b, 10100001b, 10001001b, 10110100b, 00110100b, 01101000b, 00000011b, 01000000b, 11110111b, 00101101b
db 00010010b, 11011000b, 10011100b, 10111010b, 00110000b, 00110100b, 10010110b, 11011000b, 11100110b, 11001100b, 11001110b, 01100001b, 00100011b, 10001101b
db 10011100b, 10001011b, 00100011b, 01000001b, 10110001b, 10010001b, 10110101b, 00100100b, 01110110b, 00010111b, 00100010b, 01000100b, 11011000b, 00101001b
db 00101001b, 10100001b, 10111011b, 00001011b, 10100101b, 00110111b, 00110111b, 01100000b, 01011000b, 01000000b, 11011100b, 01101110b, 01100000b, 01011010b
db 11000000b, 01110000b, 01001010b, 01000100b, 00100110b, 11100100b, 00000110b, 11001100b, 00011010b, 00101001b, 00110110b, 11010000b, 01001000b, 11110101b
db 01000010b, 11010110b, 01001101b, 11001110b, 00100100b, 01101100b, 11011100b, 11011101b, 10100100b, 10000101b, 00101001b, 00100011b, 00100111b, 00110111b
db 01110001b, 01000000b, 10001110b, 11000111b, 00110100b, 01111011b, 01111010b, 00001001b, 00011000b, 10010011b, 01100111b, 00000100b, 01100010b, 10001001b
db 00000110b, 10010001b, 00110110b, 11000001b, 01000011b, 01010010b, 01010011b, 00000110b, 11011111b, 00010111b, 01010101b, 00000011b, 00100011b, 01000100b
db 01001101b, 10001101b, 11010101b, 00100100b, 01110110b, 00100111b, 00110100b, 01001110b, 10001000b, 11110110b, 11000111b, 00110110b, 01101111b, 00100010b
db 11010000b, 01001000b, 11101100b, 11100000b, 10001100b, 11001010b, 11101000b, 10001111b, 01110011b, 01110011b, 11001000b, 10100000b, 01101110b, 01000000b
db 01000011b, 01100111b, 10100111b, 10000010b, 10001011b, 11011010b, 01101000b, 11010010b, 00000010b, 10011011b, 01011010b, 00011010b, 00100111b, 00101101b
db 10111011b, 10001000b, 00010110b, 01000100b, 00011000b, 11111011b, 01100000b, 00000110b, 10001001b, 00111001b, 10111011b, 01110010b, 11110000b, 11000111b
db 10100000b, 00011011b, 01111001b, 11011100b, 01000110b, 10100010b, 11111011b, 01011000b, 00011011b, 00100100b, 00110100b, 11011011b, 00111011b, 10011010b
db 11100101b, 11010001b, 01110100b, 11011010b, 01000000b, 00100101b, 01001001b, 11001101b, 11011100b, 10011111b, 00010100b, 00110100b, 11000101b, 01000001b
db 00010110b, 00111101b, 10001001b, 11001011b, 10100011b, 00000010b, 10000000b, 01101100b, 00001101b, 01101000b, 00011110b, 11100101b, 10100010b, 01011011b
db 00010001b, 11001001b, 10000010b, 00110101b, 10100100b, 11011100b, 10000000b, 10111001b, 11101001b, 01100000b, 01010001b, 00110100b, 00100100b, 01001111b
db 00011011b, 00000100b, 11010110b, 00000110b, 11001100b, 00011011b, 00001010b, 00100100b, 11000000b, 01000100b, 01001010b, 11011001b, 01100010b, 00000110b
db 10101000b, 10101110b, 10001100b, 11110111b, 00100000b, 00101100b, 10001100b, 11011010b, 11010001b, 00111001b, 10101100b, 10011010b, 10001011b, 10000100b
db 10101101b, 10001100b, 10010010b, 11010011b, 00011100b, 10000110b, 10010010b, 01011011b, 10010000b, 00000101b, 00010000b, 00110000b, 10001101b, 10011011b
db 10110110b, 11100101b, 00101100b, 00000111b, 01110011b, 00000001b, 10100001b, 00100010b, 01111000b, 11011000b, 10001110b, 00001000b, 10101100b, 10010010b
db 10011011b, 10011011b, 10110001b, 00000010b, 00110010b, 01110011b, 01110100b, 00100100b, 01001111b, 00011011b
[section .bss]
; Uninitialised work area. The order matters: nullTerminator must sit
; directly behind capitals_unpacked so that it ends the unpacked text.
lastChar:               resb 1                  ; previously-printed char; the routines shown track this in dl instead
string_index:           resw 1                  ; state index; only referenced from commented-out lines (cx is used instead)
states_unpacked:        resb 50 * 2             ; 50 states, 2 letters each
capitals_unpacked:      resb (290 * 8) / 5      ; 464: one byte per 5-bit code in the 290 packed bytes
nullTerminator:         resb 1                  ; set to 0 at start-up to terminate the capitals text