;--- very simple html to (ascii) text converter. Public Domain.
;--- it's a sample for a mixed-language application (uses CRT)

;--- Win32 binary:
;--- assemble: jwasm -coff html2txt.asm
;--- link:     link /subsystem:console html2txt.obj \msvc\lib\libc.lib

;--- Linux binary:
;--- assemble: jwasm -zcw -elf -Fo html2txt.o html2txt.asm
;--- link:     gcc -o html2txt html2txt.o

    .386                    ; enable the 80386 (32-bit) instruction set
    .model FLAT, c          ; flat memory model; C is the default language type
    option casemap:none     ; symbol names are case-sensitive

;--- prototypes of the C runtime functions called via "invoke" below.
;--- all of them use the C calling convention (arguments pushed right to
;--- left, caller removes them from the stack).
printf  proto c :ptr BYTE, :VARARG
fopen   proto c :ptr BYTE, :ptr BYTE
fclose  proto c :ptr
fseek   proto c :ptr, :DWORD, :DWORD
ftell   proto c :ptr
fread   proto c :ptr BYTE, :DWORD, :DWORD, :ptr
fwrite  proto c :ptr BYTE, :DWORD, :DWORD, :ptr
strcat  proto c :ptr BYTE, :ptr BYTE
strcpy  proto c :ptr BYTE, :ptr BYTE
strlen  proto c :ptr BYTE
malloc  proto c :DWORD
free    proto c :ptr

;--- how "errno" is accessed. Two variants, selected at assembly time:
;--- "if 1" (active): errno is an external dword variable exported by the CRT.
;--- else-branch: errno is a text macro that expands to a call of the macro
;--- function __errno(), which calls __errno_location, loads the dword the
;--- returned pointer refers to and yields it in eax.
;--- NOTE(review): which variant is needed depends on the C library linked
;--- against - confirm for the target toolchain.
if 1
externdef c errno:dword
else
;--- if errno is to be defined as a function call
__errno macro
__errno_location proto c  ;this is the gcc name
    call __errno_location
    mov eax,[eax]
    exitm <eax>
    endm
errno textequ <__errno()>
endif

;--- fseek() origin values and the null pointer.
;--- "option casemap:none" makes symbol names case-sensitive, so these
;--- names must be spelled exactly as they are at the places of use.
SEEK_SET equ 0
SEEK_END equ 2
NULL     equ 0

;--- ASCII control characters
lf  equ 10
cr  equ 13

;--- CStr(text): macro function that defines a zero-terminated string in
;--- the .const section under a unique (macro-local) label, switches back
;--- to .code and yields "offset <label>", so it can be used directly as an
;--- operand, e.g. invoke printf, CStr("hello").
CStr macro chars
local strlabel
    .const
strlabel db chars,0
    .code
    exitm <offset strlabel>
    endm

    .code
;--- convert html text found in buffer
;--- 1. skip anything between angle brackets (<>)
;--- 2. translate &lt;, &gt; and &amp; to '<', '>' and '&'
;--- if a line contains tags only, it's skipped as a whole, including EOL
;---
;--- in:  buffer = zero-terminated input text (scanning stops at the first 0)
;---      size_  = number of bytes to allocate for the output; must be at
;---               least the length of the input text. That is sufficient
;---               because every input byte produces at most one output byte.
;---      psize  = receives the number of bytes stored in the output buffer
;--- out: eax = malloc'ed output buffer (not zero-terminated; the caller has
;---            to free it), or NULL if the allocation failed - in that case
;---            [psize] is not written.
convertbuffer proc uses ebx esi edi buffer:ptr BYTE, size_:DWORD, psize:ptr DWORD

local intag:byte            ; nonzero while scanning inside <...>
local taginline:byte        ; nonzero if a tag was seen on the current line
local cnt:dword             ; non-CR/LF chars stored for the current line
local startline:dword       ; output position where the current line starts

    mov eax, size_
    .if ( eax == 0 )
        inc eax             ; avoid malloc(0), which is allowed to return NULL
    .endif
    invoke malloc, eax
    .if ( eax )
        mov ebx, eax        ; ebx = start of output buffer (the return value)
        mov edi, eax        ; edi = output write pointer
        mov startline, edi
        mov esi, buffer     ; esi = input read pointer
        mov intag, 0        ; stack locals are undefined on entry, so the
        mov taginline, 0    ; state flags must be cleared explicitly
        mov cnt, 0
        lodsb
        .while (al)
            .if ( al == '<' )
                mov intag,1
                mov taginline,1
            .endif
            .if ( intag == 0 )
                .if ( al == '&' )
                    ; the entity name is compared byte by byte; since &&
                    ; stops at the first mismatch, no byte behind the
                    ; terminating 0 is ever read
                    .if ( byte ptr [esi] == 'l' && byte ptr [esi+1] == 't' && \
                          byte ptr [esi+2] == ';' )
                        add esi,3
                        mov al,'<'
                    .elseif ( byte ptr [esi] == 'g' && byte ptr [esi+1] == 't' && \
                              byte ptr [esi+2] == ';' )
                        add esi,3
                        mov al,'>'
                    .elseif ( byte ptr [esi] == 'a' && byte ptr [esi+1] == 'm' && \
                              byte ptr [esi+2] == 'p' && byte ptr [esi+3] == ';' )
                        add esi,4
                        mov al,'&'
                    .endif
                .endif
                stosb
                .if ( al == lf )
                    ; skip line if it contains just a tag: rewind the write
                    ; pointer to the start of the line, dropping CR/LF too
                    .if ( cnt == 0 && taginline )
                        mov edi, startline
                    .endif
                    mov cnt, 0
                    mov taginline, 0
                    mov startline, edi
                .elseif ( al != cr )
                    inc cnt
                .endif
            .elseif ( al == '>')
                mov intag,0
            .endif
            lodsb
        .endw
        mov eax, ebx        ; return the output buffer
        sub edi, eax        ; edi = number of bytes stored
        mov ecx, psize
        mov [ecx], edi
    .endif
    ret
    align 4
convertbuffer endp

;--- main: read the html file named by argv[1], convert it with
;--- convertbuffer and write the result to argv[2] or, if no output name
;--- was given, to "<base name of argv[1]>.txt".
;--- returns 0 on success and 1 on any error (also if called without
;--- arguments, after printing the usage text).
MAXFNAME equ 260                ; size of the buffer for the generated name
MAXBASE  equ MAXFNAME - 5       ; longest base name leaving room for ".txt",0

main proc c uses ebx argc:dword, argv:ptr ptr

local filesize:dword            ; size of the input file in bytes
local buffer:dword              ; input file contents, zero-terminated
local outbuf:dword              ; converted text returned by convertbuffer
local outbsize:dword            ; number of bytes in outbuf
local nread:dword               ; number of bytes fread() delivered
local rc:dword                  ; exit code of the output phase
local fname[MAXFNAME]:byte      ; generated output file name

    .if ( argc < 2 )
        invoke printf, CStr(<"html2txt v1.0, Public Domain.",lf>)
        invoke printf, CStr(<"html2txt is a html to text converter.",lf>)
        invoke printf, CStr(<"usage: html2txt input_file [output_file]",lf>)
        mov eax,1
        ret
    .endif

;--- read the whole input file into a zero-terminated heap buffer

    mov ebx,argv
    mov ebx,[ebx+1*4]               ; ebx = argv[1]
    invoke fopen, ebx, CStr("rb")
    .if ( eax == NULL )
        invoke printf, CStr(<"open('%s') failed [%u]",lf>), ebx, errno
        mov eax,1
        ret
    .endif
    mov ebx, eax                    ; ebx = input FILE*
    invoke fseek, ebx, 0, SEEK_END
    invoke ftell, ebx
    .if ( eax == -1 )               ; ftell() failed, file size unknown
        invoke printf, CStr(<"read error [%u]",lf>), errno
        invoke fclose, ebx
        mov eax,1
        ret
    .endif
    mov filesize, eax
    invoke fseek, ebx, 0, SEEK_SET
    mov eax, filesize
    inc eax                         ; one extra byte for the terminating 0
    invoke malloc, eax
    .if ( eax == NULL )
        invoke printf, CStr(<"out of memory",lf>)
        invoke fclose, ebx
        mov eax,1
        ret
    .endif
    mov buffer, eax
    invoke fread, buffer, 1, filesize, ebx
    mov nread, eax
    invoke fclose, ebx
    mov eax, nread
    .if ( eax != filesize )
        invoke printf, CStr(<"read error [%u]",lf>), errno
        invoke free, buffer
        mov eax,1
        ret
    .endif
    mov edx, buffer
    mov byte ptr [edx+eax],0        ; eax == filesize: terminate the text

;--- convert; the input buffer is no longer needed afterwards

    invoke convertbuffer, buffer, filesize, addr outbsize
    mov outbuf, eax
    invoke free, buffer
    .if ( outbuf == NULL )
        invoke printf, CStr(<"out of memory",lf>)
        mov eax,1
        ret
    .endif

;--- determine the output file name

    mov rc, 1                       ; failure unless the file gets written
    mov edx, argv
    mov ebx, [edx+1*4]
    .if ( argc == 2 )
        ; scan backwards from the end of argv[1] to the character that
        ; follows the last drive/directory separator
        invoke strlen, ebx
        add eax, ebx
        .while (eax != ebx && \
                byte ptr [eax-1] != ':' && \
                byte ptr [eax-1] != '\' && \
                byte ptr [eax-1] != '/')
            dec eax
        .endw
        mov ebx, eax                ; ebx = base name of the input file
        invoke strlen, ebx
        .if ( eax > MAXBASE )       ; base name + ".txt" must fit into fname
            invoke printf, CStr(<"file name too long",lf>)
            invoke free, outbuf
            mov eax,1
            ret
        .endif
        invoke strcpy, addr fname, ebx
        lea ebx, fname
        invoke strcat, ebx, CStr(".txt")
    .else
        mov ebx, argv
        mov ebx, [ebx+2*4]          ; ebx = argv[2]
    .endif

;--- write the converted text

    invoke fopen, ebx, CStr("wb")
    .if ( eax )
        mov ebx, eax                ; ebx = output FILE*
        invoke fwrite, outbuf, 1, outbsize, ebx
        .if ( eax != outbsize )
            invoke printf, CStr(<"write error [%u]",lf>), errno
            invoke fclose, ebx
        .else
            invoke fclose, ebx
            .if ( eax )             ; flushing the buffered data failed
                invoke printf, CStr(<"write error [%u]",lf>), errno
            .else
                invoke printf, CStr(<"Done. %u bytes written",lf>), outbsize
                mov rc, 0
            .endif
        .endif
    .else
        invoke printf, CStr(<"open('%s') failed [%u]",lf>), ebx, errno
    .endif
    invoke free, outbuf
    mov eax, rc
    ret
    align 4

main endp

    end