" XML parsing / serialisation helpers (autoload module webapi#xml).
"
" A node is a Dictionary copied from s:template:
"   name  : tag name (String)
"   attr  : Dictionary of attribute name -> decoded value
"   child : List of child nodes (Dictionaries) and text chunks (Strings)

let s:save_cpo = &cpo
set cpo&vim

let s:template = { 'name': '', 'attr': {}, 'child': [] }

" Build the UTF-8 byte sequence (1 to 3 bytes) for code point a:nr.
function! s:nr2byte(nr)
  if a:nr < 0x80
    return nr2char(a:nr)
  elseif a:nr < 0x800
    return nr2char(a:nr/64+192).nr2char(a:nr%64+128)
  else
    return nr2char(a:nr/4096%16+224).nr2char(a:nr/64%64+128).nr2char(a:nr%64+128)
  endif
endfunction

" Return the character for a:charcode expressed in the current 'encoding'.
" With a non-UTF-8 'encoding' the UTF-8 bytes are converted through iconv()
" and made printable with strtrans().
function! s:nr2enc_char(charcode)
  if &encoding == 'utf-8'
    return nr2char(a:charcode)
  endif
  let char = s:nr2byte(a:charcode)
  if strlen(char) > 1
    let char = strtrans(iconv(char, 'utf-8', &encoding))
  endif
  return char
endfunction

" Return a:nr as an upper-case hexadecimal string.
" Note: yields an empty string for 0 because the loop body never runs.
function! s:nr2hex(nr)
  let n = a:nr
  let r = ""
  while n
    let r = '0123456789ABCDEF'[n % 16] . r
    let n = n / 16
  endwhile
  return r
endfunction

" Replace entity / character references in a:str by the characters they
" stand for.  Handled: &gt; &lt; &amp; and numeric references (&#NN; and
" &#xHH;).  Other named entities are left untouched.
function! s:decodeEntityReference(str)
  let str = a:str
  let str = substitute(str, '&gt;', '>', 'g')
  let str = substitute(str, '&lt;', '<', 'g')
  " str2nr() with an explicit base is used so that leading zeros in a
  " decimal reference (e.g. &#065;) are not interpreted as octal.
  let str = substitute(str, '&#x\([0-9a-fA-F]\+\);', '\=s:nr2enc_char(str2nr(submatch(1), 16))', 'g')
  let str = substitute(str, '&#\(\d\+\);', '\=s:nr2enc_char(str2nr(submatch(1)))', 'g')
  " &amp; must be decoded last, otherwise '&amp;lt;' would turn into '<'.
  let str = substitute(str, '&amp;', '\&', 'g')
  return str
endfunction

" Escape the characters & > < " in a:str so the result can be placed in
" element content or inside a double-quoted attribute value.
function! s:encodeEntityReference(str)
  let str = a:str
  " '&' must be escaped first so the entities produced below stay intact.
  " In the replacement, '\&' is a literal ampersand.
  let str = substitute(str, '&', '\&amp;', 'g')
  let str = substitute(str, '>', '\&gt;', 'g')
  let str = substitute(str, '<', '\&lt;', 'g')
  let str = substitute(str, '"', '\&#34;', 'g')
  return str
endfunction

" Test a:node against a:cond.  The meaning depends on the type of a:cond:
"   String     : the node name must equal it
"   Funcref    : the result of calling it with the node is returned
"   List       : every item must match (checked recursively)
"   Dictionary : at least one key must exist in node.attr with equal value
" Returns 1 on match, 0 otherwise (or the Funcref's own result).
function! s:matchNode(node, cond)
  if type(a:cond) == 1 && a:node.name == a:cond
    return 1
  endif
  if type(a:cond) == 2
    return a:cond(a:node)
  endif
  if type(a:cond) == 3
    let ret = 1
    for l:R in a:cond
      if !s:matchNode(a:node, l:R) | let ret = 0 | endif
      unlet l:R
    endfor
    return ret
  endif
  if type(a:cond) == 4
    for k in keys(a:cond)
      if has_key(a:node.attr, k) && a:node.attr[k] == a:cond[k] | return 1 | endif
    endfor
  endif
  return 0
endfunction

" Return the first direct child element matching all given conditions,
" or an empty Dictionary when there is none.
function! s:template.childNode(...) dict
  for c in self.child
    if type(c) == 4 && s:matchNode(c, a:000)
      return c
    endif
    unlet c
  endfor
  return {}
endfunction

" Return a List of all direct child elements matching all given conditions.
function! s:template.childNodes(...) dict
  let ret = []
  for c in self.child
    if type(c) == 4 && s:matchNode(c, a:000)
      let ret += [c]
    endif
    unlet c
  endfor
  return ret
endfunction

" With arguments: replace the children of this node by the arguments.
" Without arguments: return the concatenated text of this node and of all
" descendant elements.
function! s:template.value(...) dict
  if a:0
    let self.child = a:000
    return
  endif
  let ret = ''
  for c in self.child
    if type(c) <= 1 || type(c) == 5
      let ret .= c
    elseif type(c) == 4
      let ret .= c.value()
    endif
    unlet c
  endfor
  return ret
endfunction

" Depth-first search: return the first descendant element matching all
" given conditions, or an empty Dictionary when nothing matches.
function! s:template.find(...) dict
  for c in self.child
    if type(c) == 4
      if s:matchNode(c, a:000)
        return c
      endif
      unlet! ret
      " a:000 is passed as a single List; s:matchNode() handles the nesting.
      let ret = c.find(a:000)
      if !empty(ret)
        return ret
      endif
    endif
    unlet c
  endfor
  return {}
endfunction

" Depth-first search: return a List of every descendant element matching
" all given conditions.
function! s:template.findAll(...) dict
  let ret = []
  for c in self.child
    if type(c) == 4
      if s:matchNode(c, a:000)
        call add(ret, c)
      endif
      let ret += c.findAll(a:000)
    endif
    unlet c
  endfor
  return ret
endfunction

" Serialise this node and its subtree to an XML string.  Attribute values
" and text children are entity-escaped; an element without children is
" written as '<name />'.
function! s:template.toString() dict
  let xml = '<' . self.name
  for attr in keys(self.attr)
    let xml .= ' ' . attr . '="' . s:encodeEntityReference(self.attr[attr]) . '"'
  endfor
  if len(self.child)
    let xml .= '>'
    for c in self.child
      if type(c) == 4
        let xml .= c.toString()
      elseif type(c) > 1
        let xml .= s:encodeEntityReference(string(c))
      else
        let xml .= s:encodeEntityReference(c)
      endif
      unlet c
    endfor
    let xml .= '</' . self.name . '>'
  else
    let xml .= ' />'
  endif
  return xml
endfunction

" Create a new, empty element node named a:name.
function! webapi#xml#createElement(name)
  let node = deepcopy(s:template)
  let node.name = a:name
  return node
endfunction

" Parse a:ctx.xml and attach the resulting nodes below a:top.
" a:ctx is a Dictionary with the keys 'xml' (remaining input, consumed
" while parsing) and 'encoding' (filled from the XML declaration when it
" is empty and the declaration names an encoding).
function! s:parse_tree(ctx, top)
  let node = a:top
  let stack = [a:top]
  let pos = 0
  " content accumulates the text only tags
  let content = ""
  let append_content_to_parent = 'if len(stack) && content != "" | call add(stack[-1].child, content) | let content ="" | endif'

  " Strip a leading XML declaration and honour its encoding attribute.
  let mx = '^\s*\(<?xml[^>]\+>\)'
  if a:ctx['xml'] =~ mx
    let match = matchstr(a:ctx['xml'], mx)
    let a:ctx['xml'] = a:ctx['xml'][stridx(a:ctx['xml'], match) + len(match):]
    let mx = 'encoding\s*=\s*["'']\{0,1}\([^"'' \t]\+\|[^"'']\+\)["'']\{0,1}'
    let matches = matchlist(match, mx)
    if len(matches)
      let encoding = matches[1]
      if len(encoding) && len(a:ctx['encoding']) == 0
        let a:ctx['encoding'] = encoding
        let a:ctx['xml'] = iconv(a:ctx['xml'], encoding, &encoding)
      endif
    endif
  endif

  " this regex matches
  " 1) the remaining until the next tag begins
  " 2) maybe closing "/" of tag name
  " 3) tagname
  " 4) the attributes of the text (optional)
  " 5) maybe closing "/" (end of tag name)
  " or
  " 6) CDATA or ''
  " 7) text content of CDATA
  " or
  " 8) a comment or ''
  " and finally
  " 9) the remaining text after the tag (rest)
  " (These numbers correspond to the indexes in matched list m)
  let tag_mx = '^\(\_.\{-}\)\%(\%(<\(/\?\)\([^!/>[:space:]]\+\)\(\%([[:space:]]*[^/>=[:space:]]\+[[:space:]]*=[[:space:]]*\%([^"'' >\t]\+\|"[^"]*"\|''[^'']*''\)\|[[:space:]]\+[^/>=[:space:]]\+[[:space:]]*\)*\)[[:space:]]*\(/\?\)>\)\|\%(<!\[\(CDATA\)\[\(.\{-}\)\]\]>\)\|\(<!--.\{-}-->\)\)\(.*\)'

  while len(a:ctx['xml']) > 0
    let m = matchlist(a:ctx.xml, tag_mx)
    if empty(m) | break | endif
    let is_end_tag = m[2] == '/' && m[5] == ''
    let is_start_and_end_tag = m[2] == '' && m[5] == '/'
    let tag_name = m[3]
    let attrs = m[4]

    " Text found in front of the tag belongs to the currently open element.
    if len(m[1])
      let content .= s:decodeEntityReference(m[1])
    endif

    if is_end_tag
      " closing tag: pop from stack and continue at upper level
      exec append_content_to_parent

      if len(stack) " TODO: checking whether opened tag is exist.
        call remove(stack, -1)
      endif
      let a:ctx['xml'] = m[9]
      continue
    endif

    " comment tag
    if m[8] != ''
      let a:ctx.xml = m[9]
      continue
    endif

    " if element is a CDATA: its text is taken verbatim, without decoding
    if m[6] != ''
      let content .= m[7]
      let a:ctx.xml = m[9]
      continue
    endif

    let node = deepcopy(s:template)
    let node.name = tag_name
    " Groups: 1) name  2) 'single quoted'  3) "double quoted"  4) bare word
    let attr_mx = '\([^=[:space:]]\+\)\s*\%(=\s*''\([^'']*\)''\|=\s*"\([^"]*\)"\|=\s*\(\w\+\)\|\)'
    while len(attrs) > 0
      let attr_match = matchlist(attrs, attr_mx)
      if len(attr_match) == 0
        break
      endif
      let name = attr_match[1]
      let value = len(attr_match[2]) ? attr_match[2] : len(attr_match[3]) ? attr_match[3] : len(attr_match[4]) ? attr_match[4] : ""
      " An attribute without a value (or with an empty one) gets its own
      " name as value.
      if value == ""
        let value = name
      endif
      let node.attr[name] = s:decodeEntityReference(value)
      let attrs = attrs[stridx(attrs, attr_match[0]) + len(attr_match[0]):]
    endwhile

    exec append_content_to_parent

    if len(stack)
      call add(stack[-1].child, node)
    endif
    if !is_start_and_end_tag
      " opening tag, continue parsing its contents
      call add(stack, node)
    endif
    let a:ctx['xml'] = m[9]
  endwhile
endfunction

" Parse the XML text a:xml and return its first top-level element node.
" Throws "Parse Error" when the text contains no element.
" 'maxmempattern' and 'maxfuncdepth' are raised while parsing and are
" always put back to their previous values afterwards.
function! webapi#xml#parse(xml)
  let top = deepcopy(s:template)
  let oldmaxmempattern = &maxmempattern
  let oldmaxfuncdepth = &maxfuncdepth
  let &maxmempattern = 2000000
  let &maxfuncdepth = 2000
  try
    call s:parse_tree({'xml': a:xml, 'encoding': ''}, top)
    for node in top.child
      if type(node) == 4
        return node
      endif
      unlet node
    endfor
  finally
    " Runs on return and on error alike, so the options never leak.
    let &maxmempattern = oldmaxmempattern
    let &maxfuncdepth = oldmaxfuncdepth
  endtry
  throw "Parse Error"
endfunction

" Read the file a:fname and parse its contents as XML.
function! webapi#xml#parseFile(fname)
  return webapi#xml#parse(join(readfile(a:fname), "\n"))
endfunction

" Fetch a:url with webapi#http#get() and parse the response content as XML.
function! webapi#xml#parseURL(url)
  return webapi#xml#parse(webapi#http#get(a:url).content)
endfunction

let &cpo = s:save_cpo
unlet s:save_cpo

" vim:set et: