Google AdSense

Tuesday, January 13, 2015

HTML composition

  • Before
  • <ul><li>level 1</li><li>level 2</li><ul><li>level 21</li><li>level 22</li></ul></ul>
    
  • After
  • <ul>
    <li>level 1</li>
    <li>level 2</li>
    <ul>
    <li>level 21</li>
    <li>level 22</li>
    </ul>
    </ul>
    
  • Code
  • input_file = r'z:\input.html'
    output_file = r'z:\output.html'  # the reformatted HTML is written here
    encoding = 'utf-8'  # used both to read the input and to write the output
    # Reformat an HTML file so that every opening tag (other than <br>) starts
    # on its own line, e.g. '<ul><li>a</li></ul>' becomes the three lines
    # '<ul>', '<li>a</li>' and '</ul>'. Chunks between <pre> and </pre> are
    # copied through without stripping.
    with open(input_file, 'r', encoding=encoding) as f1:
        text = f1.read()  # input is fully read and closed before writing

    # Splitting on '<' yields one chunk per tag ("name ...>trailing text").
    # The first chunk is whatever precedes the first '<', so it is plain text
    # and must not be given a '<' prefix like the tag chunks below.
    head, *chunks = text.split('<')
    head = head.strip()

    ss = [head] if head else []  # strings: the output lines
    le = True  # last end: the previous tag was a closing tag or <br>
    dn = False  # do nothing: currently inside a <pre> element
    for s in chunks:
        # '<pre>' with or without attributes starts a verbatim region.
        if s.startswith(('pre>', 'pre ', 'pre\t', 'pre\r', 'pre\n')):
            dn = True
            ss.append('')
        if dn:
            ss[-1] += '<' + s
        else:
            s = s.strip()
            if s:
                if s[0] != '/':
                    if s.startswith('br'):
                        # <br> stays on the current line; a new line is
                        # started only when there is nothing to attach to.
                        if ss:
                            ss[-1] += '<' + s
                        else:
                            ss.append('<' + s)
                        le = True
                    else:
                        # Any other opening tag begins a new line.
                        ss.append('<' + s)
                        le = False
                else:
                    # A closing tag joins the line of its opening tag unless
                    # another element was closed (or a <br> seen) in between.
                    if le:
                        ss.append('<' + s)
                    else:
                        ss[-1] += '<' + s
                    le = True
        if s.startswith('/pre>'):
            le = True
            dn = False

    with open(output_file, 'w', encoding=encoding) as f2:
        f2.write('\n'.join(ss))
                

No comments:

Post a Comment