How to analyze an XML file with Python / XMLの解読方法

I usually analyze XML files with the following code. If you use it, you will be able to understand the structure of an XML file easily.

# -*- coding: utf-8 -*-

import pandas as pd
import tkinter as tkinter
from tkinter import filedialog as tkFileDialog
import os
from lxml import etree, objectify


def xml_analysis(file, list):
    """Flatten an XML file into one row per node and save the table as CSV.

    Every node below the document root is visited depth-first. For each
    node a row ``[level, count, tag, attrib, text, address, node]`` is
    appended to ``list``:

    * ``level``   - depth below the root (direct children of the root are 0).
    * ``count``   - zero-based position of the node among its siblings.
    * ``tag`` / ``attrib`` / ``text`` - taken directly from the lxml node.
    * ``address`` - tags joined with ``/`` from the level-0 ancestor down to
      the node itself (the root tag is not included).
    * ``node``    - index path such as ``root[0][2]``.

    Args:
        file: Path of the XML file to read. The CSV is written next to it,
            with the file extension replaced by ``.csv``.
        list: List that receives the rows (mutated in place). The name
            shadows the builtin and is kept only so existing callers keep
            working.

    Returns:
        pandas.DataFrame built from ``list`` with the seven columns above.
    """
    rows = list  # local alias so the builtin-shadowing name is not used below
    columns = ['level', 'count', 'tag', 'attrib', 'text', 'address', 'node']

    parser = etree.XMLParser(remove_comments=False)
    tree = objectify.parse(file, parser=parser)
    root = tree.getroot()

    def tag_name(node):
        # Comments / processing instructions are kept by the parser and do
        # not carry a string tag; fall back to a printable name so that
        # building the address with str.join cannot raise TypeError.
        tag = node.tag
        if isinstance(tag, str):
            return tag
        return getattr(tag, '__name__', str(tag))

    def address(parent, parent_node, parent_path, level):
        # parent_node and parent_path are handed down explicitly instead of
        # being recovered from the last appended row or via eval().
        for count, child in enumerate(parent):
            node = parent_node + str([count])
            if level == 0:
                path = [tag_name(child)]
                location = child.tag
            else:
                path = parent_path + [tag_name(child)]
                location = '/'.join(path)
            rows.append([level, count, child.tag, child.attrib, child.text, location, node])
            address(child, node, path, level + 1)

    address(root, 'root', [], 0)

    # Passing the column names to the constructor also works when no rows
    # were collected (a root element without children).
    df = pd.DataFrame(rows, columns=columns)

    # Swap only the extension: str.replace would also touch '.xml' inside
    # directory names, and would leave e.g. 'data.XML' unchanged so that the
    # CSV overwrote the source file.
    df.to_csv(os.path.splitext(file)[0] + '.csv', index=False)
    return df

if __name__ == '__main__':
    # Hidden, always-on-top Tk root: only the file dialogs are shown.
    root = tkinter.Tk()
    root.attributes('-topmost', True)
    root.withdraw()

    try:
        # askdirectory() returns an empty string when the dialog is
        # cancelled; os.chdir('') would raise, so stay in the current
        # working directory in that case.
        start_dir = tkFileDialog.askdirectory()
        if start_dir:
            os.chdir(start_dir)
        else:
            start_dir = os.getcwd()

        fTyp = [("Select .xml file", "*.xml")]
        xmlfiles = tkFileDialog.askopenfilenames(filetypes=fTyp, initialdir=start_dir)

        # Each file gets its own fresh row list; the CSV is written by
        # xml_analysis next to the XML file.
        for xmlfile in xmlfiles:
            print(os.path.basename(xmlfile))
            xml_analysis(xmlfile, [])
    finally:
        # Release the hidden root window even if a file fails to parse.
        root.destroy()

The sample XML file is taken from the following link.

https://docs.python.org/3/library/xml.etree.elementtree.html

The output CSV file is as follows.

#pandas , #tkinter , #lxml , #os

A detailed description will be added later.

コピペで試してください。

この記事が気に入ったらサポートをしてみませんか?