How to analyze an XML file with Python (XMLの解読方法)
I normally analyze XML files with the following code. Running it on a file makes the structure of that XML file easy to understand.
# -*- coding: utf-8 -*-
import pandas as pd
import tkinter as tkinter
from tkinter import filedialog as tkFileDialog
import os
from lxml import etree, objectify
def xml_analysis(file, list):
    """Flatten the element tree of an XML file into a table and save it as CSV.

    Every descendant of the root element becomes one row (the root element
    itself is not listed). The rows are appended to ``list`` and also
    returned as a DataFrame with these columns:

    * ``level``   - nesting depth; 0 for direct children of the root.
    * ``count``   - position of the element among its siblings (0-based).
    * ``tag``     - the element's ``tag``.
    * ``attrib``  - the element's ``attrib`` mapping.
    * ``text``    - the element's ``text``.
    * ``address`` - tags from the level-0 ancestor down to the element,
      joined with ``/`` (the root tag is not included).
    * ``node``    - index path from the root, e.g. ``root[0][2]``.

    The table is written next to the input file, with the file extension
    replaced by ``.csv``.

    Args:
        file: Path of the XML file to analyze.
        list: List that receives the generated rows (mutated in place).

    Returns:
        pandas.DataFrame built from ``list``.
    """
    # Comments are kept on purpose so that they show up in the table too.
    parser = etree.XMLParser(remove_comments=False)
    tree = objectify.parse(file, parser=parser)
    root = tree.getroot()

    def tag_path(element, depth):
        """Join the tags of *element* and its *depth* nearest ancestors."""
        tags = []
        current = element
        for _ in range(depth + 1):
            # str() keeps the join from failing on nodes whose tag is not
            # a string (comments are kept by the parser above).
            tags.append(str(current.tag))
            current = current.getparent()
        tags.reverse()
        return '/'.join(tags)

    def address(parent, rows, level=0, parent_node='root'):
        """Append one row per descendant of *parent*, depth first."""
        for count, child in enumerate(parent):
            node = '{}[{}]'.format(parent_node, count)
            rows.append([level, count, child.tag, child.attrib, child.text,
                         tag_path(child, level), node])
            address(child, rows, level + 1, node)

    address(root, list, 0)

    # Passing the column names to the constructor also works when the root
    # has no children and the row list is empty.
    df = pd.DataFrame(
        list,
        columns=['level', 'count', 'tag', 'attrib', 'text', 'address', 'node'],
    )

    # Swap only the final extension. A plain str.replace would touch every
    # '.xml' in the path and, when the suffix is not exactly '.xml', would
    # leave the path unchanged and overwrite the XML file itself.
    csv_path = os.path.splitext(file)[0] + '.csv'
    df.to_csv(csv_path, index=False)
    return df
if __name__ == '__main__':
    # Hidden, always-on-top root window so the dialogs open in front of
    # other windows without showing an empty Tk window.
    root = tkinter.Tk()
    root.attributes('-topmost', True)
    root.withdraw()

    # First pick the working folder, then one or more XML files inside it.
    folder = tkFileDialog.askdirectory()

    # A cancelled dialog yields an empty value; os.chdir() would raise on
    # it, so simply skip the analysis in that case.
    if folder:
        os.chdir(folder)
        fTyp = [("Select .xml file", "*.xml")]
        xmlfiles = tkFileDialog.askopenfilenames(filetypes=fTyp, initialdir=folder)
        for xmlfile in xmlfiles:
            # Show only the file name, not the full path.
            print(os.path.basename(xmlfile))
            # Each file gets its own fresh row list.
            xml_analysis(xmlfile, [])

    # Release the Tk resources once all dialogs are done.
    root.destroy()
The sample XML file was created from the example at the following link.
https://docs.python.org/3/library/xml.etree.elementtree.html
The output CSV file is shown below.
#pandas , #tkinter , #lxml , #os
A detailed description will be added later.
コピペで試してください。
この記事が気に入ったらサポートをしてみませんか?