Difference between revisions of "XLS"
Jump to navigation
Jump to search
(Created page with "Category:Python Python XLS parser using standard modules. Found on [https://stackoverflow.com/questions/4371163/reading-xlsx-files-using-python stackoverflow]. It is ann...") |
m |
||
Line 11: | Line 11: | ||
from xml.etree.ElementTree import iterparse | from xml.etree.ElementTree import iterparse | ||
z = zipfile.ZipFile(fname) | z = zipfile.ZipFile(fname) | ||
− | strings = [el.text for e, el in iterparse(z.open('xl/sharedStrings.xml')) if el.tag.endswith('}t')] | + | # Get shared strings |
+ | strings = [el.text for e, el | ||
+ | in iterparse(z.open('xl/sharedStrings.xml')) | ||
+ | if el.tag.endswith('}t')] | ||
rows = [] | rows = [] | ||
row = {} | row = {} | ||
value = '' | value = '' | ||
for e, el in iterparse(z.open('xl/worksheets/sheet1.xml')): | for e, el in iterparse(z.open('xl/worksheets/sheet1.xml')): | ||
− | if el.tag.endswith('}v'): | + | # get value or index to shared strings |
+ | if el.tag.endswith('}v'): # <v>84</v> | ||
value = el.text | value = el.text | ||
− | if el.tag.endswith('}c'): | + | # If value is a shared string, use value as an index |
+ | if el.tag.endswith('}c'): # <c r="A3" t="s"><v>84</v></c> | ||
if el.attrib.get('t') == 's': | if el.attrib.get('t') == 's': | ||
value = strings[int(value)] | value = strings[int(value)] | ||
− | letter = el.attrib['r'] | + | # split the row/col information so that the row leter(s) can be separate |
+ | letter = el.attrib['r'] # AZ22 | ||
while letter[-1].isdigit(): | while letter[-1].isdigit(): | ||
letter = letter[:-1] | letter = letter[:-1] |
Revision as of 22:40, 9 August 2018
Python XLS parser using standard modules.
Found on stackoverflow. It is announced as very very basic but it works very well.
This routine returns a list of dicts like { <columname> : <cellvalue> [, .. ] }
def xlsx(fname):
import zipfile
from xml.etree.ElementTree import iterparse
z = zipfile.ZipFile(fname)
# Get shared strings
strings = [el.text for e, el
in iterparse(z.open('xl/sharedStrings.xml'))
if el.tag.endswith('}t')]
rows = []
row = {}
value = ''
for e, el in iterparse(z.open('xl/worksheets/sheet1.xml')):
# get value or index to shared strings
if el.tag.endswith('}v'): # <v>84</v>
value = el.text
# If value is a shared string, use value as an index
if el.tag.endswith('}c'): # <c r="A3" t="s"><v>84</v></c>
if el.attrib.get('t') == 's':
value = strings[int(value)]
# split the row/col information so that the row leter(s) can be separate
letter = el.attrib['r'] # AZ22
while letter[-1].isdigit():
letter = letter[:-1]
row[letter] = value
value = ''
if el.tag.endswith('}row'):
rows.append(row)
row = {}
return rows