Difference between revisions of "Python:Files"
Line 148: | Line 148: | ||
<syntaxhighlight lang=python> | <syntaxhighlight lang=python> | ||
− | def csv2dict(filespec,separator=','): | + | def csv2dict(filespec, separator=','): |
+ | '''Convert a csv-file to a list of dicts''' | ||
outfile = [] | outfile = [] | ||
filedir = glob.glob(filespec) | filedir = glob.glob(filespec) | ||
for filename in filedir: | for filename in filedir: | ||
− | + | try: | |
− | fh = open (filename,"r") | + | fh = open(filename, "r") |
+ | except: | ||
+ | print('{} cannot be opened'.format(filename)) | ||
+ | else: | ||
filelist = [line.strip().split(separator) for line in fh] | filelist = [line.strip().split(separator) for line in fh] | ||
fh.close() | fh.close() | ||
Line 159: | Line 163: | ||
fieldnames = set(header) | fieldnames = set(header) | ||
if len(header) != len(fieldnames): | if len(header) != len(fieldnames): | ||
− | print('ERROR: Fieldnames in | + | print('ERROR: Fieldnames in {} are not unique'.format(filename)) |
− | else: | + | else: |
numfields = len(header) | numfields = len(header) | ||
linecount = 0 | linecount = 0 | ||
Line 167: | Line 171: | ||
linedict = {} | linedict = {} | ||
count = 0 | count = 0 | ||
− | for field in line: | + | for field in line: |
linedict[header[count]] = field | linedict[header[count]] = field | ||
count += 1 | count += 1 | ||
− | if count > numfields-1: | + | if count > numfields - 1: |
break | break | ||
if count != numfields: | if count != numfields: | ||
− | print('ERROR: invalid number of fields in line '+str(linecount)) | + | print('ERROR: invalid number of fields in line ' + str(linecount)) |
− | outfile.append(linedict) | + | outfile.append(linedict) |
− | return(outfile) | + | return (outfile)</syntaxhighlight> |
− | </syntaxhighlight> | ||
=Read xml= | =Read xml= | ||
Module and code examples [[Python:XML]] | Module and code examples [[Python:XML]] |
Revision as of 12:42, 9 March 2022
Basics
- glob.glob(filespec)
- Return a list of files matching 'filespec'.
Code example:
import glob
files = glob.glob(filespec)
- os.path.isfile(filename)
- Boolean for file existence
- fh = open (filename,"r")
- open filename for read and return the filehandle fh. Use w for write, a for append.
- fh.close()
- Close the file for filehandle fh.
Code example:
import os
if os.path.isfile(filename):
f1 = open (filename,"r")
for line in f1:
<codeblock>
f1.close()
Or 'Easier to Ask for Forgiveness than Permission' (EAFP):
try:
fh = open (filename,"r")
except:
print('ERROR: {} cannot be opened'.format(filename))
logging.error('ERROR: {} cannot be opened'.format(filename))
else:
<other code>
- os.unlink(filename)
- Remove file or symbolic link
- basename = filepath.split('/')[-1]
- Get the filename from a path
- statinfo = os.stat(filename)
- Get file metadata like:
posix.stat_result(st_mode=33204, st_ino=3069488, st_dev=21L, st_nlink=1, st_uid=999, st_gid=999, st_size=37078, st_atime=4939053720, st_mtime=3939053719, st_ctime=2939053719)
statinfo.st_size
has the filesize in bytes.
- with open (filename,"r") as file
- Open filename for read; the file is closed automatically when the with block ends, even if an error occurs inside it
Code example:
with open (filename,"r") as file:
for line in file:
<codeblock>
- f1.read(size)
- Return at most 'size' characters from the file as a string (bytes if the file was opened in binary mode). If size is omitted or negative, the entire file is returned.
- f1.readlines()
- list(f1)
- Return all lines from file as list.
- fileinput.input()
- Read through all files specified on the commandline.
- If there are no files on the commandline read standard input
import fileinput
for line in fileinput.input():
<codeblock>
- f1.write(line)
- Write line to file opened on filehandle f1
- sys.stdout.write(<string>)
- Write to standard output
gzip archives
Read an archive
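Read the files in a tar archive regardless of the compression used (not zip). tar.extractfile(member) returns a file-like object (or None for members that are not regular files); call readlines() on it to get a list of lines.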
import tarfile
tar = tarfile.open(<tarfile>,'r')
for member in tar.getmembers():
print(member.name)
filelist = tar.extractfile(member)
Zip files
Check this page.
Read a zip-file
import zipfile
z = zipfile.ZipFile(zipile)
for file in z.namelist():
print(file)
data = z.read(<zipped-filename>)
Create a zip-file
import zipfile,zlib
zipname = filename + '.zip'
# ZipFile raises on failure instead of returning a falsy value, so no
# 'if zfile:' guard is needed. The with-block closes the archive, which is
# what writes the zip's central directory; an archive that is never closed
# can be left incomplete and unreadable.
with zipfile.ZipFile(zipname, mode='w') as zfile:
    zfile.write(filename, compress_type=zipfile.ZIP_DEFLATED)
Read an Excel file
An Excel file (.xlsx) is basically a zip file with some specific content.
Read from standard input and keyboard
Read from standard input
import sys
for line in sys.stdin:
<codeblock>
Prompt and read from keyboard into a
a = input("Prompt: ")
In python2
a = raw_input("Prompt: ")
Read a csv
This code reads all files matching the specification and returns the content as a list of dicts that have the fieldnames as keys. Fieldnames must be on the first line of the file and must be unique. NOTE: This code cannot handle values that contain the separator, because each line is split on every occurrence of the separator. Use Pandas or a dedicated csv-reader module (such as the standard csv module) if you need this.
def csv2dict(filespec, separator=','):
    '''Convert csv-files to a list of dicts.

    Every file matching the glob pattern 'filespec' is read. The first line
    of each file supplies the fieldnames, which must be unique within that
    file; every following line becomes one dict keyed by those fieldnames.
    Lines are split on every occurrence of 'separator', so values that
    contain the separator are not supported.

    Problems are reported with print() and never raised:
    - a file that cannot be opened, is empty, or has duplicate fieldnames
      is skipped;
    - a line with too few or too many fields is reported but still added,
      holding only the fields that could be paired with a fieldname.

    Returns the dicts of all matching files in one list.
    '''
    outfile = []
    for filename in glob.glob(filespec):
        # Catch only I/O failures; a bare except would also swallow
        # KeyboardInterrupt and SystemExit.
        try:
            fh = open(filename, "r")
        except OSError:
            print('{} cannot be opened'.format(filename))
            continue
        # The with-block closes the file even if reading raises.
        with fh:
            filelist = [line.strip().split(separator) for line in fh]
        if not filelist:
            # No header line at all: nothing to key the dicts on.
            print('ERROR: {} is empty'.format(filename))
            continue
        header = filelist[0]
        if len(header) != len(set(header)):
            print('ERROR: Fieldnames in {} are not unique'.format(filename))
            continue
        numfields = len(header)
        # linecount numbers the data lines, so the header is not counted.
        for linecount, line in enumerate(filelist[1:], start=1):
            if len(line) != numfields:
                print('ERROR: invalid number of fields in line ' + str(linecount))
            # zip() stops at the shorter sequence: surplus fields are
            # dropped and missing fields simply leave their key out.
            outfile.append(dict(zip(header, line)))
    return outfile
Read xml
Module and code examples Python:XML