Python:Files



Basics

glob.glob(filespec)
Return a list of files matching 'filespec'.

Code example:

import glob
files = glob.glob(filespec)
os.path.isfile(filename)
Return True if filename exists and is a regular file.
fh = open(filename, "r")
Open filename for reading and return the file handle fh. Use "w" for write, "a" for append.
fh.close()
Close the file for filehandle fh.

Code example:

import os
if os.path.isfile(filename):
    f1 = open(filename, "r")
    for line in f1:
        <codeblock>
    f1.close()

Or 'Easier to Ask for Forgiveness than Permission' (EAFP):

import logging

try:
    fh = open(filename, "r")
except OSError:
    print('ERROR: {} cannot be opened'.format(filename))
    logging.error('ERROR: {} cannot be opened'.format(filename))
else:
    <other code>
with open(filename, "r") as file
Open filename for reading; the file is closed automatically at the end of the with-block.

Code example:

with open(filename, "r") as file:
    for line in file:
        <codeblock>
f1.read(size)
Return up to 'size' bytes from the file as a string. If size is omitted or negative, the entire file is returned.
f1.readlines()
list(f1)
Return all lines from the file as a list.
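
Code example (a minimal sketch, assuming filename holds the path to an existing file):

with open(filename, "r") as f1:      # filename: assumed to be an existing file
    data = f1.read()                 # the whole file as one string
    f1.seek(0)                       # rewind so the file can be read again
    lines = f1.readlines()           # all lines as a list, newlines included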
fileinput.input()
Read through all files specified on the command line.
If there are no files on the command line, read standard input.
You can pass other arguments too, but you have to remove them from sys.argv before fileinput starts reading.
import fileinput
import sys

otherarg = sys.argv.pop()  # other argument is the last on the commandline

for line in fileinput.input():
    <codeblock>
f1.write(line)
Write line to the file opened on file handle f1 (write() does not add a newline).
sys.stdout.write(<string>)
Write to standard output
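
Code example (a small sketch; the output filename is an assumption):

import sys

with open('out.txt', "w") as f1:      # 'out.txt' is a hypothetical filename
    f1.write('first line\n')          # write() does not add a newline itself
    f1.write('second line\n')
sys.stdout.write('done\n')            # like print(), but without an implicit newline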
basename = filepath.split('/')[-1]
Get the filename from a path
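
Code example (a small sketch with a made-up path; os.path.basename gives the same result and is platform independent):

import os.path

filepath = '/var/log/syslog'            # hypothetical path
basename = filepath.split('/')[-1]      # 'syslog'
basename = os.path.basename(filepath)   # same result, platform independent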

File handling and metadata

os.unlink(filename)
Remove file or symbolic link
statinfo = os.stat(filename)
Get file metadata like:
posix.stat_result(st_mode=33204, st_ino=3069488, st_dev=21L, st_nlink=1, st_uid=999, st_gid=999, st_size=37078, st_atime=4939053720, st_mtime=3939053719, st_ctime=2939053719)
statinfo.st_size has the filesize in bytes.
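
Code example (a sketch, assuming filename is an existing file; converts the size and modification time to something readable):

import os
import datetime

statinfo = os.stat(filename)                                 # filename: assumed to exist
size = statinfo.st_size                                      # filesize in bytes
mtime = datetime.datetime.fromtimestamp(statinfo.st_mtime)   # modification time as datetime
print('{} is {} bytes, last modified {}'.format(filename, size, mtime))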
Walking a directory tree and fetching file information
import os

def do_dir(directory):
    with os.scandir(directory) as it:
        for entry in it:
            if not entry.name.startswith('.'):
                if entry.is_file():
                    filepath = entry.path
                    inode = entry.inode()
                    ctime = entry.stat().st_ctime  # see statinfo for other data
                elif entry.is_dir():
                    do_dir(entry.path)

Archives

Read an archive

Read a file in a tar archive into a list of lines, regardless of the compression used (zip archives are not handled by tarfile).

import tarfile

tar = tarfile.open(<tarfile>, 'r')  # mode 'r' auto-detects the compression
for member in tar.getmembers():
    print(member.name)
    if member.isfile():
        filelist = tar.extractfile(member).readlines()  # note: the lines are bytes
tar.close()

Copy files from one archive to another

#!/usr/bin/env python3
import tarfile

filenames = {<(part of) filename to copy>, <(part of) filename to copy>}
oldtar = tarfile.open('tar1.tar',"r")
newtar = tarfile.open('tar2.tar',"w")
for member in oldtar.getmembers():
    done = 0
    for filename in filenames:
        if filename in member.name:
            try:
                newtar.addfile(member, oldtar.extractfile(member.name))
                done = 1
            except OSError as exception:
                print(f"{member.name} has error {exception}")
                done = 2

    if done == 1:
        print(f"{member.name} Added")
    elif done == 0:
        print(f"{member.name} Skipped")
newtar.close()
oldtar.close()

Zip files


Read a zip-file

import zipfile

z = zipfile.ZipFile(<zip-filename>)
for file in z.namelist():
    print(file)

data = z.read(<zipped-filename>)
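
z.read() returns the raw bytes. To get the lines of a zipped text file as strings instead, a minimal sketch (reusing the member-name placeholder from above):

import io

with z.open(<zipped-filename>) as member:
    lines = io.TextIOWrapper(member).readlines()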

Create a zip-file

import zipfile

zipname = filename + '.zip'
with zipfile.ZipFile(zipname, mode='w') as zfile:
    zfile.write(filename, compress_type=zipfile.ZIP_DEFLATED)


Excel files

Reading

An Excel file is basically a zip file with some specific content and can be handled as such. Pandas has a built-in ability to read an Excel file into a DataFrame; if possible, use that.
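
A minimal sketch (the filename and sheet name are assumptions; pandas needs an Excel engine such as openpyxl installed):

import pandas as pd

df = pd.read_excel('data.xlsx', sheet_name='Sheet1')  # 'data.xlsx' and 'Sheet1' are hypothetical
print(df.head())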

Writing

The code below writes a list of lists to an Excel file with xlsxwriter.

import xlsxwriter

def main():
    # excelfilename, worksheetname and alist are assumed to be defined elsewhere
    workbook = xlsxwriter.Workbook(excelfilename, {'nan_inf_to_errors': True})

    header = ['Column1', 'Column2']

    writeworkbook(workbook, worksheetname, alist, header)

    workbook.close()
    return

def writeworkbook(workbook, worksheetname, outlist, header):
    worksheet = workbook.add_worksheet(worksheetname)
    columnwidths = {}

    columnno = 0
    for column in header:
        columnwidths[columnno] = len(column)
        columnno += 1

    for row in outlist:
        columnno = 0
        for column in row:
            columnwidths[columnno] = max(columnwidths[columnno], len(str(column)))
            columnno += 1

    for columnno in columnwidths:
        worksheet.set_column(columnno, columnno, columnwidths[columnno] + columnwidths[columnno] * 0.1)

    wsindex = 0
    worksheet.write_row(wsindex, 0, header)
    for row in outlist:
        wsindex += 1
        worksheet.write_row(wsindex, 0, row)

    return

main()

Read from standard input and keyboard

Read from standard input

import sys

for line in sys.stdin:
    <codeblock>

Prompt and read a line from the keyboard into the variable a

a = input("Prompt: ")

In Python 2:

a = raw_input("Prompt: ")

Read a csv

This code reads all files matching the specification and returns the content as a list of dicts that have the fieldnames as keys. Fieldnames must be on the first line of the file and must be unique. NOTE: this code cannot handle values that contain the separator; the line will be split on all separator occurrences. Use Pandas or the csv module if you need that (see the sketch after the code).

import glob

def csv2dict(filespec, separator=','):
    '''Convert a csv-file to a list of dicts'''
    outfile = []
    filedir = glob.glob(filespec)
    for filename in filedir:
        try:
            fh = open(filename, "r")
        except OSError:
            print('{} cannot be opened'.format(filename))
        else:
            filelist = [line.strip().split(separator) for line in fh]
            fh.close()
            header = filelist.pop(0)
            fieldnames = set(header)
            if len(header) != len(fieldnames):
                print('ERROR: Fieldnames in {} are not unique'.format(filename))
            else:
                numfields = len(header)
                linecount = 0
                for line in filelist:
                    linecount += 1
                    linedict = {}
                    count = 0
                    for field in line:
                        linedict[header[count]] = field
                        count += 1
                        if count > numfields - 1:
                            break
                    if count != numfields:
                        print('ERROR: invalid number of fields in line ' + str(linecount))
                    outfile.append(linedict)  
    return outfile
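
The standard csv module handles quoted values that contain the separator. A minimal sketch (the filename is an assumption):

import csv

with open('data.csv', newline='') as fh:   # 'data.csv' is a hypothetical filename
    rows = list(csv.DictReader(fh))        # list of dicts keyed on the fieldnames in the first line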

Read xml

Module and code examples: Python:XML