Difference between revisions of "Python:Strings"
Jump to navigation
Jump to search
(→Basic) |
|||
(33 intermediate revisions by the same user not shown) | |||
Line 20: | Line 20: | ||
;str1.strip(<chars>) | ;str1.strip(<chars>) | ||
:Return str1 with all trailing and leading <chars> removed. If <chars> is omitted all trailing and leading whitespaces are removed. | :Return str1 with all trailing and leading <chars> removed. If <chars> is omitted all trailing and leading whitespaces are removed. | ||
+ | |||
+ | ;str1.rstrip('\r\n') | ||
+ | ;str1.lstrip('<char>') | ||
+ | :Return str1 with all newline characters (windows, mac or unix) stripped from the end of str1, like perl 'chomp' does. | ||
+ | :lstrip removes characters from the beginning of str1 | ||
+ | :Without character specification all whitespaces are removed. | ||
+ | |||
+ | ;str1.upper() str1.lower() str1.title() | ||
+ | :Return str1 in uppercase, lowercase or with the first character of each word in uppercase | ||
;str1.join(list) | ;str1.join(list) | ||
− | :Join list (or set or other sequence) into a string with str1 as separator. | + | ;str1.join(map(str,list)) |
+ | ;str1.join(str(e) for e in list) | ||
+ | :Join list (or set or other sequence) into a string with str1 as separator. The second and third form makes sure all elements are converted to string before they are joined. | ||
;str1.split(sep[,max]) | ;str1.split(sep[,max]) | ||
Line 33: | Line 44: | ||
;str1.ljust(w) | ;str1.ljust(w) | ||
;str1.rjust(w) | ;str1.rjust(w) | ||
− | :Put spaces around str1 | + | :Put spaces around str1 until length 'w' is reached. |
;str1.expandtabs(size) | ;str1.expandtabs(size) | ||
:Replace tabs by 'size' number of spaces. | :Replace tabs by 'size' number of spaces. | ||
− | == | + | ==Using format(string)== |
;str1.format(values) | ;str1.format(values) | ||
:Fill in 'values' in str1-fields ({}). By numbering the fields they can be in a different order than the values. | :Fill in 'values' in str1-fields ({}). By numbering the fields they can be in a different order than the values. | ||
Line 49: | Line 60: | ||
dict1 = {'value1':1, 'value2':2} | dict1 = {'value1':1, 'value2':2} | ||
− | "Value 2: {value2}, Value1: {value1}".format(dict1) | + | "Value 2: {value2}, Value1: {value1}".format(**dict1) |
+ | |||
+ | list1 = {'value1', 'value2'} | ||
+ | "Value 1: {}, Value2: {}".format(*list1) | ||
+ | |||
+ | version=3.6 | ||
+ | f"Format strings exist since version {version}" | ||
+ | |||
+ | </syntaxhighlight> | ||
+ | |||
+ | |||
+ | NOTE: Using format-strings (f"string with {variable}") you do not need .format anymore. | ||
+ | <syntaxhighlight lang='python'> | ||
+ | fstring = 'f"Convert a string to a format-string' | ||
+ | compiled_fstring = compile(fstring,fstring, 'eval') | ||
+ | print(eval(compiled_fstring)) | ||
</syntaxhighlight> | </syntaxhighlight> | ||
Line 56: | Line 82: | ||
;[[fill]align][sign][#][0][width][grouping_option][.precision][type] | ;[[fill]align][sign][#][0][width][grouping_option][.precision][type] | ||
− | :Generic format specification. Anything not needed can be left out. | + | :Generic format specification. Anything not needed can be left out [https://www.ranblog.com/blog/python-format/#groupingoption]. |
− | :e.g. 07d | + | :e.g. "{:07d}".format(5) fill out with 0 in front to 7 digits -> '0000005' |
+ | ::"{:010.6f}".format(5.7647) floating point with precision 6 and total width 10 left fill with 0 -> '005.764700' | ||
+ | ::"{:10,.2f}".format(12345.7647) floating point with precision 2,total width 10 and comma as thousand separator -> ' 12,345.76' | ||
+ | ::"{:#=+15_.3f}".format(10000) # as padding, padding after sign, always show sign, _ as thousand separator, floating point with precision 3 -> +####10_000.000 | ||
{| class="wikitable" | {| class="wikitable" | ||
Line 77: | Line 106: | ||
{| class="wikitable" | {| class="wikitable" | ||
|+Types | |+Types | ||
+ | |- | ||
+ | |s||String | ||
|- | |- | ||
|c||Character | |c||Character | ||
Line 94: | Line 125: | ||
|e||Exponent | |e||Exponent | ||
|- | |- | ||
− | |g||Python chooses between decimal, float or exponent | + | |g||Python chooses between decimal, float or exponent with lowercase e |
+ | |- | ||
+ | |G||Python chooses between decimal, float or exponent with capital E | ||
|} | |} | ||
+ | |||
+ | ==Datastructures== | ||
+ | Below can be used on [[Python:DataTypes|datatypes]] like lists and tuples too, not on sets. | ||
+ | ;json.dumps(dict, indent=4) | ||
+ | :Convert a dict into a json string nicely formatted. Indent each level with 4 spaces. | ||
+ | |||
+ | [https://docs.python.org/3/library/pprint.html pprint] provides similar functionality | ||
=Searching= | =Searching= | ||
==Basic== | ==Basic== | ||
− | ;if search in str1: | + | ;if <search> in str1: |
− | :True if search is in str1 | + | :True if <search> is in str1 |
− | ;str1.count(search) | + | ;str1.count(<search>) |
:Return how many times <search> is in str1 | :Return how many times <search> is in str1 | ||
− | ;str1.find(search) | + | ;str1.find(<search>) |
− | ;str1.index(search) | + | ;str1.index(<search>) |
− | :Return where search is found in str1. If not found -1 with find, throw exception with index. | + | :Return where <search> is found in str1. If not found -1 with find, throw exception with index. |
+ | |||
+ | ;str1.endswith(<search>) | ||
+ | ;str1.startswith(<search>) | ||
+ | :Return True if str1 ends/starts with <search> (else returns False). | ||
− | ==Regular Expressions (regexp)== | + | ==[[Regular Expressions]] (regexp)== |
;import re | ;import re | ||
− | :The re | + | :The re module provides Perl-like [[Regular Expressions]] matching for string and byte objects. |
;re1 = re.compile(regexp) | ;re1 = re.compile(regexp) | ||
− | :Create regular expression object to use for matching. This is more efficient if the regular expression | + | :Create regular expression object to use for matching. This is more efficient if the regular expression is used several times in a program. |
+ | |||
+ | '''NOTE: In all examples below regexp can be replaced by the object from re.compile''' | ||
;re.sub(regexp,new,str1) | ;re.sub(regexp,new,str1) | ||
:Return str1 with all parts matching regexp replaced with new. | :Return str1 with all parts matching regexp replaced with new. | ||
− | : | + | :NOTE1: str1 remains unchanged. |
+ | :NOTE2: re.sub is much more expensive than string.replace | ||
+ | :NOTE3: if you want to use search modifiers like DOTALL you must use 'flags=<modifier>' or the object from re.compile. | ||
+ | |||
+ | ;re.split(regexp,str1,max) | ||
+ | :Split str1 into a list on regexp, like with split above superfluous elements will be in the max + 1 element. | ||
− | ; | + | ;match = re1.match(str1) |
− | ; | + | ;match = re.match(regexp,str1) |
:Find 'regexp' at the '''beginning''' of 'str1'. Return [[#Match Objects|match object]] if found, else return [[Python:DataTypes#None|None]]-object | :Find 'regexp' at the '''beginning''' of 'str1'. Return [[#Match Objects|match object]] if found, else return [[Python:DataTypes#None|None]]-object | ||
− | ; | + | ;match = re.search(regexp,str1) |
:Find first occurrence of 'regexp' in 'str1'. Return [[#Match Objects|match object]] if found, else return [[Python:DataTypes#None|None]]-object | :Find first occurrence of 'regexp' in 'str1'. Return [[#Match Objects|match object]] if found, else return [[Python:DataTypes#None|None]]-object | ||
− | ; | + | ;matchlist = re.findall(regexp,str1) |
:Find all occurrences of 'regexp' in 'str1'. Return a [[Python:DataTypes#List|list]] of strings. | :Find all occurrences of 'regexp' in 'str1'. Return a [[Python:DataTypes#List|list]] of strings. | ||
+ | :NOTE1: This may be very slow on regular expressions with multiple '.*?' clauses. Use finditer to prevent this. | ||
+ | :NOTE2: When using a subexpression findall returns a list of tuples for the subexpression matches. | ||
+ | ::Use finditer or, below solution to get the first subexpression matched: | ||
+ | <syntaxhighlight lang=python> | ||
+ | ListOfTuples = re.findall('(regexppart1 (subexpression1) regexppart2)',str1) | ||
+ | lst1 = [ a for a,b in ListOfTuples ] | ||
+ | </syntaxhighlight> | ||
− | ; | + | ;matches = re.finditer(regexp,str1) |
:Find all occurrences of 'regexp' in 'str1'. Return a list of [[#Match Objects|match objects]]. | :Find all occurrences of 'regexp' in 'str1'. Return a list of [[#Match Objects|match objects]]. | ||
===Match Objects=== | ===Match Objects=== | ||
− | ; | + | ;match .group() |
− | ; | + | ;match .group(0) |
:The matched string in match object 'mo' | :The matched string in match object 'mo' | ||
− | ; | + | ;match .group(1) |
:First submatch in the matched string in 'mo'. The first match is the first ( in the expression. | :First submatch in the matched string in 'mo'. The first match is the first ( in the expression. | ||
− | ; | + | ;match .start() |
:The start position of the matched string in 'mo' | :The start position of the matched string in 'mo' | ||
− | ; | + | ;match .end() |
:The end position of the matched string in 'mo' | :The end position of the matched string in 'mo' | ||
− | ; | + | ;match .span() |
:Tuple with start and end position of the matched string in 'mo' | :Tuple with start and end position of the matched string in 'mo' | ||
===Search Modifieres=== | ===Search Modifieres=== | ||
− | ;re.search(regexp,str1,modifier) | + | ;re1 = re.search(regexp,str1,modifier) |
;re1 = re.compile(regexp,modifier) | ;re1 = re.compile(regexp,modifier) | ||
:Modify how matching is done | :Modify how matching is done | ||
Line 157: | Line 215: | ||
;re.I | ;re.I | ||
:Ignore case | :Ignore case | ||
+ | ;re.M | ||
+ | :Multiline mode, ^ matches all line beginnings and $ all line endings. | ||
Code Example: | Code Example: | ||
Line 164: | Line 224: | ||
rel1 = re.compile('h.*n') | rel1 = re.compile('h.*n') | ||
print "Matching" | print "Matching" | ||
− | + | match = rel1.match(str1) | |
− | if | + | if match: |
− | print | + | print match.group() |
− | print | + | print match.start() |
− | print | + | print match.end() |
− | print | + | print match.span() |
else: | else: | ||
print "no match at beginning of string" | print "no match at beginning of string" | ||
Line 176: | Line 236: | ||
print "Searching" | print "Searching" | ||
− | + | match = re.search('t.*n',str1) | |
− | if | + | if match: |
− | print | + | print match.group() |
− | print | + | print match.start() |
− | print | + | print match.end() |
− | print | + | print match.span() |
print "Searching case insensitive" | print "Searching case insensitive" | ||
− | + | match = re.search('h.*n',str1,re.I) | |
− | if | + | if match: |
− | print | + | print match.group() |
− | print | + | print match.start() |
− | print | + | print match.end() |
− | print | + | print match.span() |
print "findall" | print "findall" | ||
re1 = re.compile('t') | re1 = re.compile('t') | ||
− | + | matchlist = re1.findall(str1) | |
− | + | for str2 in matchlist: | |
− | |||
− | |||
print str2 | print str2 | ||
print | print | ||
− | |||
print "finditer" | print "finditer" | ||
re1 = re.compile('i.') | re1 = re.compile('i.') | ||
− | + | matches = re1.finditer(str1) | |
− | if | + | if matches: |
− | for | + | for match in matches: |
− | print | + | print match.group() |
− | print | + | print match.start() |
− | print | + | print match.end() |
− | print | + | print match.span() |
print | print | ||
</syntaxhighlight> | </syntaxhighlight> |
Latest revision as of 17:41, 20 February 2024
Strings are immutable, all methods return a new string
Basics
- str1 + str2
- Return concatenation of str1 and str2
- str1 += str2
- Append str2 to str1
- str1 * 3
- Return str1 3 times
Formatting
Basic
- str1.replace(old,new[,cnt])
- Return str1 with old replaced by new (cnt times).
- str1.strip(<chars>)
- Return str1 with all trailing and leading <chars> removed. If <chars> is omitted all trailing and leading whitespaces are removed.
- str1.rstrip('\r\n')
- str1.lstrip('<char>')
- Return str1 with all newline characters (windows, mac or unix) stripped from the end of str1, like perl 'chomp' does.
- lstrip removes characters from the beginning of str1
- Without character specification all whitespaces are removed.
- str1.upper() str1.lower() str1.title()
- Return str1 in uppercase, lowercase or with the first character of each word in uppercase
- str1.join(list)
- str1.join(map(str,list))
- str1.join(str(e) for e in list)
- Join list (or set or other sequence) into a string with str1 as separator. The second and third forms make sure all elements are converted to strings before they are joined.
- str1.split(sep[,max])
- Split string into a list on sep into max + 1 elements (remainder is put in last element)
- str1.splitlines([keepends])
- Split on newline, with 'keepends' the newline is preserved.
- str1.center(w)
- str1.ljust(w)
- str1.rjust(w)
- Pad str1 with spaces until length 'w' is reached: center pads on both sides, ljust pads on the right, rjust pads on the left.
- str1.expandtabs(size)
- Replace each tab by spaces up to the next tab stop; tab stops are 'size' columns apart.
Using format(string)
- str1.format(values)
- Fill in 'values' in str1-fields ({}). By numbering the fields they can be in a different order than the values.
- If values are in a dict, they can be addressed by their key.
Code Example
"Value 1: {}, Value2: {}".format(1,2)
"Value 2: {1}, Value1: {0}".format(1,2)
dict1 = {'value1':1, 'value2':2}
"Value 2: {value2}, Value1: {value1}".format(**dict1)
list1 = ['value1', 'value2']
"Value 1: {}, Value2: {}".format(*list1)
version=3.6
f"Format strings exist since version {version}"
NOTE: Using format-strings (f"string with {variable}") you do not need .format anymore.
fstring = 'f"Convert a string to a format-string"'
compiled_fstring = compile(fstring,fstring, 'eval')
print(eval(compiled_fstring))
- {[field]:formatspec}
- The format can be specified after the (optional) fieldnumber.
- [[fill]align][sign][#][0][width][grouping_option][.precision][type]
- Generic format specification. Anything not needed can be left out [1].
- e.g. "{:07d}".format(5) fill out with 0 in front to 7 digits -> '0000005'
- "{:010.6f}".format(5.7647) floating point with precision 6 and total width 10 left fill with 0 -> '005.764700'
- "{:10,.2f}".format(12345.7647) floating point with precision 2,total width 10 and comma as thousand separator -> ' 12,345.76'
- "{:#=+15_.3f}".format(10000) # as padding, padding after sign, always show sign, _ as thousand separator, floating point with precision 3 -> +####10_000.000
< | Left |
> | Right |
^ | Center |
= | Padding (after sign) |
# | Prepend for x, o and b types |
s | String |
c | Character |
d | decimal |
f | Float |
% | Percent |
o | Octal |
x | Hexadecimal |
b | Binary |
e | Exponent |
g | Python chooses between decimal, float or exponent with lowercase e |
G | Python chooses between decimal, float or exponent with capital E |
Datastructures
Below can be used on datatypes like lists and tuples too, not on sets.
- json.dumps(dict, indent=4)
- Convert a dict into a json string nicely formatted. Indent each level with 4 spaces.
pprint provides similar functionality
Searching
Basic
- if <search> in str1
- True if <search> is in str1
- str1.count(<search>)
- Return how many times <search> is in str1
- str1.find(<search>)
- str1.index(<search>)
- Return where <search> is found in str1. If not found -1 with find, throw exception with index.
- str1.endswith(<search>)
- str1.startswith(<search>)
- Return True if str1 ends/starts with <search> (else returns False).
Regular Expressions (regexp)
- import re
- The re module provides Perl-like Regular Expressions matching for string and byte objects.
- re1 = re.compile(regexp)
- Create regular expression object to use for matching. This is more efficient if the regular expression is used several times in a program.
NOTE: In all examples below regexp can be replaced by the object from re.compile
- re.sub(regexp,new,str1)
- Return str1 with all parts matching regexp replaced with new.
- NOTE1: str1 remains unchanged.
- NOTE2: re.sub is much more expensive than string.replace
- NOTE3: if you want to use search modifiers like DOTALL you must use 'flags=<modifier>' or the object from re.compile.
- re.split(regexp,str1,max)
- Split str1 into a list on regexp. As with split above, at most max splits are made and the remainder of str1 is put in the last (max + 1) element.
- match = re1.match(str1)
- match = re.match(regexp,str1)
- Find 'regexp' at the beginning of 'str1'. Return match object if found, else return None-object
- match = re.search(regexp,str1)
- Find first occurrence of 'regexp' in 'str1'. Return match object if found, else return None-object
- matchlist = re.findall(regexp,str1)
- Find all occurrences of 'regexp' in 'str1'. Return a list of strings.
- NOTE1: This may be very slow on regular expressions with multiple '.*?' clauses. Use finditer to prevent this.
- NOTE2: When using a subexpression findall returns a list of tuples for the subexpression matches.
- Use finditer, or the solution below, to get the first subexpression matched:
ListOfTuples = re.findall('(regexppart1 (subexpression1) regexppart2)',str1)
lst1 = [ a for a,b in ListOfTuples ]
- matches = re.finditer(regexp,str1)
- Find all occurrences of 'regexp' in 'str1'. Return an iterator yielding match objects.
Match Objects
- match.group()
- match.group(0)
- The matched string in match object 'match'
- match.group(1)
- First submatch in the matched string in 'match'. The first submatch belongs to the first ( in the expression.
- match.start()
- The start position of the matched string in 'match'
- match.end()
- The end position of the matched string in 'match'
- match.span()
- Tuple with start and end position of the matched string in 'match'
Search Modifiers
- match = re.search(regexp,str1,modifier)
- re1 = re.compile(regexp,modifier)
- Modify how matching is done
- re.DOTALL
- The . matches all characters (default is all characters except newline). Use for searching in web or book pages.
- re.I
- Ignore case
- re.M
- Multiline mode, ^ matches all line beginnings and $ all line endings.
Code Example:
import re
str1 = "The thing to cut in pieces"
rel1 = re.compile('h.*n')
print "Matching"
match = rel1.match(str1)
if match:
print match.group()
print match.start()
print match.end()
print match.span()
else:
print "no match at beginning of string"
print
print "Searching"
match = re.search('t.*n',str1)
if match:
print match.group()
print match.start()
print match.end()
print match.span()
print "Searching case insensitive"
match = re.search('h.*n',str1,re.I)
if match:
print match.group()
print match.start()
print match.end()
print match.span()
print "findall"
re1 = re.compile('t')
matchlist = re1.findall(str1)
for str2 in matchlist:
print str2
print
print "finditer"
re1 = re.compile('i.')
matches = re1.finditer(str1)
if matches:
for match in matches:
print match.group()
print match.start()
print match.end()
print match.span()
print