Difference between revisions of "Python:Strings"
Jump to navigation
Jump to search
m (→Advanced) |
|||
Line 84: | Line 84: | ||
=Searching= | =Searching= | ||
+ | |||
+ | ;import re | ||
+ | :The re module provides Perl-like regular expression matching for string and byte objects | ||
+ | |||
+ | ;re1 = re.compile(regexp) | ||
+ | :Create regular expression object to use for matching. This is more efficient if the regular expression is used several times in a program. | ||
+ | |||
+ | ;mo1 = re1.match(str1) | ||
+ | ;mo1 = re.match(regexp,str1) | ||
+ | :Find 'regexp' at the beginning of 'str1'. Return match object if found, else return [[Python:DataTypes#None|None]]-object | ||
+ | |||
+ | ;mo1 = re.search(regexp,str1) | ||
+ | :Find first occurrence of 'regexp' in 'str1'. Return match object if found, else return [[Python:DataTypes#None|None]]-object | ||
+ | |||
+ | ;lst1 = re.findall(regexp,str1) | ||
+ | :Find all occurrences of 'regexp' in 'str1'. Return a list of strings. | ||
+ | |||
+ | ;mol1 = re.finditer(regexp,str1) | ||
+ | :Find all occurrences of 'regexp' in 'str1'. Return an iterator of match objects. | ||
+ | |||
+ | ;mo.group() | ||
+ | :The matched string in match object 'mo' | ||
+ | ;mo.group(1) | ||
+ | :First submatch (in () ) in the matched string in match object 'mo' | ||
+ | ;mo.start() | ||
+ | :The start position of the matched string in match object 'mo' | ||
+ | ;mo.end() | ||
+ | :The end position of the matched string in match object 'mo' | ||
+ | ;mo.span() | ||
+ | :Tuple with start and end position of the matched string in match object 'mo' | ||
+ | |||
+ | ;re.search(regexp,str1,modifier) | ||
+ | ;re1 = re.compile(regexp,modifier) | ||
+ | :Modify how matching is done | ||
+ | |||
+ | ;re.DOTALL | ||
+ | :The . matches all characters (default is all characters except newline). Use for searching in web or book pages. | ||
+ | |||
+ | ;re.I | ||
+ | :Ignore case | ||
+ | |||
+ | Code Example | ||
+ | <syntaxhighlight lang='python'> | ||
+ | import re | ||
+ | str1 = "The thing to cut in pieces" | ||
+ | rel1 = re.compile('h.*n') | ||
+ | print "Matching" | ||
+ | mo1 = rel1.match(str1) | ||
+ | |||
+ | if mo1: | ||
+ | print mo1.group() | ||
+ | print mo1.start() | ||
+ | print mo1.end() | ||
+ | print mo1.span() | ||
+ | else: | ||
+ | print "no match at beginning of string" | ||
+ | print | ||
+ | |||
+ | print "Searching" | ||
+ | mo1 = re.search('t.*n',str1) | ||
+ | if mo1: | ||
+ | print mo1.group() | ||
+ | print mo1.start() | ||
+ | print mo1.end() | ||
+ | print mo1.span() | ||
+ | |||
+ | print "Searching case insensitive" | ||
+ | mo1 = re.search('h.*n',str1,re.I) | ||
+ | if mo1: | ||
+ | print mo1.group() | ||
+ | print mo1.start() | ||
+ | print mo1.end() | ||
+ | print mo1.span() | ||
+ | |||
+ | |||
+ | print "findall" | ||
+ | re1 = re.compile('t') | ||
+ | lst1 = re1.findall(str1) | ||
+ | if lst1: | ||
+ | print lst1 | ||
+ | for str2 in lst1: | ||
+ | print str2 | ||
+ | print | ||
+ | |||
+ | |||
+ | print "finditer" | ||
+ | re1 = re.compile('i.') | ||
+ | mol1 = re1.finditer(str1) | ||
+ | if mol1: | ||
+ | for mo1 in mol1: | ||
+ | print mo1.group() | ||
+ | print mo1.start() | ||
+ | print mo1.end() | ||
+ | print mo1.span() | ||
+ | print | ||
+ | |||
+ | </syntaxhighlight> |
Revision as of 23:10, 6 January 2018
Strings are immutable, all methods return a new string
Formatting
Basic
- str1.replace(old,new[,cnt])
- In str1 replace old by new (cnt times)
- str1.join(list)
- Join list (or set or other sequence) with str1 as separator
- str1.split(sep[,max])
- Split string into a list on sep, into at most max + 1 elements (remainder is put in last element)
- str1.splitlines([keepends])
- Split on newline, with keepends the newline is preserved.
- str1.center(w)
- str1.ljust(w)
- str1.rjust(w)
- Put spaces around str1 until length 'w' is reached.
- str1.expandtabs(size)
- Replace tabs by 'size' number of spaces
Advanced
- str1.format(values)
- Fill in 'values' in the string fields ({}); if the fields are numbered, they can be in a different order than the values.
- If values are in a dict, they can be addressed by their key.
Code Example
"Value 1: {}, Value2: {}".format(1,2)
"Value 2: {1}, Value1: {0}".format(1,2)
dict1 = {'value1':1, 'value2':2}
# Unpack the dict with ** so its keys are passed as keyword arguments.
# Passing the dict positionally raises KeyError, because the named
# fields {value2} and {value1} are looked up among the keyword arguments.
"Value 2: {value2}, Value1: {value1}".format(**dict1)
- {[field]:spec}
- The format can be specified after the (optional) fieldnumber.
- <alignment><width>.<precision><type>
- Generic format specification. Anything not needed can be left out.
< | Left |
> | Right |
^ | Center |
= | Padding (after sign) |
# | Prepend for x, o and b types |
c | Character |
d | decimal |
f | Float |
% | Percent |
o | Octal |
x | Hexadecimal |
b | Binary |
e | Exponent |
g | Python chooses between decimal, float or exponent |
Searching
- import re
- The re module provides Perl-like regular expression matching for string and byte objects
- re1 = re.compile(regexp)
- Create regular expression object to use for matching. This is more efficient if the regular expression is used several times in a program.
- mo1 = re1.match(str1)
- mo1 = re.match(regexp,str1)
- Find 'regexp' at the beginning of 'str1'. Return match object if found, else return None-object
- mo1 = re.search(regexp,str1)
- Find first occurrence of 'regexp' in 'str1'. Return match object if found, else return None-object
- lst1 = re.findall(regexp,str1)
- Find all occurrences of 'regexp' in 'str1'. Return a list of strings.
- mol1 = re.finditer(regexp,str1)
- Find all occurrences of 'regexp' in 'str1'. Return an iterator of match objects.
- mo.group()
- The matched string in match object 'mo'
- mo.group(1)
- First submatch (in () ) in the matched string in match object 'mo'
- mo.start()
- The start position of the matched string in match object 'mo'
- mo.end()
- The end position of the matched string in match object 'mo'
- mo.span()
- Tuple with start and end position of the matched string in match object 'mo'
- re.search(regexp,str1,modifier)
- re1 = re.compile(regexp,modifier)
- Modify how matching is done
- re.DOTALL
- The . matches all characters (default is all characters except newline). Use for searching in web or book pages.
- re.I
- Ignore case
Code Example
# Demonstrates re.match, re.search, findall and finditer on one sample string.
# print is written with a single parenthesised argument so the example runs
# unchanged on both Python 2 and Python 3; print("") emits an empty line.
import re

str1 = "The thing to cut in pieces"
rel1 = re.compile('h.*n')

print("Matching")
# match() only succeeds at the very start of the string; str1 starts with
# 'T', so this prints the "no match" branch.
mo1 = rel1.match(str1)
if mo1:
    print(mo1.group())
    print(mo1.start())
    print(mo1.end())
    print(mo1.span())
else:
    print("no match at beginning of string")
print("")

print("Searching")
# search() scans the whole string for the first occurrence of the pattern.
mo1 = re.search('t.*n', str1)
if mo1:
    print(mo1.group())
    print(mo1.start())
    print(mo1.end())
    print(mo1.span())

print("Searching case insensitive")
mo1 = re.search('h.*n', str1, re.I)
if mo1:
    print(mo1.group())
    print(mo1.start())
    print(mo1.end())
    print(mo1.span())

print("findall")
# findall() returns a list of the matched strings (empty when nothing matches).
re1 = re.compile('t')
lst1 = re1.findall(str1)
if lst1:
    print(lst1)
    for str2 in lst1:
        print(str2)
print("")

print("finditer")
# finditer() returns an iterator of match objects. The iterator itself is
# always truthy, so it is looped over directly instead of being tested first.
re1 = re.compile('i.')
mol1 = re1.finditer(str1)
for mo1 in mol1:
    print(mo1.group())
    print(mo1.start())
    print(mo1.end())
    print(mo1.span())
    print("")