perl regex
# Perl regex metacharacters:
| Symbol | Meaning |
|:------:|:-----------------------:|
| . | any character except newline (any character at all with /s) |
| \w | alphanumeric and _ |
| \W | any non-word character |
| \s | any whitespace |
| \S | any non-whitespace |
| \d | any digit character |
| \D | any non-digit character |
| \t | tab |
| \n | newline |
| * | match 0 or more times |
| + | match 1 or more times |
| ? | match 0 or 1 times |
| {n} | match exactly n times |
| {n,m} | match n to m times |
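| ^ | anchor at start of string (start of each line with /m) |
| $ | anchor at end of string, or just before a final newline (end of each line with /m) |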
# Note:
# - use square brackets to match any of a set of characters, like [ACGT]
# - use ^ as the first character inside square brackets to negate the set,
# e.g. [^ACGT] matches any single character that is not A, C, G, or T
# - use - to specify a character range, e.g. [a-d] to match any of a, b, c, d
# Example usage:
if ($dna_seq =~ m/^ATGCC[ACGT]GGN{6,9}(TAG|TGA|TAA)$/) {print "It's a match"};
# This matches any $dna_seq that starts with ATGCC, followed by one
# character from ACGT, followed by GG, followed by 6 to 9 literal N
# characters, and ending with TAG, TGA, or TAA