class League / CommonMark / Util / RegexHelper
final

RegexHelper

Provides regular expressions and utilities for parsing Markdown

All of the PARTIAL_ regex constants assume that they'll be used in case-insensitive searches. All other complete regexes provided by this class (either via constants or methods) will have case-insensitivity enabled.

Constants

public

PARTIAL_ENTITY

Default: '&(?:#x[a-f0-9]{1,6}|#[0-9]{1,7}|[a-z][a-z0-9]{1,31});'
public

PARTIAL_ESCAPABLE

Default: '[!"#$%&'()*+,.\/:;<=>?@[\\\]^_`{|}~-]'
public

PARTIAL_ESCAPED_CHAR

Default: unresolved
public

PARTIAL_IN_DOUBLE_QUOTES

Default: unresolved
public

PARTIAL_IN_SINGLE_QUOTES

Default: unresolved
public

PARTIAL_IN_PARENS

Default: unresolved
public

PARTIAL_REG_CHAR

Default: '[^\\()\x00-\x20]'
public

PARTIAL_IN_PARENS_NOSP

Default: unresolved
public

PARTIAL_TAGNAME

Default: '[a-z][a-z0-9-]*'
public

PARTIAL_BLOCKTAGNAME

Default: '(?:address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h1|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)'
public

PARTIAL_ATTRIBUTENAME

Default: '[a-z_:][a-z0-9:._-]*'
public

PARTIAL_UNQUOTEDVALUE

Default: '[^"'=<>`\x00-\x20]+'
public

PARTIAL_SINGLEQUOTEDVALUE

Default: ''[^']*''
public

PARTIAL_DOUBLEQUOTEDVALUE

Default: '"[^"]*"'
public

PARTIAL_ATTRIBUTEVALUE

Default: unresolved
public

PARTIAL_ATTRIBUTEVALUESPEC

Default: unresolved
public

PARTIAL_ATTRIBUTE

Default: unresolved
public

PARTIAL_OPENTAG

Default: unresolved
public

PARTIAL_CLOSETAG

Default: unresolved
public

PARTIAL_OPENBLOCKTAG

Default: unresolved
public

PARTIAL_CLOSEBLOCKTAG

Default: unresolved
public

PARTIAL_HTMLCOMMENT

Default: '<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->'
public

PARTIAL_PROCESSINGINSTRUCTION

Default: '[<][?][\s\S]*?[?][>]'
public

PARTIAL_DECLARATION

Default: unresolved
public

PARTIAL_CDATA

Default: '<!\[CDATA\[[\s\S]*?]\]>'
public

PARTIAL_HTMLTAG

Default: unresolved
public

PARTIAL_HTMLBLOCKOPEN

Default: unresolved
public
Default: unresolved
public

REGEX_PUNCTUATION

Default: '/^[\x{2000}-\x{206F}\x{2E00}-\x{2E7F}\p{Pc}\p{Pd}\p{Pe}\p{Pf}\p{Pi}\p{Po}\p{Ps}\\'!"#\$%&\(\)\*\+,\-\.\/:;<=>\?@\[\]\^_`\{\|\}~]/u'
public

REGEX_UNSAFE_PROTOCOL

Default: '/^javascript:|vbscript:|file:|data:/i'
public

REGEX_SAFE_DATA_PROTOCOL

Default: '/^data:image\/(?:png|gif|jpeg|webp)/i'
public

REGEX_NON_SPACE

Default: '/[^ \t\f\v\r\n]/'
public

REGEX_WHITESPACE_CHAR

Default: '/^[ \t\n\x0b\x0c\x0d]/'
public

REGEX_UNICODE_WHITESPACE_CHAR

Default: '/^\pZ|\s/u'
public

REGEX_THEMATIC_BREAK

Default: '/^(?:\*[ \t]*){3,}$|^(?:_[ \t]*){3,}$|^(?:-[ \t]*){3,}$/'
public
Default: '/^(?:<(?:[^<>\n\\\x00]|\\.)*>)/'

Methods

public static

isEscapable ( string $character ) : void

public static

isLetter ( string $character ) : void

public static

matchAt ( string $regex , string $string , int $offset ) : int|null

Attempt to match a regex in $string, starting at $offset

public static

matchFirst ( string $pattern , string $subject , int $offset ) : string[]|null

Functional wrapper around preg_match_all that returns only the first set of matches

public static

unescape ( string $string ) : void

Replace backslash escapes with literal characters

public static

getHtmlBlockOpenRegex ( int $type ) : void

Parameters

  • $type int
    HTML block type
public static

getHtmlBlockCloseRegex ( int $type ) : void

Parameters

  • $type int
    HTML block type
public static

isLinkPotentiallyUnsafe ( string $url ) : void