Description: RFC 2822 grammar modified for LR(1) parsing rfc2822grammar_simplified.txt version 0.103 2005/08/21 16:06:37 excerpted from RFC 2822 and modified by Bruce Lilly N.B. some rules added to handle RFC 2047 encoded-words as amended by RFC 2231 and errata N.B. charset, token, encoded-text defined in RFC 2047, language in RFC 3066 .AS 0 72 d cgnstr ; common and generate-only grammar: .BC NO-WS-CTL = %d1-8 / ; US-ASCII control characters %d11 / ; that do not include the %d12 / ; carriage return, line feed, %d14-31 / ; and white space characters %d127 \"[ASCII NUL also excluded] .BC text = %d1-9 / ; 7-bit Characters excluding NUL, CR and LF %d11 / %d12 / %d14-127 .BC text-8bit = %d1-9 / ; octets excluding NUL, CR and LF %d11 / %d12 / %d14-255 body-7bit = *(*998text CRLF) *998text ; 7bit body \"[doesn't allow 8-bit or binary content] .BC body-8bit = *(*998text-8bit CRLF) *998text-8bit ; MIME 8bit body \"[for MIME 8bit content; doesn't allow binary content] body-binary = *OCTET ; MIME binary body \"[for MIME binary content] body = body-7bit / body-8bit / body-binary .BC specials = "(" / ")" / ; Special characters used in "<" / ">" / ; other parts of the syntax "[" / "]" / ":" / ";" / "@" / "\" / "," / "." / DQUOTE quoted-pair = "\" text F = CRLF WSP ; message header line folding \"[added to provide specification of line folding independent of FWS and CFWS] FWS = (*WSP F *WSP) / 1*WSP ; Folding white space .BC cchar = %d33-39 / ; Printable US-ASCII %d42-91 / ; characters not including "(", %d93-126 ; ")", or "\" \"[added for ctext in comments] ctext = NO-WS-CTL / ; Non white space controls cchar .BC cewchar = %d33-39 / ; Printable US-ASCII %d42-62 / ; characters not including "(", %d64-91 / ; ")", "\", %d93-126 ; or "?" \"[added for encoded-words in comments] .BC cew = "=?" charset ["*" language] "?" encoding "?" 
1*67cewchar "?=" ; total length <= 75 octets \"[encoded-words in comments] ccontent = ctext / quoted-pair / comment comment = "(" [FWS] *((cew / 1*ccontent) FWS) [cew / 1*ccontent] ")" \"[w/o encoded-word handling: comment = "(" [FWS] *(ccontent [FWS]) ")" \"or comment = "(" [FWS] *(1*ccontent FWS) [1*ccontent] ")" \"] CFWS = *([FWS] comment) (([FWS] comment) / FWS) rcvd-ccontent = ctext / quoted-pair / rcvd-comment \"[for Received fields (encoded-words strictly forbidden (for no stated reason))] rcvd-comment = "(" [FWS] *rcvd-ccontent [FWS] ")" \"or rcvd-comment = "(" [FWS] *(1*rcvd-ccontent FWS) [1*rcvd-ccontent] ")" \"[for Received fields (encoded-words strictly forbidden (for no stated reason))] RCFWS = *([FWS] rcvd-comment) (([FWS] rcvd-comment) / FWS) \"[for Received fields (encoded-words strictly forbidden (for no stated reason))] .BC atext = ALPHA / DIGIT / ; Any character except controls, "!" / "#" / ; SP, and specials. "$" / "%" / ; Used for atoms "&" / "'" / "*" / "+" / "-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}" / "~" atom = [CFWS] 1*atext \"[leading CFWS for consistency with quoted-string] dot-atom-text = 1*atext *("." 
1*atext) \"[no leading CFWS (for id-left, id-right in msg-id)] dot-atom = [CFWS] dot-atom-text \"[leading CFWS for local-part for addr-spec] rcvd-dot-atom = [RCFWS] dot-atom-text .BC qtext = NO-WS-CTL / ; Non white space controls %d33 / ; The rest of the US-ASCII %d35-91 / ; characters not including "\" %d93-126 ; or the quote character qcontent = qtext / quoted-pair quoted-string = [CFWS] DQUOTE [FWS] *(qcontent [FWS]) DQUOTE \"[leading CFWS for local-part for addr-spec] rcvd-quoted-string = [RCFWS] DQUOTE [FWS] *(qcontent [FWS]) DQUOTE word = atom / quoted-string \"[leading optional CFWS] .BC utext = NO-WS-CTL / ; Non white space controls %d33-126 ; The rest of US-ASCII \"[1*text (includes WSP characters) is NG as it allows WS-only continuation lines] day-name = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun" day = 1*2DIGIT month-name = "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" / "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec" year = 4*DIGIT date = day FWS month-name FWS year hour = 2DIGIT minute = 2DIGIT second = 2DIGIT time-of-day = hour ":" minute [":" second] zone = ( "+" / "-" ) 4DIGIT ;N.B. no CFWS between +- and 4DIGIT time = time-of-day FWS zone date-time = [day-name "," [FWS]] date FWS time .BC dtext = NO-WS-CTL / ; Non white space controls %d33-90 / ; The rest of the US-ASCII %d94-126 ; characters not including "[", ; "]", or "\" dcontent = dtext / quoted-pair domain-literal = [CFWS] "[" [FWS] *(dcontent [FWS]) "]" \"[leading CFWS for consistency with dot-atom (domain)] rcvd-domain-literal = [RCFWS] "[" [FWS] *(dcontent [FWS]) "]" no-fold-quote = DQUOTE *(qtext / quoted-pair) DQUOTE no-fold-literal = "[" *(dtext / quoted-pair) "]" id-left = dot-atom-text / no-fold-quote id-right = dot-atom-text / no-fold-literal msg-id = [CFWS] "<" id-left "@" id-right ">" \"[length limit(s)? 
id-right must accommodate at least 255 octets for domain] local-part = dot-atom / quoted-string \"[leading optional CFWS] rcvd-local-part = rcvd-dot-atom / rcvd-quoted-string \"[leading optional RCFWS] domain = dot-atom / domain-literal \"[leading optional CFWS] rcvd-domain = rcvd-dot-atom / rcvd-domain-literal \"[leading optional RCFWS] addr-spec = local-part [CFWS] "@" [CFWS] domain \"[leading optional CFWS for consistency with name-addr in mailbox] rcvd-addr-spec = rcvd-local-part [RCFWS] "@" rcvd-domain \"[for Received fields (encoded-words strictly forbidden (for no stated reason))] angle-addr = [CFWS] "<" addr-spec [CFWS] ">" \"[leading CFWS for name-addr (consistency with display-name, phrase, word)] rcvd-angle-addr = [RCFWS] "<" [RCFWS] rcvd-addr-spec [RCFWS] ">" \"[for Received fields (encoded-words strictly forbidden (for no stated reason))] pchar = ALPHA / DIGIT / "!" / "*" / "+" / "-" / "=" / "_" \"[valid characters in encoded-word encoded text in phrases] .BC pew = "=?" charset ["*" language] "?" encoding "?" 1*67pchar "?=" ; total length <= 75 octets \"[encoded-words in phrases] CFWSWS = *([FWS] comment) FWS ; ends with FWS WSCFWS = FWS *(comment [FWS]) ; begins with FWS WSCFWSWS = FWS [*(comment [FWS]) comment FWS] ; begins and ends with FWS .\" this won't be formatted well... 
.BC phrase = CFWSWS *(pew WSCFWSWS) pew WSCFWS / ; all encoded-words 1*([CFWSWS *(pew WSCFWSWS) pew WSP] 1*word) [(CFWSWS *(pew WSCFWSWS) pew WSCFWS)] / ; at least one word *word 1*(CFWSWS *(pew WSCFWSWS) pew WSCFWS word *word) [(CFWSWS *(pew WSCFWSWS) pew WSCFWS)] ; mix \"[w/o encoded-word handling: phrase = 1*word \"] \"[no trailing optional CFWS (because of end-phrase), leading optional CFWS is part of word] display-name = phrase name-addr = [display-name] angle-addr mailbox = name-addr / addr-spec \"[leading optional CFWS] mailbox-list = mailbox *([CFWS] "," mailbox) \"[leading optional CFWS] group = display-name [CFWS] ":" [mailbox-list] [CFWS] ";" \"[leading optional CFWS] address = mailbox / group \"[leading optional CFWS] address-list = (address *([CFWS] "," address)) end = *([FWS] comment) *WSP ; *WSP and/or comments at end of field, but no WS-only continuation line rcvd-end = *([FWS] rcvd-comment) *WSP ; *WSP and/or comments at end of Received field, but no WS-only continuation line \"[for Received fields (encoded-words strictly forbidden (for no stated reason))] .\" this won't be formatted well... .BC end-phrase = CFWSWS *(pew WSCFWSWS) pew [WSP end] / ; all encoded-words 1*([CFWSWS *(pew WSCFWSWS) pew WSP] 1*word) [end / (CFWSWS *(pew WSCFWSWS) pew [WSP end])] / ; at least one word *word 1*(CFWSWS *(pew WSCFWSWS) pew WSP 1*word) [end / (CFWSWS *(pew WSCFWSWS) pew [WSP end])] ; mix ;N.B. trailing WS not required after last encoded-word \"[w/o encoded-word handling: end-phrase = 1*word end \"] phrase-list = *(phrase [CFWS] ",") end-phrase uew = "=?" charset ["*" language] "?" encoding "?" 
encoded-text "?=" ; total length <= 75 octets \"[encoded-words in unstructured fields and user-defined fields] unstructured = [1*utext / (FWS uew)] *(FWS (1*utext / uew)) *WSP \"or unstructured = [1*utext] *(FWS (1*utext / uew)) *WSP \"[w/o encoded-word handling: unstructured = [[FWS] *(1*utext FWS) 1*utext] *WSP \"or unstructured = [1*utext] *(FWS 1*utext) *WSP \"] path = "<" [addr-spec] [CFWS] ">" item-name = ALPHA *("-" (ALPHA / DIGIT)) item-value = 1*rcvd-angle-addr / rcvd-addr-spec / atom / domain / msg-id \"[w/o encoded-word issues: item-value = 1*angle-addr / addr-spec / atom / domain / msg-id \"] name-val-pair = item-name RCFWS item-value name-val-list = *(name-val-pair RCFWS) \"N.B. 2822 specification does not provide for mandatory CFWS at end of list (as opposed to RFC 821 (required ) and 2821), i.e. \" *(name-val-pair RCFWS) [name-val-pair [RCFWS]] .BC ftext = %d33-57 / ; Any character except %d59-126 ; controls, SP, and ":". field-name = 1*ftext \"[N.B. no regular field may end with a production that may end with FWS or CFWS (that would permit WS-only continuation line)] orig-date = "Date:" [FWS] date-time end CRLF from = "From:" mailbox-list end CRLF sender = "Sender:" mailbox end CRLF reply-to = "Reply-To:" address-list end CRLF to = "To:" address-list end CRLF cc = "Cc:" address-list end CRLF bcc = "Bcc:" [address-list] end CRLF message-id = "Message-ID:" msg-id end CRLF in-reply-to = "In-Reply-To:" 1*msg-id end CRLF references = "References:" 1*msg-id end CRLF keywords = "Keywords:" phrase-list CRLF \"[leading optional CFWS and trailing optional end in phrase-list] subject = "Subject:" unstructured CRLF \"[N.B. RFC 1036 imposes syntax requirements on content "cmsg" and "re:", contradicting RFC 822] \"[N.B. 
in "Subject: foo", the (unstructured) field body is " foo", not "foo"] comments = "Comments:" unstructured CRLF resent-date = "Resent-Date:" [FWS] date-time end CRLF resent-from = "Resent-From:" mailbox-list end CRLF resent-sender = "Resent-Sender:" mailbox end CRLF resent-to = "Resent-To:" address-list end CRLF resent-cc = "Resent-Cc:" address-list end CRLF resent-bcc = "Resent-Bcc:" [address-list] end CRLF resent-msg-id = "Resent-Message-ID:" msg-id end CRLF return = "Return-Path:" [CFWS] path end CRLF received = "Received:" [RCFWS] name-val-list ";" [FWS] date-time rcvd-end CRLF trace = return 1*received optional-field = field-name ":" unstructured CRLF \"N.B. RFC 822 distinguishes extension fields from user-defined (beginning with case-insensitive "X-") fields \"N.B. RFC 2047 treats extension fields and user-defined fields differently ; text message overall grammar for generating: fields = *(trace *(resent-date / resent-from / resent-sender / resent-to / resent-cc / resent-bcc / resent-msg-id)) *(orig-date / from / sender / reply-to / to / cc / bcc / message-id / in-reply-to / references / subject / comments / keywords / optional-field) \"[allow optional-fields in initial block w/ trace?] message = fields CRLF body ; possible future extensions: word-dot = word / ([CFWS] ".") \"[optional leading CFWS] ; "ng-" prefixed constructs are not to be generated ng-phrase = phrase / 1*([CFWSWS *(pew WSCFWSWS) pew WSP] 1*word) *([CFWSWS *(pew WSCFWSWS) pew WSP] 1*word-dot) [(CFWSWS *(pew WSCFWSWS) pew WSCFWS)] / ; at least one word *word 1*(CFWSWS *(pew WSCFWSWS) pew WSCFWS 1*word-dot) [(CFWSWS *(pew WSCFWSWS) pew WSCFWS)] ; mix starting with word or pew \"N.B. 
similar to obs-phrase in 2822, but not "obsolete"; was never legal, might be legalized in future \"[w/o encoded-word handling: ng-phrase = word *word-dot \"] \"[optional leading CFWS] ng-end-phrase = end-phrase / ng-phrase *([CFWSWS *(pew WSCFWSWS) pew WSP] word-dot) [end / (CFWSWS *(pew WSCFWSWS) pew [WSP end])] \"N.B. trailing FWS not required after last encoded-word \"[w/o encoded-word handling: ng-end-phrase = word *word-dot end \"] \" ; "ng-" prefixed constructs are not to be generated \"[N.B. ng-end-phrase is not currently used, but may supersede end-phrase in future] ; parsing grammar: obs-text = %d0-127 \" N.B. original 2822 specification permitted multiple characters \"[unused] obs-qp = "\" (%d0-127) \" [no provision for RFC 822 quoted CRLF] .BC obs-char = %d0-9 / %d11 / ; %d0-127 except CR and %d12 / %d14-127 ; LF ; [obs-char is not used anywhere in this revised grammar] obs-FWS = (1*WSP *(F *WSP)) / (*F 1*WSP) / 1*F obs-ctext = ctext / %d0 \"[adds ASCII NUL to ctext; see discussion below] obs-ccontent = ctext / obs-qp / obs-comment \"N.B. 
RFC 822 ASCII NUL not permitted in comment, even with obs- rules, except as part of quoted-pair \" perhaps should define and use obs-ctext which would include ASCII NUL \" there doesn't seem much point in excluding NUL from comments in the parsing grammar since it's allowed in other places (see obs-text, obs-char, obs-qp, obs-utext) obs-comment = "(" [obs-FWS] *((cew / 1*obs-ccontent) obs-FWS) (*obs-ccontent / cew) [obs-FWS] ")" \"[w/o encoded-word handling: obs-comment = "(" [obs-FWS] *(obs-ccontent [obs-FWS]) ")" \"] obs-CFWS = *([obs-FWS] obs-comment) (([obs-FWS] obs-comment) / obs-FWS) obs-rcvd-ccontent = ctext / obs-qp / obs-rcvd-comment \"[for Received fields (encoded-words strictly forbidden (for no stated reason))] obs-rcvd-comment = "(" [obs-FWS] *obs-rcvd-ccontent [obs-FWS] ")" \"or obs-rcvd-comment = "(" [obs-FWS] *(1*obs-rcvd-ccontent obs-FWS) [1*obs-rcvd-ccontent] ")" \"[for Received fields (encoded-words strictly forbidden (for no stated reason))] obs-RCFWS = *([obs-FWS] obs-rcvd-comment) (([obs-FWS] obs-rcvd-comment) / obs-FWS) \"[for Received fields (encoded-words strictly forbidden (for no stated reason))] obs-atom = [obs-CFWS] 1*atext obs-qtext = qtext / %d0 \"[adds ASCII NUL to qtext; see discussion below] obs-qcontent = qtext / obs-qp \"N.B. RFC 822 unescaped ASCII NUL not permitted, even with obs- rules; maybe use obs-qtext instead of qtext obs-quoted-string = [obs-CFWS] DQUOTE [obs-FWS] *(obs-qcontent [obs-FWS]) DQUOTE obs-word = obs-atom / obs-quoted-string \"[optional leading obs-CFWS] obs-word-dot = obs-word / ([obs-CFWS] ".") \"[optional leading obs-CFWS] obs-phrase = ng-phrase / 1*(([obs-CFWS] pew) / obs-word) *(([obs-CFWS] pew) / obs-word-dot) ; relaxed encoded-word/WSP requirements, unquoted dot \"[w/o encoded-word support: obs-phrase = 1*obs-word *(obs-word-dot) \"] \"N.B. 
use different from original 2822 obs-phrase \"[encoded-word/WSP requirements may be too lax] obs-display-name = obs-phrase obs-local-part = obs-word *([obs-CFWS] "." obs-word) \"[optional leading obs-CFWS] obs-dtext = dtext / %d0 \"[adds ASCII NUL to dtext; see discussion below] obs-dcontent = obs-dtext / obs-qp obs-domain-literal = [obs-CFWS] "[" [FWS] *(dcontent [FWS]) "]" \"N.B. RFC 822 unescaped ASCII NUL not permitted, even with obs- rules; maybe use obs-dcontent obs-domain = (obs-atom *([obs-CFWS] "." obs-atom)) / obs-domain-literal \"[optional leading obs-CFWS] obs-domain-list = "@" obs-domain *(1*([obs-CFWS] "," [obs-CFWS]) "@" obs-domain) obs-addr-spec = obs-local-part [obs-CFWS] "@" obs-domain \"[optional leading obs-CFWS] obs-route = [obs-CFWS] obs-domain-list [obs-CFWS] ":" obs-angle-addr = [obs-CFWS] "<" [[obs-route] obs-addr-spec] [obs-CFWS] ">" obs-name-addr = [obs-display-name] obs-angle-addr \"[optional leading obs-CFWS] obs-mailbox = obs-name-addr / obs-addr-spec \"[optional leading obs-CFWS] obs-mbox-list = 1*([obs-mailbox] [obs-CFWS] ",") [obs-mailbox] \"[optional leading obs-CFWS] obs-group = obs-display-name [obs-CFWS] ":" [obs-mbox-list] [obs-CFWS] ";" \"[optional leading obs-CFWS] obs-address = obs-mailbox / obs-group \"[optional leading obs-CFWS] obs-addr-list = 1*([obs-address] [obs-CFWS] ",") [obs-address] \"[optional leading obs-CFWS] obs-path = "<" [[obs-route] obs-addr-spec] [obs-CFWS] ">" obs-msg-id = [obs-CFWS] "<" obs-addr-spec [obs-CFWS] ">" \"[no obs-route] obs-phrase-list = obs-phrase / (1*([obs-phrase] [obs-CFWS] ",") [obs-phrase]) \"[optional leading obs-CFWS] obs-utext = utext / %d0 \"[adds ASCII NUL to utext] obs-unstructured = [([obs-FWS] 1*obs-utext) / ([obs-FWS] uew)] *(obs-FWS (1*obs-utext / uew)) [obs-FWS] \"[w/o encoded-word handling: obs-unstructured = [obs-FWS] *(1*obs-utext obs-FWS) *(obs-utext) \"] obs-year = 2*DIGIT obs-date = day obs-CFWS month-name obs-CFWS obs-year \"N.B. obs- rule does not permit (e.g.) 
1Jan2001 which was permissible under RFC 822 obs-rcvd-date = day obs-RCFWS month-name obs-RCFWS obs-year \"[for Received fields (encoded-words strictly forbidden (for no stated reason))] \"N.B. obs- rule does not permit (e.g.) 1Jan2001 which was permissible under RFC 822 obs-time-of-day = hour [obs-CFWS] ":" [obs-CFWS] minute [[obs-CFWS] ":" [obs-CFWS] second] obs-rcvd-time-of-day = hour [obs-RCFWS] ":" [obs-RCFWS] minute [[obs-RCFWS] ":" [obs-RCFWS] second] \"[for Received fields (encoded-words strictly forbidden (for no stated reason))] .\" this won't be formatted nicely... .BC obs-zone = zone / "UT" / "GMT" / ; Universal Time ; North American UT ; offsets "EST" / "EDT" / ; Eastern: - 5/ - 4 "CST" / "CDT" / ; Central: - 6/ - 5 "MST" / "MDT" / ; Mountain: - 7/ - 6 "PST" / "PDT" / ; Pacific: - 8/ - 7 %d65-73 / ; Military zones - "A" %d75-90 / ; through "I" and "K" %d97-105 / ; through "Z", both %d107-122 ; upper and lower case obs-time = obs-time-of-day obs-CFWS obs-zone \"N.B. obs- rule does not permit adjacent time and zone, which was permissible under RFC 822 obs-rcvd-time = obs-rcvd-time-of-day obs-RCFWS obs-zone \"N.B. obs- rule does not permit adjacent time and zone, which was permissible under RFC 822 obs-date-time = [day-name [obs-CFWS] "," [obs-CFWS]] obs-date obs-CFWS obs-time \"N.B. obs- rule does not provide for adjacent date and time permitted by RFC 822 obs-rcvd-date-time = [day-name [obs-RCFWS] "," [obs-RCFWS]] obs-rcvd-date obs-RCFWS obs-rcvd-time \"[for Received fields (encoded-words strictly forbidden (for no stated reason))] \"N.B. 
obs- rule does not provide for adjacent date and time permitted by RFC 822 obs-name-val-list = *(name-val-pair obs-RCFWS) obs-orig-date = "Date" *WSP ":" [obs-CFWS] obs-date-time [obs-CFWS] CRLF obs-from = "From" *WSP ":" obs-mbox-list [obs-CFWS] CRLF obs-sender = "Sender" *WSP ":" obs-mailbox [obs-CFWS] CRLF obs-reply-to = "Reply-To" *WSP ":" obs-addr-list [obs-CFWS] CRLF obs-to = "To" *WSP ":" obs-addr-list [obs-CFWS] CRLF obs-cc = "Cc" *WSP ":" obs-addr-list [obs-CFWS] CRLF obs-bcc = "Bcc" *WSP ":" [obs-addr-list] [obs-CFWS] CRLF obs-message-id = "Message-ID" *WSP ":" obs-msg-id [obs-CFWS] CRLF obs-in-reply-to = "In-Reply-To" *WSP ":" *(obs-phrase / obs-msg-id) [obs-CFWS] CRLF \"N.B. obs-phrase as defined here, which differs from original 2822 definition obs-references = "References" *WSP ":" *(obs-phrase / obs-msg-id) [obs-CFWS] CRLF \"N.B. obs-phrase as defined here, which differs from original 2822 definition obs-subject = "Subject" *WSP ":" obs-unstructured CRLF obs-comments = "Comments" *WSP ":" obs-unstructured CRLF obs-keywords = "Keywords" *WSP ":" obs-phrase-list [obs-CFWS] CRLF obs-resent-from = "Resent-From" *WSP ":" obs-mbox-list [obs-CFWS] CRLF obs-resent-send = "Resent-Sender" *WSP ":" obs-mailbox [obs-CFWS] CRLF obs-resent-date = "Resent-Date" *WSP ":" [obs-CFWS] obs-date-time [obs-CFWS] CRLF obs-resent-to = "Resent-To" *WSP ":" obs-addr-list [obs-CFWS] CRLF obs-resent-cc = "Resent-Cc" *WSP ":" obs-addr-list [obs-CFWS] CRLF obs-resent-bcc = "Resent-Bcc" *WSP ":" [obs-addr-list] [obs-CFWS] CRLF obs-resent-mid = "Resent-Message-ID" *WSP ":" obs-msg-id [obs-CFWS] CRLF obs-resent-rply = "Resent-Reply-To" *WSP ":" obs-addr-list [obs-CFWS] CRLF obs-return = "Return-Path" *WSP ":" [obs-CFWS] obs-path [obs-CFWS] CRLF obs-received = "Received" *WSP ":" [obs-RCFWS] obs-name-val-list [";" [obs-RCFWS] obs-rcvd-date-time] [obs-RCFWS] CRLF \"[for Received fields (encoded-words strictly forbidden (for no stated reason))] \"N.B. 
RFC 822 required date-time stamp, as do RFCs 821, 2821 \"N.B. reference online version of 2822 specification does not permit WSP before colon if date-time stamp is used obs-optional = field-name *WSP ":" obs-unstructured CRLF \"N.B. RFC 822 distinguishes extension fields from user-defined (beginning with case-insensitive "X-") fields \"N.B. RFC 2047 treats extension fields and user-defined fields differently ; text message overall grammar for parsing: obs-fields = *(obs-return / obs-received / obs-orig-date / obs-from / obs-sender / obs-reply-to / obs-to / obs-cc / obs-bcc / obs-message-id / obs-in-reply-to / obs-references / obs-subject / obs-comments / obs-keywords / obs-resent-date / obs-resent-from / obs-resent-send / obs-resent-rply / obs-resent-to / obs-resent-cc / obs-resent-bcc / obs-resent-mid / obs-optional) obs-message = obs-fields CRLF body .AE -------------------------------------------------------------------------------- Notes not part of modified grammar: For LR(1) parser compatibility, lexical tokens are grouped such that trailing WS, FWS, or CFWS is associated with its preceding lexical token. Therefore, no lexical token handled by the higher-level parser grammar rules has any ambiguity associated with optional WS, FWS, or CFWS. Additional rules such as: start = ":" obs-start = *WSP ":" ; or *WSP start fstart = ":" [FWS] obs-fstart = *WSP ":" [FWS] ; or *WSP fstart cstart = ":" [CFWS] obs-cstart = *WSP ":" [CFWS] ; or *WSP cstart can be used to simplify the field definition rules: orig-date = "Date" fstart date-time end CRLF subject = "Subject" start unstructured CRLF return = "Return-Path" cstart path end CRLF etc. And adding: resent = "Resent-" allows: resent-from = resent from etc., allowing the resent- fields to be simplified and ensuring that the definitions remain in sync between base and resent- versions.