libkmime

kmime_header_parsing.cpp
1 /*
2  kmime_header_parsing.cpp
3 
4  This file is part of KMime, the KDE internet mail/usenet news message library.
5  Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org>
6 
7  KMime is free software; you can redistribute it and/or modify it
8  under the terms of the GNU General Public License, version 2, as
9  published by the Free Software Foundation.
10 
11  KMime is distributed in the hope that it will be useful, but
12  WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with this library; if not, write to the Free Software
18  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 
20  In addition, as a special exception, the copyright holders give
21  permission to link the code of this library with any edition of
22  the TQt library by Trolltech AS, Norway (or with modified versions
23  of TQt that use the same license as TQt), and distribute linked
24  combinations including the two. You must obey the GNU General
25  Public License in all respects for all of the code used other than
26  TQt. If you modify this file, you may extend this exception to
27  your version of the file, but you are not obligated to do so. If
28  you do not wish to do so, delete this exception statement from
29  your version.
30 */
31 
32 #include <config.h>
33 #include "kmime_header_parsing.h"
34 
35 #include "kmime_codecs.h"
36 #include "kmime_util.h"
37 #include "kmime_warning.h"
38 
39 #include <tdeglobal.h>
40 #include <kcharsets.h>
41 
42 #include <tqtextcodec.h>
43 #include <tqmap.h>
44 #include <tqcstring.h>
45 #include <tqstringlist.h>
46 
47 #include <ctype.h> // for isdigit
48 #include <cassert>
49 
50 using namespace KMime;
51 using namespace KMime::Types;
52 
53 namespace KMime {
54 
55 namespace Types {
56 
57  TQString AddrSpec::asString() const {
58  bool needsQuotes = false;
59  TQString result;
60  result.reserve( localPart.length() + domain.length() + 1 );
61  for ( unsigned int i = 0 ; i < localPart.length() ; ++i ) {
62  const char ch = localPart[i].latin1();
63  if ( ch == '.' || isAText( ch ) )
64  result += ch;
65  else {
66  needsQuotes = true;
67  if ( ch == '\\' || ch == '"' )
68  result += '\\';
69  result += ch;
70  }
71  }
72  if ( needsQuotes )
73  return '"' + result + "\"@" + domain;
74  else
75  return result + '@' + domain;
76  }
77 
78 }
79 
80 namespace HeaderParsing {
81 
82 // parse the encoded-word (scursor points to after the initial '=')
83 bool parseEncodedWord( const char* & scursor, const char * const send,
84  TQString & result, TQCString & language ) {
85 
86  // make sure the caller already did a bit of the work.
87  assert( *(scursor-1) == '=' );
88 
89  //
90  // STEP 1:
91  // scan for the charset/language portion of the encoded-word
92  //
93 
94  char ch = *scursor++;
95 
96  if ( ch != '?' ) {
97  kdDebug() << "first" << endl;
98  KMIME_WARN_PREMATURE_END_OF(EncodedWord);
99  return false;
100  }
101 
102  // remember start of charset (ie. just after the initial "=?") and
103  // language (just after the first '*') fields:
104  const char * charsetStart = scursor;
105  const char * languageStart = 0;
106 
107  // find delimiting '?' (and the '*' separating charset and language
108  // tags, if any):
109  for ( ; scursor != send ; scursor++ )
110  if ( *scursor == '?')
111  break;
112  else if ( *scursor == '*' && !languageStart )
113  languageStart = scursor + 1;
114 
115  // not found? can't be an encoded-word!
116  if ( scursor == send || *scursor != '?' ) {
117  kdDebug() << "second" << endl;
118  KMIME_WARN_PREMATURE_END_OF(EncodedWord);
119  return false;
120  }
121 
122  // extract the language information, if any (if languageStart is 0,
123  // language will be null, too):
124  TQCString maybeLanguage( languageStart, scursor - languageStart + 1 /*for NUL*/);
125  // extract charset information (keep in mind: the size given to the
126  // ctor is one off due to the \0 terminator):
127  TQCString maybeCharset( charsetStart, ( languageStart ? languageStart : scursor + 1 ) - charsetStart );
128 
129  //
130  // STEP 2:
131  // scan for the encoding portion of the encoded-word
132  //
133 
134 
135  // remember start of encoding (just _after_ the second '?'):
136  scursor++;
137  const char * encodingStart = scursor;
138 
139  // find next '?' (ending the encoding tag):
140  for ( ; scursor != send ; scursor++ )
141  if ( *scursor == '?' ) break;
142 
143  // not found? Can't be an encoded-word!
144  if ( scursor == send || *scursor != '?' ) {
145  kdDebug() << "third" << endl;
146  KMIME_WARN_PREMATURE_END_OF(EncodedWord);
147  return false;
148  }
149 
150  // extract the encoding information:
151  TQCString maybeEncoding( encodingStart, scursor - encodingStart + 1 );
152 
153 
154  kdDebug() << "parseEncodedWord: found charset == \"" << maybeCharset
155  << "\"; language == \"" << maybeLanguage
156  << "\"; encoding == \"" << maybeEncoding << "\"" << endl;
157 
158  //
159  // STEP 3:
160  // scan for encoded-text portion of encoded-word
161  //
162 
163 
164  // remember start of encoded-text (just after the third '?'):
165  scursor++;
166  const char * encodedTextStart = scursor;
167 
168  // find next '?' (ending the encoded-text):
169  for ( ; scursor != send ; scursor++ )
170  if ( *scursor == '?' ) break;
171 
172  // not found? Can't be an encoded-word!
173  // ### maybe evaluate it nonetheless if the rest is OK?
174  if ( scursor == send || *scursor != '?' ) {
175  kdDebug() << "fourth" << endl;
176  KMIME_WARN_PREMATURE_END_OF(EncodedWord);
177  return false;
178  }
179  scursor++;
180  // check for trailing '=':
181  if ( scursor == send || *scursor != '=' ) {
182  kdDebug() << "fifth" << endl;
183  KMIME_WARN_PREMATURE_END_OF(EncodedWord);
184  return false;
185  }
186  scursor++;
187 
188  // set end sentinel for encoded-text:
189  const char * const encodedTextEnd = scursor - 2;
190 
191  //
192  // STEP 4:
193  // setup decoders for the transfer encoding and the charset
194  //
195 
196 
197  // try if there's a codec for the encoding found:
198  Codec * codec = Codec::codecForName( maybeEncoding );
199  if ( !codec ) {
200  KMIME_WARN_UNKNOWN(Encoding,maybeEncoding);
201  return false;
202  }
203 
204  // get an instance of a corresponding decoder:
205  Decoder * dec = codec->makeDecoder();
206  assert( dec );
207 
208  // try if there's a (text)codec for the charset found:
209  bool matchOK = false;
210  TQTextCodec
211  *textCodec = TDEGlobal::charsets()->codecForName( maybeCharset, matchOK );
212 
213  if ( !matchOK || !textCodec ) {
214  KMIME_WARN_UNKNOWN(Charset,maybeCharset);
215  delete dec;
216  return false;
217  };
218 
219  kdDebug() << "mimeName(): \"" << textCodec->mimeName() << "\"" << endl;
220 
221  // allocate a temporary buffer to store the 8bit text:
222  int encodedTextLength = encodedTextEnd - encodedTextStart;
223  TQByteArray buffer( codec->maxDecodedSizeFor( encodedTextLength ) );
224  TQByteArray::Iterator bit = buffer.begin();
225  TQByteArray::ConstIterator bend = buffer.end();
226 
227  //
228  // STEP 5:
229  // do the actual decoding
230  //
231 
232  if ( !dec->decode( encodedTextStart, encodedTextEnd, bit, bend ) )
233  KMIME_WARN << codec->name() << " codec lies about it's maxDecodedSizeFor( "
234  << encodedTextLength << " )\nresult may be truncated" << endl;
235 
236  result = textCodec->toUnicode( buffer.begin(), bit - buffer.begin() );
237 
238  kdDebug() << "result now: \"" << result << "\"" << endl;
239  // cleanup:
240  delete dec;
241  language = maybeLanguage;
242 
243  return true;
244 }
245 
// Advances scursor over any run of SP, HTAB, CR and LF characters,
// stopping at the first other character or at send.
static inline void eatWhiteSpace( const char* & scursor, const char * const send ) {
  for ( ; scursor != send ; ++scursor ) {
    switch ( *scursor ) {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
      continue; // still whitespace: keep going
    default:
      return;
    }
  }
}
252 
253 bool parseAtom( const char * & scursor, const char * const send,
254  TQString & result, bool allow8Bit )
255 {
256  TQPair<const char*,int> maybeResult;
257 
258  if ( parseAtom( scursor, send, maybeResult, allow8Bit ) ) {
259  result += TQString::fromLatin1( maybeResult.first, maybeResult.second );
260  return true;
261  }
262 
263  return false;
264 }
265 
266 bool parseAtom( const char * & scursor, const char * const send,
267  TQPair<const char*,int> & result, bool allow8Bit ) {
268  bool success = false;
269  const char * start = scursor;
270 
271  while ( scursor != send ) {
272  signed char ch = *scursor++;
273  if ( ch > 0 && isAText(ch) ) {
274  // AText: OK
275  success = true;
276  } else if ( allow8Bit && ch < 0 ) {
277  // 8bit char: not OK, but be tolerant.
278  KMIME_WARN_8BIT(ch);
279  success = true;
280  } else {
281  // CTL or special - marking the end of the atom:
282  // re-set sursor to point to the offending
283  // char and return:
284  scursor--;
285  break;
286  }
287  }
288  result.first = start;
289  result.second = scursor - start;
290  return success;
291 }
292 
293 bool parseToken( const char * & scursor, const char * const send,
294  TQString & result, bool allow8Bit )
295 {
296  TQPair<const char*,int> maybeResult;
297 
298  if ( parseToken( scursor, send, maybeResult, allow8Bit ) ) {
299  result += TQString::fromLatin1( maybeResult.first, maybeResult.second );
300  return true;
301  }
302 
303  return false;
304 }
305 
306 bool parseToken( const char * & scursor, const char * const send,
307  TQPair<const char*,int> & result, bool allow8Bit )
308 {
309  bool success = false;
310  const char * start = scursor;
311 
312  while ( scursor != send ) {
313  signed char ch = *scursor++;
314  if ( ch > 0 && isTText(ch) ) {
315  // TText: OK
316  success = true;
317  } else if ( allow8Bit && ch < 0 ) {
318  // 8bit char: not OK, but be tolerant.
319  KMIME_WARN_8BIT(ch);
320  success = true;
321  } else {
322  // CTL or tspecial - marking the end of the atom:
323  // re-set sursor to point to the offending
324  // char and return:
325  scursor--;
326  break;
327  }
328  }
329  result.first = start;
330  result.second = scursor - start;
331  return success;
332 }
333 
// Reads the next input character into "ch" and advances "scursor".
// If the input is already exhausted (scursor == send), emits a
// premature-end warning and makes the *enclosing function* return false.
//
// Requires "scursor", "send" and "ch" to be in scope at the point of use.
// Wrapped in do { } while ( 0 ) so that "READ_ch_OR_FAIL;" is exactly one
// statement and can safely be used as the body of an unbraced if/else.
#define READ_ch_OR_FAIL do { \
    if ( scursor == send ) { \
      KMIME_WARN_PREMATURE_END_OF(GenericQuotedString); \
      return false; \
    } \
    ch = *scursor++; \
  } while ( 0 )
340 
341 // known issues:
342 //
343 // - doesn't handle quoted CRLF
344 
345 bool parseGenericQuotedString( const char* & scursor, const char * const send,
346  TQString & result, bool isCRLF,
347  const char openChar, const char closeChar )
348 {
349  char ch;
350  // We are in a quoted-string or domain-literal or comment and the
351  // cursor points to the first char after the openChar.
352  // We will apply unfolding and quoted-pair removal.
353  // We return when we either encounter the end or unescaped openChar
354  // or closeChar.
355 
356  assert( *(scursor-1) == openChar || *(scursor-1) == closeChar );
357 
358  while ( scursor != send ) {
359  ch = *scursor++;
360 
361  if ( ch == closeChar || ch == openChar ) {
362  // end of quoted-string or another opening char:
363  // let caller decide what to do.
364  return true;
365  }
366 
367  switch( ch ) {
368  case '\\': // quoted-pair
369  // misses "\" CRLF LWSP-char handling, see rfc822, 3.4.5
370  READ_ch_OR_FAIL;
371  KMIME_WARN_IF_8BIT(ch);
372  result += TQChar(ch);
373  break;
374  case '\r':
375  // ###
376  // The case of lonely '\r' is easy to solve, as they're
377  // not part of Unix Line-ending conventions.
378  // But I see a problem if we are given Unix-native
379  // line-ending-mails, where we cannot determine anymore
380  // whether a given '\n' was part of a CRLF or was occurring
381  // on it's own.
382  READ_ch_OR_FAIL;
383  if ( ch != '\n' ) {
384  // CR on it's own...
385  KMIME_WARN_LONE(CR);
386  result += TQChar('\r');
387  scursor--; // points to after the '\r' again
388  } else {
389  // CRLF encountered.
390  // lookahead: check for folding
391  READ_ch_OR_FAIL;
392  if ( ch == ' ' || ch == '\t' ) {
393  // correct folding;
394  // position cursor behind the CRLF WSP (unfolding)
395  // and add the WSP to the result
396  result += TQChar(ch);
397  } else {
398  // this is the "shouldn't happen"-case. There is a CRLF
399  // inside a quoted-string without it being part of FWS.
400  // We take it verbatim.
401  KMIME_WARN_NON_FOLDING(CRLF);
402  result += "\r\n";
403  // the cursor is decremented again, so's we need not
404  // duplicate the whole switch here. "ch" could've been
405  // everything (incl. openChar or closeChar).
406  scursor--;
407  }
408  }
409  break;
410  case '\n':
411  // Note: CRLF has been handled above already!
412  // ### LF needs special treatment, depending on whether isCRLF
413  // is true (we can be sure a lonely '\n' was meant this way) or
414  // false ('\n' alone could have meant LF or CRLF in the original
415  // message. This parser assumes CRLF iff the LF is followed by
416  // either WSP (folding) or NULL (premature end of quoted-string;
417  // Should be fixed, since NULL is allowed as per rfc822).
418  READ_ch_OR_FAIL;
419  if ( !isCRLF && ( ch == ' ' || ch == '\t' ) ) {
420  // folding
421  // correct folding
422  result += TQChar(ch);
423  } else {
424  // non-folding
425  KMIME_WARN_LONE(LF);
426  result += TQChar('\n');
427  // pos is decremented, so's we need not duplicate the whole
428  // switch here. ch could've been everything (incl. <">, "\").
429  scursor--;
430  }
431  break;
432  default:
433  KMIME_WARN_IF_8BIT(ch);
434  result += TQChar(ch);
435  }
436  }
437 
438  return false;
439 }
440 
441 // known issues:
442 //
443 // - doesn't handle encoded-word inside comments.
444 
445 bool parseComment( const char* & scursor, const char * const send,
446  TQString & result, bool isCRLF, bool reallySave )
447 {
448  int commentNestingDepth = 1;
449  const char * afterLastClosingParenPos = 0;
450  TQString maybeCmnt;
451  const char * oldscursor = scursor;
452 
453  assert( *(scursor-1) == '(' );
454 
455  while ( commentNestingDepth ) {
456  TQString cmntPart;
457  if ( parseGenericQuotedString( scursor, send, cmntPart, isCRLF, '(', ')' ) ) {
458  assert( *(scursor-1) == ')' || *(scursor-1) == '(' );
459  // see the kdoc for above function for the possible conditions
460  // we have to check:
461  switch ( *(scursor-1) ) {
462  case ')':
463  if ( reallySave ) {
464  // add the chunk that's now surely inside the comment.
465  result += maybeCmnt;
466  result += cmntPart;
467  if ( commentNestingDepth > 1 ) // don't add the outermost ')'...
468  result += TQChar(')');
469  maybeCmnt = TQString();
470  }
471  afterLastClosingParenPos = scursor;
472  --commentNestingDepth;
473  break;
474  case '(':
475  if ( reallySave ) {
476  // don't add to "result" yet, because we might find that we
477  // are already outside the (broken) comment...
478  maybeCmnt += cmntPart;
479  maybeCmnt += TQChar('(');
480  }
481  ++commentNestingDepth;
482  break;
483  default: assert( 0 );
484  } // switch
485  } else {
486  // !parseGenericQuotedString, ie. premature end
487  if ( afterLastClosingParenPos )
488  scursor = afterLastClosingParenPos;
489  else
490  scursor = oldscursor;
491  return false;
492  }
493  } // while
494 
495  return true;
496 }
497 
498 
499 // known issues: none.
500 
501 bool parsePhrase( const char* & scursor, const char * const send,
502  TQString & result, bool isCRLF )
503 {
504  enum { None, Phrase, Atom, EncodedWord, QuotedString } found = None;
505  TQString tmp;
506  TQCString lang;
507  const char * successfullyParsed = 0;
508  // only used by the encoded-word branch
509  const char * oldscursor;
510  // used to suppress whitespace between adjacent encoded-words
511  // (rfc2047, 6.2):
512  bool lastWasEncodedWord = false;
513 
514  while ( scursor != send ) {
515  char ch = *scursor++;
516  switch ( ch ) {
517  case '.': // broken, but allow for intorop's sake
518  if ( found == None ) {
519  --scursor;
520  return false;
521  } else {
522  if ( scursor != send && ( *scursor == ' ' || *scursor == '\t' ) )
523  result += ". ";
524  else
525  result += '.';
526  successfullyParsed = scursor;
527  }
528  break;
529  case '"': // quoted-string
530  tmp = TQString();
531  if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) {
532  successfullyParsed = scursor;
533  assert( *(scursor-1) == '"' );
534  switch ( found ) {
535  case None:
536  found = QuotedString;
537  break;
538  case Phrase:
539  case Atom:
540  case EncodedWord:
541  case QuotedString:
542  found = Phrase;
543  result += TQChar(' '); // rfc822, 3.4.4
544  break;
545  default:
546  assert( 0 );
547  }
548  lastWasEncodedWord = false;
549  result += tmp;
550  } else {
551  // premature end of quoted string.
552  // What to do? Return leading '"' as special? Return as quoted-string?
553  // We do the latter if we already found something, else signal failure.
554  if ( found == None ) {
555  return false;
556  } else {
557  result += TQChar(' '); // rfc822, 3.4.4
558  result += tmp;
559  return true;
560  }
561  }
562  break;
563  case '(': // comment
564  // parse it, but ignore content:
565  tmp = TQString();
566  if ( parseComment( scursor, send, tmp, isCRLF,
567  false /*don't bother with the content*/ ) ) {
568  successfullyParsed = scursor;
569  lastWasEncodedWord = false; // strictly interpreting rfc2047, 6.2
570  } else {
571  if ( found == None )
572  return false;
573  else {
574  scursor = successfullyParsed;
575  return true;
576  }
577  }
578  break;
579  case '=': // encoded-word
580  tmp = TQString();
581  oldscursor = scursor;
582  lang = 0;
583  if ( parseEncodedWord( scursor, send, tmp, lang ) ) {
584  successfullyParsed = scursor;
585  switch ( found ) {
586  case None:
587  found = EncodedWord;
588  break;
589  case Phrase:
590  case EncodedWord:
591  case Atom:
592  case QuotedString:
593  if ( !lastWasEncodedWord )
594  result += TQChar(' '); // rfc822, 3.4.4
595  found = Phrase;
596  break;
597  default: assert( 0 );
598  }
599  lastWasEncodedWord = true;
600  result += tmp;
601  break;
602  } else
603  // parse as atom:
604  scursor = oldscursor;
605  // fall though...
606 
607  default: //atom
608  tmp = TQString();
609  scursor--;
610  if ( parseAtom( scursor, send, tmp, true /* allow 8bit */ ) ) {
611  successfullyParsed = scursor;
612  switch ( found ) {
613  case None:
614  found = Atom;
615  break;
616  case Phrase:
617  case Atom:
618  case EncodedWord:
619  case QuotedString:
620  found = Phrase;
621  result += TQChar(' '); // rfc822, 3.4.4
622  break;
623  default:
624  assert( 0 );
625  }
626  lastWasEncodedWord = false;
627  result += tmp;
628  } else {
629  if ( found == None )
630  return false;
631  else {
632  scursor = successfullyParsed;
633  return true;
634  }
635  }
636  }
637  eatWhiteSpace( scursor, send );
638  }
639 
640  return ( found != None );
641 }
642 
643 
644 bool parseDotAtom( const char* & scursor, const char * const send,
645  TQString & result, bool isCRLF )
646 {
647  // always points to just after the last atom parsed:
648  const char * successfullyParsed;
649 
650  TQString tmp;
651  if ( !parseAtom( scursor, send, tmp, false /* no 8bit */ ) )
652  return false;
653  result += tmp;
654  successfullyParsed = scursor;
655 
656  while ( scursor != send ) {
657  eatCFWS( scursor, send, isCRLF );
658 
659  // end of header or no '.' -> return
660  if ( scursor == send || *scursor != '.' ) return true;
661  scursor++; // eat '.'
662 
663  eatCFWS( scursor, send, isCRLF );
664 
665  if ( scursor == send || !isAText( *scursor ) ) {
666  // end of header or no AText, but this time following a '.'!:
667  // reset cursor to just after last successfully parsed char and
668  // return:
669  scursor = successfullyParsed;
670  return true;
671  }
672 
673  // try to parse the next atom:
674  TQString maybeAtom;
675  if ( !parseAtom( scursor, send, maybeAtom, false /*no 8bit*/ ) ) {
676  scursor = successfullyParsed;
677  return true;
678  }
679 
680  result += TQChar('.');
681  result += maybeAtom;
682  successfullyParsed = scursor;
683  }
684 
685  scursor = successfullyParsed;
686  return true;
687 }
688 
689 
690 void eatCFWS( const char* & scursor, const char * const send, bool isCRLF ) {
691  TQString dummy;
692 
693  while ( scursor != send ) {
694  const char * oldscursor = scursor;
695 
696  char ch = *scursor++;
697 
698  switch( ch ) {
699  case ' ':
700  case '\t': // whitespace
701  case '\r':
702  case '\n': // folding
703  continue;
704 
705  case '(': // comment
706  if ( parseComment( scursor, send, dummy, isCRLF, false /*don't save*/ ) )
707  continue;
708  scursor = oldscursor;
709  return;
710 
711  default:
712  scursor = oldscursor;
713  return;
714  }
715 
716  }
717 }
718 
719 bool parseDomain( const char* & scursor, const char * const send,
720  TQString & result, bool isCRLF ) {
721  eatCFWS( scursor, send, isCRLF );
722  if ( scursor == send ) return false;
723 
724  // domain := dot-atom / domain-literal / atom *("." atom)
725  //
726  // equivalent to:
727  // domain = dot-atom / domain-literal,
728  // since parseDotAtom does allow CFWS between atoms and dots
729 
730  if ( *scursor == '[' ) {
731  // domain-literal:
732  TQString maybeDomainLiteral;
733  // eat '[':
734  scursor++;
735  while ( parseGenericQuotedString( scursor, send, maybeDomainLiteral,
736  isCRLF, '[', ']' ) ) {
737  if ( scursor == send ) {
738  // end of header: check for closing ']':
739  if ( *(scursor-1) == ']' ) {
740  // OK, last char was ']':
741  result = maybeDomainLiteral;
742  return true;
743  } else {
744  // not OK, domain-literal wasn't closed:
745  return false;
746  }
747  }
748  // we hit openChar in parseGenericQuotedString.
749  // include it in maybeDomainLiteral and keep on parsing:
750  if ( *(scursor-1) == '[' ) {
751  maybeDomainLiteral += TQChar('[');
752  continue;
753  }
754  // OK, real end of domain-literal:
755  result = maybeDomainLiteral;
756  return true;
757  }
758  } else {
759  // dot-atom:
760  TQString maybeDotAtom;
761  if ( parseDotAtom( scursor, send, maybeDotAtom, isCRLF ) ) {
762  result = maybeDotAtom;
763  return true;
764  }
765  }
766  return false;
767 }
768 
769 bool parseObsRoute( const char* & scursor, const char* const send,
770  TQStringList & result, bool isCRLF, bool save ) {
771  while ( scursor != send ) {
772  eatCFWS( scursor, send, isCRLF );
773  if ( scursor == send ) return false;
774 
775  // empty entry:
776  if ( *scursor == ',' ) {
777  scursor++;
778  if ( save ) result.append( TQString() );
779  continue;
780  }
781 
782  // empty entry ending the list:
783  if ( *scursor == ':' ) {
784  scursor++;
785  if ( save ) result.append( TQString() );
786  return true;
787  }
788 
789  // each non-empty entry must begin with '@':
790  if ( *scursor != '@' )
791  return false;
792  else
793  scursor++;
794 
795  TQString maybeDomain;
796  if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) return false;
797  if ( save ) result.append( maybeDomain );
798 
799  // eat the following (optional) comma:
800  eatCFWS( scursor, send, isCRLF );
801  if ( scursor == send ) return false;
802  if ( *scursor == ':' ) { scursor++; return true; }
803  if ( *scursor == ',' ) scursor++;
804 
805  }
806 
807  return false;
808 }
809 
810 bool parseAddrSpec( const char* & scursor, const char * const send,
811  AddrSpec & result, bool isCRLF ) {
812  //
813  // STEP 1:
814  // local-part := dot-atom / quoted-string / word *("." word)
815  //
816  // this is equivalent to:
817  // local-part := word *("." word)
818 
819  TQString maybeLocalPart;
820  TQString tmp;
821 
822  while ( scursor != send ) {
823  // first, eat any whitespace
824  eatCFWS( scursor, send, isCRLF );
825 
826  char ch = *scursor++;
827  switch ( ch ) {
828  case '.': // dot
829  maybeLocalPart += TQChar('.');
830  break;
831 
832  case '@':
833  goto SAW_AT_SIGN;
834  break;
835 
836  case '"': // quoted-string
837  tmp = TQString();
838  if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) )
839  maybeLocalPart += tmp;
840  else
841  return false;
842  break;
843 
844  default: // atom
845  scursor--; // re-set scursor to point to ch again
846  tmp = TQString();
847  if ( parseAtom( scursor, send, tmp, false /* no 8bit */ ) )
848  maybeLocalPart += tmp;
849  else
850  return false; // parseAtom can only fail if the first char is non-atext.
851  break;
852  }
853  }
854 
855  return false;
856 
857 
858  //
859  // STEP 2:
860  // domain
861  //
862 
863 SAW_AT_SIGN:
864 
865  assert( *(scursor-1) == '@' );
866 
867  TQString maybeDomain;
868  if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) )
869  return false;
870 
871  result.localPart = maybeLocalPart;
872  result.domain = maybeDomain;
873 
874  return true;
875 }
876 
877 
878 bool parseAngleAddr( const char* & scursor, const char * const send,
879  AddrSpec & result, bool isCRLF ) {
880  // first, we need an opening angle bracket:
881  eatCFWS( scursor, send, isCRLF );
882  if ( scursor == send || *scursor != '<' ) return false;
883  scursor++; // eat '<'
884 
885  eatCFWS( scursor, send, isCRLF );
886  if ( scursor == send ) return false;
887 
888  if ( *scursor == '@' || *scursor == ',' ) {
889  // obs-route: parse, but ignore:
890  KMIME_WARN << "obsolete source route found! ignoring." << endl;
891  TQStringList dummy;
892  if ( !parseObsRoute( scursor, send, dummy,
893  isCRLF, false /* don't save */ ) )
894  return false;
895  // angle-addr isn't complete until after the '>':
896  if ( scursor == send ) return false;
897  }
898 
899  // parse addr-spec:
900  AddrSpec maybeAddrSpec;
901  if ( !parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) return false;
902 
903  eatCFWS( scursor, send, isCRLF );
904  if ( scursor == send || *scursor != '>' ) return false;
905  scursor++;
906 
907  result = maybeAddrSpec;
908  return true;
909 
910 }
911 
912 bool parseMailbox( const char* & scursor, const char * const send,
913  Mailbox & result, bool isCRLF ) {
914 
915  // rfc:
916  // mailbox := addr-spec / ([ display-name ] angle-addr)
917  // us:
918  // mailbox := addr-spec / ([ display-name ] angle-addr)
919  // / (angle-addr "(" display-name ")")
920 
921  eatCFWS( scursor, send, isCRLF );
922  if ( scursor == send ) return false;
923 
924  AddrSpec maybeAddrSpec;
925 
926  // first, try if it's a vanilla addr-spec:
927  const char * oldscursor = scursor;
928  if ( parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) {
929  result.displayName = TQString();
930  result.addrSpec = maybeAddrSpec;
931  return true;
932  }
933  scursor = oldscursor;
934 
935  // second, see if there's a display-name:
936  TQString maybeDisplayName;
937  if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) {
938  // failed: reset cursor, note absent display-name
939  maybeDisplayName = TQString();
940  scursor = oldscursor;
941  } else {
942  // succeeded: eat CFWS
943  eatCFWS( scursor, send, isCRLF );
944  if ( scursor == send ) return false;
945  }
946 
947  // third, parse the angle-addr:
948  if ( !parseAngleAddr( scursor, send, maybeAddrSpec, isCRLF ) )
949  return false;
950 
951  if ( maybeDisplayName.isNull() ) {
952  // check for the obsolete form of display-name (as comment):
953  eatWhiteSpace( scursor, send );
954  if ( scursor != send && *scursor == '(' ) {
955  scursor++;
956  if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) )
957  return false;
958  }
959  }
960 
961  result.displayName = maybeDisplayName;
962  result.addrSpec = maybeAddrSpec;
963  return true;
964 }
965 
966 bool parseGroup( const char* & scursor, const char * const send,
967  Address & result, bool isCRLF ) {
968  // group := display-name ":" [ mailbox-list / CFWS ] ";" [CFWS]
969  //
970  // equivalent to:
971  // group := display-name ":" [ obs-mbox-list ] ";"
972 
973  eatCFWS( scursor, send, isCRLF );
974  if ( scursor == send ) return false;
975 
976  // get display-name:
977  TQString maybeDisplayName;
978  if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) )
979  return false;
980 
981  // get ":":
982  eatCFWS( scursor, send, isCRLF );
983  if ( scursor == send || *scursor != ':' ) return false;
984 
985  result.displayName = maybeDisplayName;
986 
987  // get obs-mbox-list (may contain empty entries):
988  scursor++;
989  while ( scursor != send ) {
990  eatCFWS( scursor, send, isCRLF );
991  if ( scursor == send ) return false;
992 
993  // empty entry:
994  if ( *scursor == ',' ) { scursor++; continue; }
995 
996  // empty entry ending the list:
997  if ( *scursor == ';' ) { scursor++; return true; }
998 
999  Mailbox maybeMailbox;
1000  if ( !parseMailbox( scursor, send, maybeMailbox, isCRLF ) )
1001  return false;
1002  result.mailboxList.append( maybeMailbox );
1003 
1004  eatCFWS( scursor, send, isCRLF );
1005  // premature end:
1006  if ( scursor == send ) return false;
1007  // regular end of the list:
1008  if ( *scursor == ';' ) { scursor++; return true; }
1009  // eat regular list entry separator:
1010  if ( *scursor == ',' ) scursor++;
1011  }
1012  return false;
1013 }
1014 
1015 
1016 bool parseAddress( const char* & scursor, const char * const send,
1017  Address & result, bool isCRLF ) {
1018  // address := mailbox / group
1019 
1020  eatCFWS( scursor, send, isCRLF );
1021  if ( scursor == send ) return false;
1022 
1023  // first try if it's a single mailbox:
1024  Mailbox maybeMailbox;
1025  const char * oldscursor = scursor;
1026  if ( parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) {
1027  // yes, it is:
1028  result.displayName = TQString();
1029  result.mailboxList.append( maybeMailbox );
1030  return true;
1031  }
1032  scursor = oldscursor;
1033 
1034  Address maybeAddress;
1035 
1036  // no, it's not a single mailbox. Try if it's a group:
1037  if ( !parseGroup( scursor, send, maybeAddress, isCRLF ) )
1038  return false;
1039 
1040  result = maybeAddress;
1041  return true;
1042 }
1043 
1044 bool parseAddressList( const char* & scursor, const char * const send,
1045  AddressList & result, bool isCRLF ) {
1046  while ( scursor != send ) {
1047  eatCFWS( scursor, send, isCRLF );
1048  // end of header: this is OK.
1049  if ( scursor == send ) return true;
1050  // empty entry: ignore:
1051  if ( *scursor == ',' ) { scursor++; continue; }
1052 
1053  // parse one entry
1054  Address maybeAddress;
1055  if ( !parseAddress( scursor, send, maybeAddress, isCRLF ) ) return false;
1056  result.append( maybeAddress );
1057 
1058  eatCFWS( scursor, send, isCRLF );
1059  // end of header: this is OK.
1060  if ( scursor == send ) return true;
1061  // comma separating entries: eat it.
1062  if ( *scursor == ',' ) scursor++;
1063  }
1064  return true;
1065 }
1066 
1067 
1068 static TQString asterisk = TQString::fromLatin1("*0*",1);
1069 static TQString asteriskZero = TQString::fromLatin1("*0*",2);
1070 //static TQString asteriskZeroAsterisk = TQString::fromLatin1("*0*",3);
1071 
1072 bool parseParameter( const char* & scursor, const char * const send,
1073  TQPair<TQString,TQStringOrTQPair> & result, bool isCRLF ) {
1074  // parameter = regular-parameter / extended-parameter
1075  // regular-parameter = regular-parameter-name "=" value
1076  // extended-parameter =
1077  // value = token / quoted-string
1078  //
1079  // note that rfc2231 handling is out of the scope of this function.
1080  // Therefore we return the attribute as TQString and the value as
1081  // (start,length) tupel if we see that the value is encoded
1082  // (trailing asterisk), for parseParameterList to decode...
1083 
1084  eatCFWS( scursor, send, isCRLF );
1085  if ( scursor == send ) return false;
1086 
1087  //
1088  // parse the parameter name:
1089  //
1090  TQString maybeAttribute;
1091  if ( !parseToken( scursor, send, maybeAttribute, false /* no 8bit */ ) )
1092  return false;
1093 
1094  eatCFWS( scursor, send, isCRLF );
1095  // premature end: not OK (haven't seen '=' yet).
1096  if ( scursor == send || *scursor != '=' ) return false;
1097  scursor++; // eat '='
1098 
1099  eatCFWS( scursor, send, isCRLF );
1100  if ( scursor == send ) {
1101  // don't choke on attribute=, meaning the value was omitted:
1102  if ( maybeAttribute.endsWith( asterisk ) ) {
1103  KMIME_WARN << "attribute ends with \"*\", but value is empty! "
1104  "Chopping away \"*\"." << endl;
1105  maybeAttribute.truncate( maybeAttribute.length() - 1 );
1106  }
1107  result = qMakePair( maybeAttribute.lower(), TQStringOrTQPair() );
1108  return true;
1109  }
1110 
1111  const char * oldscursor = scursor;
1112 
1113  //
1114  // parse the parameter value:
1115  //
1116  TQStringOrTQPair maybeValue;
1117  if ( *scursor == '"' ) {
1118  // value is a quoted-string:
1119  scursor++;
1120  if ( maybeAttribute.endsWith( asterisk ) ) {
1121  // attributes ending with "*" designate extended-parameters,
1122  // which cannot have quoted-strings as values. So we remove the
1123  // trailing "*" to not confuse upper layers.
1124  KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string! "
1125  "Chopping away \"*\"." << endl;
1126  maybeAttribute.truncate( maybeAttribute.length() - 1 );
1127  }
1128 
1129  if ( !parseGenericQuotedString( scursor, send, maybeValue.qstring, isCRLF ) ) {
1130  scursor = oldscursor;
1131  result = qMakePair( maybeAttribute.lower(), TQStringOrTQPair() );
1132  return false; // this case needs further processing by upper layers!!
1133  }
1134  } else {
1135  // value is a token:
1136  if ( !parseToken( scursor, send, maybeValue.qpair, false /* no 8bit */ ) ) {
1137  scursor = oldscursor;
1138  result = qMakePair( maybeAttribute.lower(), TQStringOrTQPair() );
1139  return false; // this case needs further processing by upper layers!!
1140  }
1141  }
1142 
1143  result = qMakePair( maybeAttribute.lower(), maybeValue );
1144  return true;
1145 }
1146 
1147 
1148 
1149 bool parseRawParameterList( const char* & scursor, const char * const send,
1150  TQMap<TQString,TQStringOrTQPair> & result,
1151  bool isCRLF ) {
1152  // we use parseParameter() consecutively to obtain a map of raw
1153  // attributes to raw values. "Raw" here means that we don't do
1154  // rfc2231 decoding and concatenation. This is left to
1155  // parseParameterList(), which will call this function.
1156  //
1157  // The main reason for making this chunk of code a separate
1158  // (private) method is that we can deal with broken parameters
1159  // _here_ and leave the rfc2231 handling solely to
1160  // parseParameterList(), which will still be enough work.
1161 
1162  while ( scursor != send ) {
1163  eatCFWS( scursor, send, isCRLF );
1164  // empty entry ending the list: OK.
1165  if ( scursor == send ) return true;
1166  // empty list entry: ignore.
1167  if ( *scursor == ';' ) { scursor++; continue; }
1168 
1169  TQPair<TQString,TQStringOrTQPair> maybeParameter;
1170  if ( !parseParameter( scursor, send, maybeParameter, isCRLF ) ) {
1171  // we need to do a bit of work if the attribute is not
1172  // NULL. These are the cases marked with "needs further
1173  // processing" in parseParameter(). Specifically, parsing of the
1174  // token or the quoted-string, which should represent the value,
1175  // failed. We take the easy way out and simply search for the
1176  // next ';' to start parsing again. (Another option would be to
1177  // take the text between '=' and ';' as value)
1178  if ( maybeParameter.first.isNull() ) return false;
1179  while ( scursor != send ) {
1180  if ( *scursor++ == ';' ) goto IS_SEMICOLON;
1181  }
1182  // scursor == send case: end of list.
1183  return true;
1184  IS_SEMICOLON:
1185  // *scursor == ';' case: parse next entry.
1186  continue;
1187  }
1188  // successful parsing brings us here:
1189  result.insert( maybeParameter.first, maybeParameter.second );
1190 
1191  eatCFWS( scursor, send, isCRLF );
1192  // end of header: ends list.
1193  if ( scursor == send ) return true;
1194  // regular separator: eat it.
1195  if ( *scursor == ';' ) scursor++;
1196  }
1197  return true;
1198 }
1199 
1200 
1201 static void decodeRFC2231Value( Codec* & rfc2231Codec,
1202  TQTextCodec* & textcodec,
1203  bool isContinuation, TQString & value,
1204  TQPair<const char*,int> & source ) {
1205 
1206  //
1207  // parse the raw value into (charset,language,text):
1208  //
1209 
1210  const char * decBegin = source.first;
1211  const char * decCursor = decBegin;
1212  const char * decEnd = decCursor + source.second;
1213 
1214  if ( !isContinuation ) {
1215  // find the first single quote
1216  while ( decCursor != decEnd ) {
1217  if ( *decCursor == '\'' ) break;
1218  else decCursor++;
1219  }
1220 
1221  if ( decCursor == decEnd ) {
1222  // there wasn't a single single quote at all!
1223  // take the whole value to be in latin-1:
1224  KMIME_WARN << "No charset in extended-initial-value. "
1225  "Assuming \"iso-8859-1\"." << endl;
1226  value += TQString::fromLatin1( decBegin, source.second );
1227  return;
1228  }
1229 
1230  TQCString charset( decBegin, decCursor - decBegin + 1 );
1231 
1232  const char * oldDecCursor = ++decCursor;
1233  // find the second single quote (we ignore the language tag):
1234  while ( decCursor != decEnd ) {
1235  if ( *decCursor == '\'' ) break;
1236  else decCursor++;
1237  }
1238  if ( decCursor == decEnd ) {
1239  KMIME_WARN << "No language in extended-initial-value. "
1240  "Trying to recover." << endl;
1241  decCursor = oldDecCursor;
1242  } else
1243  decCursor++;
1244 
1245  // decCursor now points to the start of the
1246  // "extended-other-values":
1247 
1248  //
1249  // get the decoders:
1250  //
1251 
1252  bool matchOK = false;
1253  textcodec = TDEGlobal::charsets()->codecForName( charset, matchOK );
1254  if ( !matchOK ) {
1255  textcodec = 0;
1256  KMIME_WARN_UNKNOWN(Charset,charset);
1257  }
1258  }
1259 
1260  if ( !rfc2231Codec ) {
1261  rfc2231Codec = Codec::codecForName("x-kmime-rfc2231");
1262  assert( rfc2231Codec );
1263  }
1264 
1265  if ( !textcodec ) {
1266  value += TQString::fromLatin1( decCursor, decEnd - decCursor );
1267  return;
1268  }
1269 
1270  Decoder * dec = rfc2231Codec->makeDecoder();
1271  assert( dec );
1272 
1273  //
1274  // do the decoding:
1275  //
1276 
1277  TQByteArray buffer( rfc2231Codec->maxDecodedSizeFor( decEnd - decCursor ) );
1278  TQByteArray::Iterator bit = buffer.begin();
1279  TQByteArray::ConstIterator bend = buffer.end();
1280 
1281  if ( !dec->decode( decCursor, decEnd, bit, bend ) )
1282  KMIME_WARN << rfc2231Codec->name()
1283  << " codec lies about it's maxDecodedSizeFor()\n"
1284  "result may be truncated" << endl;
1285 
1286  value += textcodec->toUnicode( buffer.begin(), bit - buffer.begin() );
1287 
1288  kdDebug() << "value now: \"" << value << "\"" << endl;
1289  // cleanup:
1290  delete dec;
1291 }
1292 
1293 // known issues:
1294 // - permutes rfc2231 continuations when the total number of parts
1295 // exceeds 10 (other-sections then becomes *xy, ie. two digits)
1296 
1297 bool parseParameterList( const char* & scursor, const char * const send,
1298  TQMap<TQString,TQString> & result, bool isCRLF ) {
1299  // parse the list into raw attribute-value pairs:
1300  TQMap<TQString,TQStringOrTQPair> rawParameterList;
1301  if (!parseRawParameterList( scursor, send, rawParameterList, isCRLF ) )
1302  return false;
1303 
1304  if ( rawParameterList.isEmpty() ) return true;
1305 
1306  // decode rfc 2231 continuations and alternate charset encoding:
1307 
1308  // NOTE: this code assumes that what TQMapIterator delivers is sorted
1309  // by the key!
1310 
1311  Codec * rfc2231Codec = 0;
1312  TQTextCodec * textcodec = 0;
1313  TQString attribute;
1314  TQString value;
1315  enum Modes { NoMode = 0x0, Continued = 0x1, Encoded = 0x2 } mode;
1316 
1317  TQMapIterator<TQString,TQStringOrTQPair> it, end = rawParameterList.end();
1318 
1319  for ( it = rawParameterList.begin() ; it != end ; ++it ) {
1320  if ( attribute.isNull() || !it.key().startsWith( attribute ) ) {
1321  //
1322  // new attribute:
1323  //
1324 
1325  // store the last attribute/value pair in the result map now:
1326  if ( !attribute.isNull() ) result.insert( attribute, value );
1327  // and extract the information from the new raw attribute:
1328  value = TQString();
1329  attribute = it.key();
1330  mode = NoMode;
1331  // is the value encoded?
1332  if ( attribute.endsWith( asterisk ) ) {
1333  attribute.truncate( attribute.length() - 1 );
1334  mode = (Modes) ((int) mode | Encoded);
1335  }
1336  // is the value continued?
1337  if ( attribute.endsWith( asteriskZero ) ) {
1338  attribute.truncate( attribute.length() - 2 );
1339  mode = (Modes) ((int) mode | Continued);
1340  }
1341  //
1342  // decode if necessary:
1343  //
1344  if ( mode & Encoded ) {
1345  decodeRFC2231Value( rfc2231Codec, textcodec,
1346  false, /* isn't continuation */
1347  value, (*it).qpair );
1348  } else {
1349  // not encoded.
1350  if ( (*it).qpair.first )
1351  value += TQString::fromLatin1( (*it).qpair.first, (*it).qpair.second );
1352  else
1353  value += (*it).qstring;
1354  }
1355 
1356  //
1357  // shortcut-processing when the value isn't encoded:
1358  //
1359 
1360  if ( !(mode & Continued) ) {
1361  // save result already:
1362  result.insert( attribute, value );
1363  // force begin of a new attribute:
1364  attribute = TQString();
1365  }
1366  } else /* it.key().startsWith( attribute ) */ {
1367  //
1368  // continuation
1369  //
1370 
1371  // ignore the section and trust TQMap to have sorted the keys:
1372  if ( it.key().endsWith( asterisk ) ) {
1373  // encoded
1374  decodeRFC2231Value( rfc2231Codec, textcodec,
1375  true, /* is continuation */
1376  value, (*it).qpair );
1377  } else {
1378  // not encoded
1379  if ( (*it).qpair.first )
1380  value += TQString::fromLatin1( (*it).qpair.first, (*it).qpair.second );
1381  else
1382  value += (*it).qstring;
1383  }
1384  }
1385  }
1386 
1387  // write last attr/value pair:
1388  if ( !attribute.isNull() )
1389  result.insert( attribute, value );
1390 
1391  return true;
1392 }
1393 
// Three-letter day-of-week abbreviations recognised by parseDayName().
static const char * stdDayNames[] = {
  "Sun",
  "Mon",
  "Tue",
  "Wed",
  "Thu",
  "Fri",
  "Sat"
};
// number of entries in stdDayNames:
static const int stdDayNamesLen =
  sizeof( stdDayNames ) / sizeof( stdDayNames[0] );
1398 
1399 static bool parseDayName( const char* & scursor, const char * const send )
1400 {
1401  // check bounds:
1402  if ( send - scursor < 3 ) return false;
1403 
1404  for ( int i = 0 ; i < stdDayNamesLen ; ++i )
1405  if ( tqstrnicmp( scursor, stdDayNames[i], 3 ) == 0 ) {
1406  scursor += 3;
1407  kdDebug() << "found " << stdDayNames[i] << endl;
1408  return true;
1409  }
1410 
1411  return false;
1412 }
1413 
1414 
// Three-letter month abbreviations as defined by RFC 2822 (month-name),
// in calendar order, so that an entry's index is the zero-based month
// number. The last entry must be the English "Dec", not "Dez";
// otherwise December dates cannot be parsed.
static const char * stdMonthNames[] = {
  "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
// number of entries in stdMonthNames:
static const int stdMonthNamesLen =
  sizeof stdMonthNames / sizeof *stdMonthNames;
1421 
1422 static bool parseMonthName( const char* & scursor, const char * const send,
1423  int & result )
1424 {
1425  // check bounds:
1426  if ( send - scursor < 3 ) return false;
1427 
1428  for ( result = 0 ; result < stdMonthNamesLen ; ++result )
1429  if ( tqstrnicmp( scursor, stdMonthNames[result], 3 ) == 0 ) {
1430  scursor += 3;
1431  return true;
1432  }
1433 
1434  // not found:
1435  return false;
1436 }
1437 
// Table of alphabetic time zone names and their offsets from GMT in
// seconds (positive values are east of GMT). Lookups scan the table
// front to back, so every name must appear only once.
static const struct {
  const char * tzName;
  long int secsEastOfGMT;
} timeZones[] = {
  // rfc 822 timezones:
  { "GMT", 0 },
  { "UT", 0 },
  { "EDT", -4*3600 },
  { "EST", -5*3600 },
  // central daylight time. This entry used to be a second "MST", which
  // shadowed the real MST below and left CDT unknown.
  { "CDT", -5*3600 },
  { "CST", -6*3600 },
  { "MDT", -6*3600 },
  { "MST", -7*3600 },
  { "PDT", -7*3600 },
  { "PST", -8*3600 },
  // common, non-rfc-822 zones:
  { "CET", 1*3600 },
  { "MET", 1*3600 },
  { "UTC", 0 },
  { "CEST", 2*3600 },
  { "BST", 1*3600 },
  // rfc 822 military timezones:
  { "Z", 0 },
  { "A", -1*3600 },
  { "B", -2*3600 },
  { "C", -3*3600 },
  { "D", -4*3600 },
  { "E", -5*3600 },
  { "F", -6*3600 },
  { "G", -7*3600 },
  { "H", -8*3600 },
  { "I", -9*3600 },
  // J is not used!
  { "K", -10*3600 },
  { "L", -11*3600 },
  { "M", -12*3600 },
  { "N", 1*3600 },
  { "O", 2*3600 },
  { "P", 3*3600 },
  { "Q", 4*3600 },
  { "R", 5*3600 },
  { "S", 6*3600 },
  { "T", 7*3600 },
  { "U", 8*3600 },
  { "V", 9*3600 },
  { "W", 10*3600 },
  { "X", 11*3600 },
  { "Y", 12*3600 },
};
// number of entries in timeZones:
static const int timeZonesLen = sizeof timeZones / sizeof *timeZones;
1488 
1489 static bool parseAlphaNumericTimeZone( const char* & scursor,
1490  const char * const send,
1491  long int & secsEastOfGMT,
1492  bool & timeZoneKnown )
1493 {
1494  TQPair<const char*,int> maybeTimeZone(0,0);
1495  if ( !parseToken( scursor, send, maybeTimeZone, false /*no 8bit*/ ) )
1496  return false;
1497  for ( int i = 0 ; i < timeZonesLen ; ++i )
1498  if ( tqstrnicmp( timeZones[i].tzName,
1499  maybeTimeZone.first, maybeTimeZone.second ) == 0 ) {
1500  scursor += maybeTimeZone.second;
1501  secsEastOfGMT = timeZones[i].secsEastOfGMT;
1502  timeZoneKnown = true;
1503  return true;
1504  }
1505 
1506  // don't choke just because we don't happen to know the time zone
1507  KMIME_WARN_UNKNOWN(time zone,TQCString( maybeTimeZone.first, maybeTimeZone.second+1 ));
1508  secsEastOfGMT = 0;
1509  timeZoneKnown = false;
1510  return true;
1511 }
1512 
// Parses a run of decimal digits starting at scursor.
//
// 'result' receives the numeric value (0 if there are no digits) and
// scursor is left on the first non-digit character, or on 'send'.
// Returns the number of digits consumed. No overflow check is done, so
// very long digit runs do not yield a meaningful 'result'.
static int parseDigits( const char* & scursor, const char * const send,
                        int & result )
{
  result = 0;
  int digits = 0;
  // isdigit() must not be given a negative value, which a plain char
  // holding an 8-bit character can be; hence the cast.
  for ( ; scursor != send
          && isdigit( static_cast<unsigned char>( *scursor ) ) ;
        scursor++, digits++ ) {
    result *= 10;
    result += int( *scursor - '0' );
  }
  return digits;
}
1525 
1526 static bool parseTimeOfDay( const char* & scursor, const char * const send,
1527  int & hour, int & min, int & sec, bool isCRLF=false )
1528 {
1529  // time-of-day := 2DIGIT [CFWS] ":" [CFWS] 2DIGIT [ [CFWS] ":" 2DIGIT ]
1530 
1531  //
1532  // 2DIGIT representing "hour":
1533  //
1534  if ( !parseDigits( scursor, send, hour ) ) return false;
1535 
1536  eatCFWS( scursor, send, isCRLF );
1537  if ( scursor == send || *scursor != ':' ) return false;
1538  scursor++; // eat ':'
1539 
1540  eatCFWS( scursor, send, isCRLF );
1541  if ( scursor == send ) return false;
1542 
1543  //
1544  // 2DIGIT representing "minute":
1545  //
1546  if ( !parseDigits( scursor, send, min ) ) return false;
1547 
1548  eatCFWS( scursor, send, isCRLF );
1549  if ( scursor == send ) return true; // seconds are optional
1550 
1551  //
1552  // let's see if we have a 2DIGIT representing "second":
1553  //
1554  if ( *scursor == ':' ) {
1555  // yepp, there are seconds:
1556  scursor++; // eat ':'
1557  eatCFWS( scursor, send, isCRLF );
1558  if ( scursor == send ) return false;
1559 
1560  if ( !parseDigits( scursor, send, sec ) ) return false;
1561  } else {
1562  sec = 0;
1563  }
1564 
1565  return true;
1566 }
1567 
1568 
1569 bool parseTime( const char* & scursor, const char * send,
1570  int & hour, int & min, int & sec, long int & secsEastOfGMT,
1571  bool & timeZoneKnown, bool isCRLF )
1572 {
1573  // time := time-of-day CFWS ( zone / obs-zone )
1574  //
1575  // obs-zone := "UT" / "GMT" /
1576  // "EST" / "EDT" / ; -0500 / -0400
1577  // "CST" / "CDT" / ; -0600 / -0500
1578  // "MST" / "MDT" / ; -0700 / -0600
1579  // "PST" / "PDT" / ; -0800 / -0700
1580  // "A"-"I" / "a"-"i" /
1581  // "K"-"Z" / "k"-"z"
1582 
1583  eatCFWS( scursor, send, isCRLF );
1584  if ( scursor == send ) return false;
1585 
1586  if ( !parseTimeOfDay( scursor, send, hour, min, sec, isCRLF ) )
1587  return false;
1588 
1589  eatCFWS( scursor, send, isCRLF );
1590  if ( scursor == send ) {
1591  timeZoneKnown = false;
1592  secsEastOfGMT = 0;
1593  return true; // allow missing timezone
1594  }
1595 
1596  timeZoneKnown = true;
1597  if ( *scursor == '+' || *scursor == '-' ) {
1598  // remember and eat '-'/'+':
1599  const char sign = *scursor++;
1600  // numerical timezone:
1601  int maybeTimeZone;
1602  if ( parseDigits( scursor, send, maybeTimeZone ) != 4 ) return false;
1603  secsEastOfGMT = 60 * ( maybeTimeZone / 100 * 60 + maybeTimeZone % 100 );
1604  if ( sign == '-' ) {
1605  secsEastOfGMT *= -1;
1606  if ( secsEastOfGMT == 0 )
1607  timeZoneKnown = false; // -0000 means indetermined tz
1608  }
1609  } else {
1610  // maybe alphanumeric timezone:
1611  if ( !parseAlphaNumericTimeZone( scursor, send, secsEastOfGMT, timeZoneKnown ) )
1612  return false;
1613  }
1614  return true;
1615 }
1616 
1617 
1618 bool parseDateTime( const char* & scursor, const char * const send,
1619  Types::DateTime & result, bool isCRLF )
1620 {
1621  // Parsing date-time; strict mode:
1622  //
1623  // date-time := [ [CFWS] day-name [CFWS] "," ] ; wday
1624  // (expanded) [CFWS] 1*2DIGIT CFWS month-name CFWS 2*DIGIT [CFWS] ; date
1625  // time
1626  //
1627  // day-name := "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"
1628  // month-name := "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" /
1629  // "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dez"
1630 
1631  struct tm maybeDateTime = {
1632 #ifdef HAVE_TM_GMTOFF
1633  0, 0, // initializers for members tm_gmtoff and tm_zone
1634 #endif
1635  0, 0, 0, 0, 0, 0, 0, 0, 0
1636  };
1637 
1638  eatCFWS( scursor, send, isCRLF );
1639  if ( scursor == send ) return false;
1640 
1641  //
1642  // let's see if there's a day-of-week:
1643  //
1644  if ( parseDayName( scursor, send ) ) {
1645  eatCFWS( scursor, send, isCRLF );
1646  if ( scursor == send ) return false;
1647  // day-name should be followed by ',' but we treat it as optional:
1648  if ( *scursor == ',' ) {
1649  scursor++; // eat ','
1650  eatCFWS( scursor, send, isCRLF );
1651  }
1652  }
1653 
1654  //
1655  // 1*2DIGIT representing "day" (of month):
1656  //
1657  int maybeDay;
1658  if ( !parseDigits( scursor, send, maybeDay ) ) return false;
1659 
1660  eatCFWS( scursor, send, isCRLF );
1661  if ( scursor == send ) return false;
1662 
1663  // success: store maybeDay in maybeDateTime:
1664  maybeDateTime.tm_mday = maybeDay;
1665 
1666  //
1667  // month-name:
1668  //
1669  int maybeMonth = 0;
1670  if ( !parseMonthName( scursor, send, maybeMonth ) ) return false;
1671  if ( scursor == send ) return false;
1672  assert( maybeMonth >= 0 ); assert( maybeMonth <= 11 );
1673 
1674  eatCFWS( scursor, send, isCRLF );
1675  if ( scursor == send ) return false;
1676 
1677  // success: store maybeMonth in maybeDateTime:
1678  maybeDateTime.tm_mon = maybeMonth;
1679 
1680  //
1681  // 2*DIGIT representing "year":
1682  //
1683  int maybeYear;
1684  if ( !parseDigits( scursor, send, maybeYear ) ) return false;
1685  // RFC 2822 4.3 processing:
1686  if ( maybeYear < 50 )
1687  maybeYear += 2000;
1688  else if ( maybeYear < 1000 )
1689  maybeYear += 1900;
1690  // else keep as is
1691  if ( maybeYear < 1900 ) return false; // rfc2822, 3.3
1692 
1693  eatCFWS( scursor, send, isCRLF );
1694  if ( scursor == send ) return false;
1695 
1696  // success: store maybeYear in maybeDateTime:
1697  maybeDateTime.tm_year = maybeYear - 1900;
1698 
1699  //
1700  // time
1701  //
1702  int maybeHour, maybeMinute, maybeSecond;
1703  long int secsEastOfGMT;
1704  bool timeZoneKnown = true;
1705 
1706  if ( !parseTime( scursor, send,
1707  maybeHour, maybeMinute, maybeSecond,
1708  secsEastOfGMT, timeZoneKnown, isCRLF ) )
1709  return false;
1710 
1711  // success: store everything in maybeDateTime:
1712  maybeDateTime.tm_hour = maybeHour;
1713  maybeDateTime.tm_min = maybeMinute;
1714  maybeDateTime.tm_sec = maybeSecond;
1715  maybeDateTime.tm_isdst = DateFormatter::isDaylight();
1716  // now put everything together and check if mktime(3) likes it:
1717  result.time = mktime( &maybeDateTime );
1718  if ( result.time == (time_t)(-1) ) return false;
1719 
1720  // adjust to UTC/GMT:
1721  //result.time -= secsEastOfGMT;
1722  result.secsEastOfGMT = secsEastOfGMT;
1723  result.timeZoneKnown = timeZoneKnown;
1724 
1725  return true;
1726 }
1727 
#if 0
// Compiled-out stub: it does not look at its arguments and always
// reports failure.
bool tryToMakeAnySenseOfDateString( const char* & scursor,
                                    const char * const send,
                                    time_t & result, bool isCRLF )
{
  return false;
}
#endif
1736 
1737 } // namespace HeaderParsing
1738 
1739 } // namespace KMime
Abstract base class of codecs like base64 and quoted-printable.
Definition: kmime_codecs.h:57
virtual const char * name() const =0
Stateful decoder class, modelled after TQTextDecoder.
Definition: kmime_codecs.h:268
virtual bool decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend)=0
Decode a chunk of data, maintaining state information between calls.