bug-mailfromd


Search for: Advanced

Re: [Bug-mailfromd] attachment filtering / removal


Hello,

Attached is an MFL module for verifying if the given MIME message
contains an attachment whose filename matches the supplied POSIX regular
expression.  See comments for usage examples.

Comments and improvements are most welcome.

Regards,
Sergey

/* Search for attachment filename in a MIME message       -*- mfl -*-
 * Copyright (C) 2024 Sergey Poznyakoff
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * This module provides a function for verifying if a MIME message
 * contains an attachment with the file name matching a supplied
 * regular expression.
 *
 * Example usage:
 *    require 'attnameflt'
 *    prog eom
 *    do
 *      if mime_match_filename(current_message(), '\.exe$')
 *        // do something
 *      fi
 *    done
 */
module 'attnameflt'.
require status

#pragma regex push +extended

/*
 * Strip one pair of enclosing double quotes off the string IN.  If IN is
 * not enclosed in double quotes, return it unchanged.
 */
func unquote(string in)
  returns string
do
  /*
   * The expression relies on ERE grouping, so request extended syntax
   * explicitly via the 'x' flag instead of depending on the default
   * regex flavour used by sed.
   */
  return sed(in, 's/^"(.*)"$/\1/x')
done

/*
 * Decode FILENAME if it begins with an RFC 2047 encoded word.
 *
 * Consecutive encoded words (optionally separated by whitespace) are
 * decoded one by one and concatenated.  Any text that follows the last
 * encoded word is appended verbatim, so that a suffix such as ".exe"
 * placed after an encoded word is not lost.  An encoded word that fails
 * to decode is copied to the result unchanged.
 *
 * Return the argument unchanged if it does not begin with an encoded word.
 */
func decode_2047(string filename)
  returns string
do
  if filename matches '^=\?([^?]+)\?([QB])\?'
    set result ''
    loop while filename matches '^(=\?([^?]+)\?([QB])\?[^[:space:]]+\?=)[[:space:]]*(.*)'
    do
      /*
       * Subexpressions: 1 - the whole encoded word, 2 - charset,
       * 3 - encoding, 4 - the text after the encoded word.  Save them
       * before calling anything else.
       */
      set coded \1
      set rest \4
      try
      do
        set result result . message_header_decode(coded)
      done
      catch *
      do
        /* Decoding failed: keep the raw encoded word. */
        set result result . coded
      done
      set filename rest
    done
    /* Whatever is left is not an encoded word; keep it as is. */
    return result . filename
  fi
  return filename
done

/*
 * If HEADER contains an extended value (RFC 2231, Section 4) or a
 * parameter value continuation (RFC 2231, Section 3) of the parameter
 * HNAME, decode it and return the result.
 *
 * Continuation segments are looked up in ascending order, starting from
 * segment 0, each one in the part of the header that follows the previous
 * segment.  Every segment is stripped of enclosing double quotes.  A
 * segment is percent-decoded if it is marked as extended (its name ends
 * with an asterisk) or carries a charset prefix; otherwise it is passed
 * through decode_2047.
 *
 * Return empty string if HEADER contains no such parameter.
 */
func decode_2231_cont(string header, string hname)
  returns string
do
  if header matches ".*;[[:space:]]*%hname\\*=(([^']+)'([^']*)')?([^;]+)(;.*)?$"
    return filter_string(unquote(\4), "decode(percent)")
  elif header matches ".*;[[:space:]]*%hname\\*0(\\*?)?=(([^']+)'([^']*)')?([^;]+)(;.*)?$"
    set result ''
    set i 0
    loop
    do
      if header matches ".*;[[:space:]]*%hname\\*%i(\\*?)?=(([^']+)'([^']*)')?([^;]+)(;.*)?$"
        /*
         * Subexpressions: 1 - the "extended" marker (trailing asterisk),
         * 3 - charset, 5 - segment value, 6 - rest of the header.
         * Save them all before calling any other function.
         */
        set marker \1
        set charset \3
        set s \5
        set header \6
        /* Segments may be quoted individually. */
        set s unquote(s)
        /*
         * Only the first segment carries the charset, so segments after
         * it are recognized as percent-encoded by the asterisk alone.
         */
        if marker == '*' or charset != ''
          set result result . filter_string(s, "decode(percent)")
        else
          set result result . decode_2047(s)
        fi
        set i i + 1
      else
        break
      fi
    done
    return result
  fi
  return ''
done

/*
 * Recursively descend into MIME structure of the message MSG, looking
 * for attachments whose file name matches extended regular expression
 * PATTERN.  Return 1 if a match is found, 0 otherwise.
 *
 * For a non-multipart part, the file name is taken from the "filename"
 * parameter of the Content-Disposition header if the disposition is
 * "attachment".  If that header is missing, or declares an attachment
 * without a file name, the "name" parameter of the Content-Type header
 * is used instead.  Parts for which no file name can be determined
 * never match.
 *
 * Additional parameters:
 *    max_level     If not 0, cut off at this nesting level;
 *    level         Current nesting level;
 *    pid           Part identifier for diagnostic messages.
 */
func _mimeparse(number msg, string pattern,
                number max_level, number level, string pid)
  returns number
do
  if message_is_multipart(msg)
    if max_level > 0 and level == max_level
      echo "%pid: WARNING: cut off at level %level"
      return 0
    fi
    set numparts message_count_parts(msg)
    loop for set i 1,
         while i <= numparts,
         set i i + 1
    do
      if _mimeparse(message_get_part(msg, i), pattern,
                    max_level, level + 1, "%pid.%i")
        return 1
      fi
    done
  else
    string filename
    /* Set to 1 when the Content-Type header should be consulted. */
    set check_ctype 0
    try
    do
      set s unfold(message_find_header(msg, "Content-Disposition"))
      /* Accept both "attachment; ..." and a bare "attachment". */
      if s matches '^attachment[[:space:]]*(;|$)'
        if s matches ';[[:space:]]*filename=([^;]+)'
          set filename decode_2047(unquote(\1))
        else
          set filename decode_2231_cont(s, 'filename')
        fi
        if filename == ''
          /* Attachment without a file name: try Content-Type. */
          set check_ctype 1
        fi
      fi
    done
    catch not_found
    do
      set check_ctype 1
    done

    if check_ctype
      try
      do
        set s unfold(message_find_header(msg, "Content-Type"))
        if s matches '[^/]+/.+;[[:space:]]*name=([^;]+)'
          set filename decode_2047(unquote(\1))
        else
          set filename decode_2231_cont(s, 'name')
        fi
      done
      catch not_found
      do
        pass
      done
    fi

    /*
     * Do not test an empty name: a pattern that matches the empty
     * string must not flag parts that have no file name at all.
     */
    if filename != ''
      if filename matches pattern
        return 1
      fi
    fi
  fi
  return 0
done

/*
 * Recursively descend into MIME structure of the message MSG, looking
 * for attachments matching extended regular expression PATTERN.  Return 1
 * if a match is found, 0 otherwise.
 *
 * Optional parameter max_level limits the descent level.  If it is
 * omitted, 0 is passed down, which _mimeparse treats as "no limit".
 */
func mime_match_filename(number msg, string pattern; number max_level)
  returns number
do
  /*
   * An optional argument must not be dereferenced unless it was actually
   * supplied, so test it with defined() first.
   */
  if defined(max_level)
    return _mimeparse(msg, pattern, max_level, 0, "MSG")
  fi
  return _mimeparse(msg, pattern, 0, 0, "MSG")
done
#pragma regex pop