viper-framework · SteveClement · Jun 14, 2018 · Jun 14, 2018 · Jun 14, 2018 · Jun 14, 2018
diff --git a/pdf-parser.py b/pdf-parser.py
@@ -2,10 +2,10 @@
 
 __description__ = 'pdf-parser, use it to parse a PDF document'
 __author__ = 'Didier Stevens'
-__version__ = '0.6.7'
-__date__ = '2016/12/17'
+__version__ = '0.6.8'
+__date__ = '2017/10/29'
 __minimum_python_version__ = (2, 5, 1)
-__maximum_python_version__ = (3, 4, 3)
+__maximum_python_version__ = (3, 6, 3)
 
 """
 Source code put in public domain by Didier Stevens, no Copyright
@@ -58,6 +58,9 @@
   2016/07/27: V0.6.5 bugfix whitespace 0x00 0x0C after stream 0x0D 0x0A reported by @mr_me
   2016/11/20: V0.6.6 added workaround zlib errors FlateDecode
   2016/12/17: V0.6.7 added option -k
+  2017/01/07: V0.6.8 changed cPDFParseDictionary to handle strings () with % character
+  2017/10/28: fixed bug
+  2017/10/29: added # support for option -y
 
 Todo:
   - handle printf todo
@@ -146,7 +149,9 @@ def Obj2Str(content):
 class cPDFDocument:
     def __init__(self, file):
         self.file = file
-        if file.lower().startswith('http://') or file.lower().startswith('https://'):
+        if type(file) != str:  # not a path/URL string: presumably an already-open file object, used as the input directly
+            self.infile = file
+        elif file.lower().startswith('http://') or file.lower().startswith('https://'):
             try:
                 if sys.hexversion >= 0x020601F0:
                     self.infile = urllib23.urlopen(file, timeout=5)
@@ -275,6 +280,14 @@ def TokenIgnoreWhiteSpace(self):
             token = self.Token()
         return token
 
+    def Tokens(self):
+        """Return every remaining token as a list.
+
+        Token() is called repeatedly until it returns None; the None is not included.
+        """
+        # iter(callable, sentinel) keeps calling Token() and stops at the first None
+        return list(iter(self.Token, None))
+
     def unget(self, byte):
         self.ungetted.append(byte)
 
@@ -643,7 +656,7 @@ def __init__(self, content, nocanonicalizedoutput):
         dataTrimmed = TrimLWhiteSpace(TrimRWhiteSpace(self.content))
         if dataTrimmed == []:
             self.parsed = None
-        elif self.isOpenDictionary(dataTrimmed[0]) and self.isCloseDictionary(dataTrimmed[-1]):
+        elif self.isOpenDictionary(dataTrimmed[0]) and (self.isCloseDictionary(dataTrimmed[-1]) or self.couldBeCloseDictionary(dataTrimmed[-1])):
             self.parsed = self.ParseDictionary(dataTrimmed)[0]
         else:
             self.parsed = None
@@ -654,6 +667,9 @@ def isOpenDictionary(self, token):
     def isCloseDictionary(self, token):
         return token[0] == CHAR_DELIMITER and token[1] == '>>'
 
+    def couldBeCloseDictionary(self, token):  # looser check than isCloseDictionary: the token text only has to END with '>>'
+        return token[0] == CHAR_DELIMITER and token[1].rstrip().endswith('>>')  # trailing whitespace is ignored
+
     def ParseDictionary(self, tokens):
         state = 0 # start
         dictionary = []
@@ -694,7 +710,11 @@ def ParseDictionary(self, tokens):
                 elif value == [] and tokens[0][1] == '(':
                     value.append(tokens[0][1])
                 elif value != [] and value[0] == '(' and tokens[0][1] != ')':
-                    value.append(tokens[0][1])
+                    if tokens[0][1][0] == '%':
+                        tokens = [tokens[0]] + cPDFTokenizer(StringIO(tokens[0][1][1:])).Tokens() + tokens[1:]
+                        value.append('%')
+                    else:
+                        value.append(tokens[0][1])
                 elif value != [] and value[0] == '(' and tokens[0][1] == ')':
                     value.append(tokens[0][1])
                     dictionary.append((key, value))
@@ -749,7 +769,7 @@ def GetNestedSub(self, dictionary, select):
         for key, value in dictionary:
             if key == select:
                 return self.PrettyPrintSubElement('', [select, value])
-            if type(value) == type([]) and type(value[0]) == type((None,)):
+            if type(value) == type([]) and len(value) > 0 and type(value[0]) == type((None,)):
                 result = self.GetNestedSub(value, select)
                 if result !=None:
                     return self.PrettyPrintSubElement('', [select, result])
@@ -1085,17 +1105,30 @@ def ProcessAt(argument):
     else:
         return [argument]
 
-def YARACompile(fileordirname):
-    dFilepaths = {}
-    if os.path.isdir(fileordirname):
-        for root, dirs, files in os.walk(fileordirname):
-            for file in files:
-                filename = os.path.join(root, file)
-                dFilepaths[filename] = filename
+def YARACompile(ruledata):
+    """Compile YARA rules given inline ('#'-prefixed ruledata) or as rule file(s) and return the result.
+
+    Inline prefixes: #h# hex, #b# base64, #s# string to search for, #q# rule with ' for ", # raw rule."""
+    if ruledata.startswith('#'):
+        if ruledata.startswith('#h#'):
+            rule = binascii.a2b_hex(ruledata[3:])
+        elif ruledata.startswith('#b#'):
+            rule = binascii.a2b_base64(ruledata[3:])
+        elif ruledata.startswith('#s#'):
+            rule = 'rule string {strings: $a = "%s" ascii wide nocase condition: $a}' % ruledata[3:]
+        elif ruledata.startswith('#q#'):
+            rule = ruledata[3:].replace("'", '"')
+        else:
+            rule = ruledata[1:]
+        if not isinstance(rule, str):  # binascii returns bytes on Python 3; yara.compile(source=) needs text
+            rule = rule.decode('utf-8')
+        return yara.compile(source=rule)
     else:
-        for filename in ProcessAt(fileordirname):
-            dFilepaths[filename] = filename
-    return yara.compile(filepaths=dFilepaths)
+        if os.path.isdir(ruledata):  # directory: collect every file found below it
+            filenames = [os.path.join(root, name) for root, dirs, names in os.walk(ruledata) for name in names]
+        else:
+            filenames = ProcessAt(ruledata)
+        return yara.compile(filepaths=dict((filename, filename) for filename in filenames))
 
 def AddDecoder(cClass):
     global decoders
@@ -1447,7 +1480,8 @@ def Main():
             print('StartXref: %s' % cntStartXref)
             print('Indirect object: %s' % cntIndirectObject)
             names = dicObjectTypes.keys()
-            for key in sorted(names):
+            names = sorted(names)  # works for Python 2 lists and Python 3 dict views alike (views have no .sort())
+            for key in names:
                 print(' %s %d: %s' % (key, len(dicObjectTypes[key]), ', '.join(map(lambda x: '%d' % x, dicObjectTypes[key]))))
 
         if options.generate or options.generateembedded != 0:

diff --git a/pdfid.py b/pdfid.py
@@ -2,8 +2,8 @@
 
 __description__ = 'Tool to test a PDF file'
 __author__ = 'Didier Stevens'
-__version__ = '0.2.1'
-__date__ = '2014/10/18'
+__version__ = '0.2.4'
+__date__ = '2018/01/29'
 
 """
 
@@ -45,6 +45,14 @@
   2014/09/30: added CSV header
   2014/10/16: V0.2.1: added output when plugin & file not pdf
   2014/10/18: some fixes for Python 3
+  2015/08/12: V0.2.2: added option pluginoptions
+  2015/08/13: added plugin Instructions method
+  2016/04/12: added option literal
+  2017/10/29: added pdfid.ini support
+  2017/11/05: V0.2.3: added option -n
+  2018/01/03: V0.2.4: bugfix entropy calculation for PDFs without streams; sample 28cb208d976466b295ee879d2d233c8a https://twitter.com/DubinRan/status/947783629123416069
+  2018/01/15: bugfix ConfigParser privately reported
+  2018/01/29: bugfix oPDFEOF.cntCharsAfterLastEOF when no %%EOF
 
 Todo:
   - update XML example (entropy, EOF)
@@ -64,14 +72,14 @@
 import zipfile
 import collections
 import glob
-try:
-    import urllib2
-    urllib23 = urllib2
-except:
-    import urllib.request
-    urllib23 = urllib.request
-
-plugins = []
+if sys.version_info[0] >= 3:
+    import urllib.request as urllib23
+else:
+    import urllib2 as urllib23
+if sys.version_info[0] >= 3:
+    import configparser as ConfigParser
+else:
+    import ConfigParser
 
 #Convert 2 Bytes If Python 3
 def C2BIP3(string):
@@ -236,7 +244,10 @@ def calc(self):
         allCount = sum(self.allBucket)
         streamCount = sum(self.streamBucket)
         nonStreamCount = sum(self.nonStreamBucket)
-        return (allCount, sum(map(lambda x: fEntropy(x, allCount), self.allBucket)), streamCount, sum(map(lambda x: fEntropy(x, streamCount), self.streamBucket)), nonStreamCount, sum(map(lambda x: fEntropy(x, nonStreamCount), self.nonStreamBucket)))
+        if streamCount == 0:
+            return (allCount, sum(map(lambda x: fEntropy(x, allCount), self.allBucket)), streamCount, None, nonStreamCount, sum(map(lambda x: fEntropy(x, nonStreamCount), self.nonStreamBucket)))
+        else:
+            return (allCount, sum(map(lambda x: fEntropy(x, allCount), self.allBucket)), streamCount, sum(map(lambda x: fEntropy(x, streamCount), self.streamBucket)), nonStreamCount, sum(map(lambda x: fEntropy(x, nonStreamCount), self.nonStreamBucket)))
 
 class cPDFEOF:
     def __init__(self):
@@ -348,6 +359,18 @@ def XMLAddAttribute(xmlDoc, name, value=None):
     xmlDoc.documentElement.setAttributeNode(att)
     if value != None:
         att.nodeValue = value
+    return att
+
+def ParseINIFile():
+    """Return the option names of section [keywords] in pdfid.ini ([] when there is no such section)."""
+    iniParser = ConfigParser.ConfigParser(allow_no_value=True)
+    iniParser.optionxform = str  # keep option names exactly as written instead of lower-casing them
+    # pdfid.ini is looked for in the directory of the running script (sys.argv[0])
+    iniParser.read(os.path.join(os.path.dirname(sys.argv[0]), 'pdfid.ini'))
+    if not iniParser.has_section('keywords'):
+        return []
+    # items() reports each option name once, so no extra de-duplication is required
+    return [name for name, _value in iniParser.items('keywords')]
 
 def PDFiD(file, allNames=False, extraData=False, disarm=False, force=False):
     """Example of XML output:
@@ -379,7 +402,7 @@ def PDFiD(file, allNames=False, extraData=False, disarm=False, force=False):
     hexcode = False
     lastName = ''
     insideStream = False
-    keywords = ('obj',
+    keywords = ['obj',
                 'endobj',
                 'stream',
                 'endstream',
@@ -399,9 +422,12 @@ def PDFiD(file, allNames=False, extraData=False, disarm=False, force=False):
                 '/Launch',
                 '/EmbeddedFile',
                 '/XFA',
-               )
+               ]
     words = {}
     dates = []
+    for extrakeyword in ParseINIFile():
+        if not extrakeyword in keywords:
+            keywords.append(extrakeyword)
     for keyword in keywords:
         words[keyword] = [0, 0]
     slash = ''
@@ -534,7 +560,10 @@ def PDFiD(file, allNames=False, extraData=False, disarm=False, force=False):
         (countAll, entropyAll , countStream, entropyStream, countNonStream, entropyNonStream) = oEntropy.calc()
         attEntropyAll.nodeValue = '%f' % entropyAll
         attCountAll.nodeValue = '%d' % countAll
-        attEntropyStream.nodeValue = '%f' % entropyStream
+        if entropyStream == None:
+            attEntropyStream.nodeValue = 'N/A     '
+        else:
+            attEntropyStream.nodeValue = '%f' % entropyStream
         attCountStream.nodeValue = '%d' % countStream
         attEntropyNonStream.nodeValue = '%f' % entropyNonStream
         attCountNonStream.nodeValue = '%d' % countNonStream
@@ -551,7 +580,10 @@ def PDFiD(file, allNames=False, extraData=False, disarm=False, force=False):
     xmlDoc.documentElement.setAttributeNode(attCountCharsAfterLastEOF)
     if oPDFEOF != None:
         attCountEOF.nodeValue = '%d' % oPDFEOF.cntEOFs
-        attCountCharsAfterLastEOF.nodeValue = '%d' % oPDFEOF.cntCharsAfterLastEOF
+        if oPDFEOF.cntEOFs > 0:
+            attCountCharsAfterLastEOF.nodeValue = '%d' % oPDFEOF.cntCharsAfterLastEOF
+        else:
+            attCountCharsAfterLastEOF.nodeValue = ''
     else:
         attCountEOF.nodeValue = ''
         attCountCharsAfterLastEOF.nodeValue = ''
@@ -610,18 +642,19 @@ def PDFiD(file, allNames=False, extraData=False, disarm=False, force=False):
         eleDate.setAttributeNode(att)
     return xmlDoc
 
-def PDFiD2String(xmlDoc, force):
+def PDFiD2String(xmlDoc, nozero, force):
     result = 'PDFiD %s %s\n' % (xmlDoc.documentElement.getAttribute('Version'), xmlDoc.documentElement.getAttribute('Filename'))
     if xmlDoc.documentElement.getAttribute('ErrorOccured') == 'True':
         return result + '***Error occured***\n%s\n' % xmlDoc.documentElement.getAttribute('ErrorMessage')
     if not force and xmlDoc.documentElement.getAttribute('IsPDF') == 'False':
         return result + ' Not a PDF document\n'
     result += ' PDF Header: %s\n' % xmlDoc.documentElement.getAttribute('Header')
     for node in xmlDoc.documentElement.getElementsByTagName('Keywords')[0].childNodes:
-        result += ' %-16s %7d' % (node.getAttribute('Name'), int(node.getAttribute('Count')))
-        if int(node.getAttribute('HexcodeCount')) > 0:
-            result += '(%d)' % int(node.getAttribute('HexcodeCount'))
-        result += '\n'
+        if not nozero or nozero and int(node.getAttribute('Count')) > 0:
+            result += ' %-16s %7d' % (node.getAttribute('Name'), int(node.getAttribute('Count')))
+            if int(node.getAttribute('HexcodeCount')) > 0:
+                result += '(%d)' % int(node.getAttribute('HexcodeCount'))
+            result += '\n'
     if xmlDoc.documentElement.getAttribute('CountEOF') != '':
         result += ' %-16s %7d\n' % ('%%EOF', int(xmlDoc.documentElement.getAttribute('CountEOF')))
     if xmlDoc.documentElement.getAttribute('CountCharsAfterLastEOF') != '':
@@ -705,7 +738,7 @@ def MakeCSVLine(fields, separator=';', quote='"'):
 def ProcessFile(filename, options, plugins):
     xmlDoc = PDFiD(filename, options.all, options.extra, options.disarm, options.force)
     if plugins == [] and options.select == '':
-        Print(PDFiD2String(xmlDoc, options.force), options)
+        Print(PDFiD2String(xmlDoc, options.nozero, options.force), options)
         return
 
     oPDFiD = cPDFiD(xmlDoc, options.force)
@@ -723,12 +756,12 @@ def ProcessFile(filename, options, plugins):
                 if options.csv:
                     Print(filename, options)
                 else:
-                    Print(PDFiD2String(xmlDoc, options.force), options)
+                    Print(PDFiD2String(xmlDoc, options.nozero, options.force), options)
     else:
         for cPlugin in plugins:
             if not cPlugin.onlyValidPDF or not oPDFiD.errorOccured and oPDFiD.isPDF:
                 try:
-                    oPlugin = cPlugin(oPDFiD)
+                    oPlugin = cPlugin(oPDFiD, options.pluginoptions)
                 except Exception as e:
                     Print('Error instantiating plugin: %s' % cPlugin.name, options)
                     if options.verbose:
@@ -748,16 +781,20 @@ def ProcessFile(filename, options, plugins):
                         Print(MakeCSVLine((('%s', filename), ('%s', cPlugin.name), ('%.02f', score))), options)
                 else:
                     if score >= options.minimumscore:
-                        Print(PDFiD2String(xmlDoc, options.force), options)
-                        Print('%s score: %.02f' % (cPlugin.name, score), options)
+                        Print(PDFiD2String(xmlDoc, options.nozero, options.force), options)
+                        Print('%s score:        %.02f' % (cPlugin.name, score), options)
+                        try:
+                            Print('%s instructions: %s' % (cPlugin.name, oPlugin.Instructions(score)), options)
+                        except AttributeError:
+                            pass
             else:
                 if options.csv:
                     if oPDFiD.errorOccured:
                         Print(MakeCSVLine((('%s', filename), ('%s', cPlugin.name), ('%s', 'Error occured'))), options)
                     if not oPDFiD.isPDF:
                         Print(MakeCSVLine((('%s', filename), ('%s', cPlugin.name), ('%s', 'Not a PDF document'))), options)
                 else:
-                    Print(PDFiD2String(xmlDoc, options.force), options)
+                    Print(PDFiD2String(xmlDoc, options.nozero, options.force), options)
 
 
 def Scan(directory, options, plugins):
@@ -909,7 +946,10 @@ def Main():
     oParser.add_option('-m', '--minimumscore', type=float, default=0.0, help='minimum score for plugin results output')
     oParser.add_option('-v', '--verbose', action='store_true', default=False, help='verbose (will also raise catched exceptions)')
     oParser.add_option('-S', '--select', type=str, default='', help='selection expression')
+    oParser.add_option('-n', '--nozero', action='store_true', default=False, help='suppress output for counts equal to zero')
     oParser.add_option('-o', '--output', type=str, default='', help='output to log file')
+    oParser.add_option('--pluginoptions', type=str, default='', help='options for the plugin')
+    oParser.add_option('-l', '--literal', action='store_true', default=False, help='take filenames literally, no wildcards')
     (options, args) = oParser.parse_args()
 
     if len(args) == 0:
@@ -920,6 +960,8 @@ def Main():
             print('Option scan not supported with stdin')
             options.scan = False
         filenames = ['']
+    elif options.literal:
+        filenames = args
     else:
         try:
             filenames = ExpandFilenameArguments(args)

diff --git a/plugin_embeddedfile.py b/plugin_embeddedfile.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 
 #2014/10/13
-from .pdfid import cPluginParent, AddPlugin
 
 class cPDFiDEmbeddedFile(cPluginParent):
 #    onlyValidPDF = True

diff --git a/plugin_nameobfuscation.py b/plugin_nameobfuscation.py
@@ -2,7 +2,6 @@
 
 #2013/11/04
 #2013/11/08
-from .pdfid import cPluginParent, AddPlugin
 
 class cPDFiDNameObfuscation(cPluginParent):
 #    onlyValidPDF = True