U
    @6^                     @   s`   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZm	Z	m
Z
 G dd de	ZdS )    N)
SpiderFootSpiderFootPluginSpiderFootEventc                   @   s\   e Zd ZdZdddddddgd	d
Zddd
ZdZe fddZdd Z	dd Z
dd ZdS )sfp_filemetazWFile Metadata:Footprint:Content Analysis::Extracts meta data from documents and images.docxpptxpdfjpgjpegtifftifi,  )fileextstimeoutzlFile extensions of files you want to analyze the meta data of (only PDF, DOCX, XLSX and PPTX are supported.)z'Download timeout for files, in seconds.Nc                 C   s4   || _ |  | _t| D ]}|| | j|< qd S )N)sftempStorageresultslistkeysopts)selfsfcuserOptsopt r   7/var/www/spiderfoot.crq.systems/modules/sfp_filemeta.pysetup)   s    
zsfp_filemeta.setupc                 C   s   ddgS )NLINKED_URL_INTERNALZINTERESTING_FILEr   r   r   r   r   watchedEvents1   s    zsfp_filemeta.watchedEventsc                 C   s   ddgS )NRAW_FILE_META_DATASOFTWARE_USEDr   r   r   r   r   producedEvents7   s    zsfp_filemeta.producedEventsc                 C   s  |j }|j}|j}| jd| d |  || jkr8d S d| j|< | jd D ]}|  r` d S d|  | krL| jj	|| jd | jd ddd	}|d
 d kr| j
d| d  d S t|d
 dk r| j
dtt|d
  d  d S d }d }| dkrz@t|d
 }	tj|	dd}
|
 }t|}| jd|  W nN tk
r } z.| j
d| d t| d d W Y  d S d }~X Y nX | dkr\zjt|d
 }t|}t|d }| jdt|  |jj}|jj}dd ||fD }d|}W nN tk
rZ } z.| j
d| d t| d d W Y  d S d }~X Y nX | dkr$zjt|d
 }t|}t|d }| jdt|  |jj}|jj}dd ||fD }d|}W nN tk
r" } z.| j
d| d t| d d W Y  d S d }~X Y nX | dkrz@t|d
 }	t|	}|d ksdt|dkrhW qLt|}W nN tk
r } z.| j
d| d t| d d W Y  d S d }~X Y nX |d k	rL|d k	rLtd|| j |}| !| t" }ztd |kr|#t|d   d!|kr,|#t|d!  d"|krH|#t|d"  d#|krd|#t|d#  W nJ tk
r } z*| j
d$| d% t| d W Y  d S d }~X Y nX |D ]^}|rt$|tj%j&s| jd&t|  d'd(d |D }td)|| j |}| !| qqLd S )*NzReceived event, z, from Tr   .r   
_useragenti )r   	useragent
dontMangle	sizeLimitcontentz(Unable to fetch file for meta analysis: Fi   z%Strange content encountered, size of r   )strictzObtained meta data from z Unable to parse meta data from: ())r   r   zOffice type: c                 S   s   g | ]}|r|qS r   r   .0_fr   r   r   
<listcomp>s   s      z,sfp_filemeta.handleEvent.<locals>.<listcomp>z, zUnable to process file: )r   c                 S   s   g | ]}|r|qS r   r   r+   r   r   r   r.      s      )r	   r
   r   r   z	/Producerz/CreatorApplicationzImage SoftwarezFailed to parse PDF, z: zVAL:  c                 S   s    g | ]}t |d k r|ndqS )    )ord)r,   ir   r   r   r.      s     r    )'	eventTypemoduledatar   debugr   r   checkForStoplowerfetchUrlerrorlenstrioBytesIOPyPDF2ZPdfFileReaderZgetDocumentInfoBaseExceptionr   ZDocument	mimetypes
guess_typeZcore_propertiesZauthorcommentsjoinr   ZPresentationexifreadZprocess_filer   __name__notifyListenersr   append
isinstancegenericZ
NullObject)r   event	eventNamesrcModuleName	eventDatafileExtretmetar7   rawr   ecdocmtypeaevtvalvr   r   r   handleEvent;   s   

 


 




zsfp_filemeta.handleEvent)rH   
__module____qualname____doc__r   optdescsr   dictr   r   r!   r]   r   r   r   r   r      s   r   )rC   rA   r   r   rG   lxmlr?   sflibr   r   r   r   r   r   r   r   <module>   s   