U
    @6^G                      @   s0   d dl mZmZmZ d dlZG dd deZdS )    )
SpiderFootSpiderFootPluginSpiderFootEventNc                   @   s   e Zd ZdZdddgdddgddd	d
ddgddddgddZddddddZdZdZdZdZ	e
 fddZdd Zdd Zdd  Zd!d" ZdS )#sfp_junkfilesztJunk Files:Footprint:Crawling and Scanning:slow,errorprone,invasive:Looks for old/temporary and other similar files.tmpZbakoldZaspZphpZjsppasswdz	.htaccessz	.htpasswdz	Thumbs.dbbackupzipztar.gzZtgztarT)fileexts
urlextstryfilesdirsskipfakezFile extensions to try.z8Try those extensions against URLs with these extensions.z?Try to fetch each of these files from the directory of the URL.z9Try to fetch the containing folder with these extensions.zTry to fetch an obviously fake page and if no 404 is returned, stop trying that particular host for junk files. Good for avoiding false positives in cases where servers return content for pages that don't exist.Nc                 C   sX   || _ |  | _|  | _|  | _|  | _d| _t| D ]}|| | j	|< q@d S )NzTarget Website)
sftempStorageresultshosts	skiphostsbases__dataSource__listkeysopts)selfsfcuserOptsopt r   8/var/www/spiderfoot.crq.systems/modules/sfp_junkfiles.pysetup+   s    



zsfp_junkfiles.setupc                 C   s   dgS )NLINKED_URL_INTERNALr   r   r   r   r    watchedEvents7   s    zsfp_junkfiles.watchedEventsc                 C   s   dgS )N	JUNK_FILEr   r#   r   r   r    producedEvents=   s    zsfp_junkfiles.producedEventsc                 C   sb   |t t dd }| jj|d| jd | jd d}|d dkr^| j|}d| j|< d	S dS )
Nr   iT_fetchtimeout
_useragentheadOnlytimeout	useragentcode404F)	strrandomSystemRandomrandintr   fetchUrlr   
urlBaseUrlr   )r   ZjunkUrlfetchreshostr   r   r    checkValidityA   s    

zsfp_junkfiles.checkValidityc              	   C   s  |j }|j}|j}| j|}| jd| d |  || jkrDd S d| j|< | jd rz|| jkrz| jd| d  d S | jd D ]\}| jd r|| jkr| jd| d   d S d| d	 |ksd| d
 |ks|	d| r|
d	}| jd D ]}|  r  d S | jd| d |  |d d | }	|	| jkrPd| j|	< n| jd q| jj|	d| jd | jd dd}
|
d |	kr| jd|
d  d |	  q|
d dkr| |	sqtd|	| j|}| | qq| j|}|r|| jkrd S d| j|< | jd| d |  ||d ks@||krDd S | jd D ]J}|  rd d S | jd r|| jkr| jd| d   d S |ddkr| jd qN| jd| d |  |dt|d   d | }	|	| jkrd| j|	< n| jd qN| jj|	d| jd | jd d!}
|
d |	kr`| jd|
d  d |	  qN|
d dkrN| |	s~qNtd|	| j|}| | qN| jd" D ]}|  r d S | jd r|| jkr| jd| d   d S | jd| d |  || }	|	| jkr&d| j|	< n| jd q| jj|	d| jd | jd d!}
|
d |	kr| jd|
d  d |	  q|
d dkr| |	sqtd|	| j|}| | qd S )#NzReceived event, z, from Tr   z	Skipping z  because it doesn't return 404s.r   .?#r   zTrying z	 against r   zSkipping, already fetched.r'   r(   i )r*   r+   r,   	sizeLimitrealurlzSkipping because z isn't the fetched URL of r-   200r%   zBase: z	, event: /r      zSkipping base url.   r)   r   )	eventTypemoduledatar   r4   debugr   r   r   endswithsplitcheckForStopr3   r8   r   __name__notifyListeners
urlBaseDirr   countlen)r   event	eventNamesrcModuleName	eventDatar7   extbitsxr5   r6   evtbaseZdirfilefr   r   r    handleEventN   s    

 







zsfp_junkfiles.handleEvent)rI   
__module____qualname____doc__r   optdescsr   r   r   r   dictr!   r$   r&   r8   rX   r   r   r   r    r      s2    
r   )sflibr   r   r   r0   r   r   r   r   r    <module>   s   