3
@6^                 @   s0   d dl Z d dlmZmZmZ G dd deZdS )    N)
SpiderFootSpiderFootPluginSpiderFootEventc               @   sF   e Zd ZdZddiZddiZe fddZdd Zd	d
 Z	dd Z
dS )sfp_companyzjCompany Names:Footprint,Investigate,Passive:Content Analysis::Identify company names in any obtained data.filterjscssTzFilter out company names that originated from CSS/JS content. Enabling this avoids detection of popular Javascript and web framework author company names.c             C   s.   || _ x"t|j D ]}|| | j|< qW d S )N)sflistkeysopts)selfsfcuserOptsopt r   6/var/www/spiderfoot.crq.systems/modules/sfp_company.pysetup    s    zsfp_company.setupc             C   s   ddddddgS )NTARGET_WEB_CONTENTSSL_CERTIFICATE_ISSUEDZDOMAIN_WHOISZNETBLOCK_WHOISZAFFILIATE_DOMAIN_WHOISZAFFILIATE_WEB_CONTENTr   )r   r   r   r   watchedEvents'   s    zsfp_company.watchedEventsc             C   s   ddgS )NCOMPANY_NAMEAFFILIATE_COMPANY_NAMEr   )r   r   r   r   producedEvents/   s    zsfp_company.producedEventsc          $   C   s  |j }|j}|j}d}dddddddd	d
dddddddddddddddg}dddddddddd dd!dd"ddd#dd$dd%dg}d&}d'd(g}	|dHkrd S |d+kr|j}
| jd, rd-|
ksd.|
kr| jjd/ d S | jjd0| d1 | d2 tt| d3  t	|tkrxy<t	|t
tgkr"t|}n| jjd4tt	|  d S W n6 tk
rv } z| jjd5t|  d S d }~X nX y|d6kr|jd7d8 }W n0 tk
r } z| jjd9 W Y d d }~X nX t
 }x|D ]}d:}|j||}xr|d:krX|d; }|d:k rd:}|d< }|t|kr*t|d8 }|j|||  |t| }|j||}qW qW t
 }xp|D ]f}x\|D ]R}tj|d= | d> | |tjtjB }x"|D ]}d:}x$|D ]}t|d:kr|d87 }qW |d8krqd?}xF|D ]>}d@}x |	D ]}tj||rdA}qW |s||dB 7 }qW tjdCdB|j }| jjdD|  ||krx| jjdE qn
|j| dF|krd*}nd)}t||| j|}|jr|j|_ndG|_| j| qW q|W qnW d S )INz(?=[,;:'">\(= ]|^)\s?([A-Z0-9\(\)][A-Za-z0-9\-&,\.][^ "';:><]*)?\s?([A-Z0-9\(\)][A-Za-z0-9\-&,\.]?[^ "';:><]*|[Aa]nd)?\s?([A-Z0-9\(\)][A-Za-z0-9\-&,\.]?[^ "';:><]*)?\s+ZLLCz
L\.L\.C\.?ZAGzA\.G\.?ZGmbHzPty\.?\s+Ltd\.?zLtd\.?zPte\.?zInc\.?zINC\.?ZIncorporatedZ
FoundationzCorp\.?ZCorporationZSAzS\.A\.?ZSIAZBVzB\.V\.?ZNVz
N\.V\.?PLCZLimitedzPvt\.?\s+Ltd\.?ZSARLzL.L.CzA.GZPtyZLtdZPteZIncZINCZCorpzS.AzB.VzN.VPLCzPvt.z(?=[ \.,:<\)'"]|[$
])Z	Copyrightz\d{4}r   r   r   r   z.jsz.cssz!Ignoring web content from CSS/JS.zReceived event, z, from z: z bytes.z&Unhandled type to find company names: z'Unable to convert list/dict to string: r   zO=   z+Couldn't strip out O=, proceeding anyway...r   2   
   () FT z\s+zFound company name: zAlready found from this source.Z
AFFILIATE_Unknown)r   r   )	eventTypemoduledataactualSourcer
   r   debugstrlentyper   dictBaseExceptionsplitfindappendrefindall	MULTILINEDOTALLmatchsubstripinfor   __name__moduleDataSourcenotifyListeners)r   event	eventNamesrcModuleName	eventDataZpattern_prefixZpattern_match_reZpattern_matchZpattern_suffixZfilterpatternsurlechunkspatstartmendoffsetmyreschunkmatchesr1   matchedZfullcompanyfltfetypeevtr   r   r   handleEvent3   s    
,



&








zsfp_company.handleEventN)r5   
__module____qualname____doc__r
   optdescsr(   r   r   r   rL   r   r   r   r   r      s   r   )r-   sflibr   r   r   r   r   r   r   r   <module>   s   