from numba.core.typing.templates import ConcreteTemplate
from numba.core import types, typing, funcdesc, config, compiler, sigutils
from numba.core.compiler import (sanitize_compile_result_entries, CompilerBase,
                                 DefaultPassBuilder, Flags, Option,
                                 CompileResult)
from numba.core.compiler_lock import global_compiler_lock
from numba.core.compiler_machinery import (LoweringPass, AnalysisPass,
                                           PassManager, register_pass)
from numba.core.errors import NumbaInvalidConfigWarning, TypingError
from numba.core.typed_passes import (IRLegalization, NativeLowering,
                                     AnnotateTypes)
from warnings import warn
from numba.cuda.api import get_current_device


def _nvvm_options_type(x):
    if x is None:
        return None

    assert isinstance(x, dict)
    return x


class CUDAFlags(Flags):
    nvvm_options = Option(
        type=_nvvm_options_type,
        default=None,
        doc="NVVM options",
    )
    compute_capability = Option(
        type=tuple,
        default=None,
        doc="Compute Capability",
    )


class CUDACompileResult(CompileResult):
    # The entry point is used as a dictionary key by the dispatcher, so the id
    # of the compile result stands in for the function pointer that other
    # targets would use here.
    @property
    def entry_point(self):
        return id(self)


def cuda_compile_result(**entries):
    entries = sanitize_compile_result_entries(entries)
    return CUDACompileResult(**entries)


@register_pass(mutates_CFG=True, analysis_only=False)
class CUDABackend(LoweringPass):

    _name = "cuda_backend"

    def __init__(self):
        LoweringPass.__init__(self)

    def run_pass(self, state):
        """
        Back-end: Packages lowering output in a compile result
        """
        lowered = state['cr']
        signature = typing.signature(state.return_type, *state.args)

        state.cr = cuda_compile_result(
            typing_context=state.typingctx,
            target_context=state.targetctx,
            typing_error=state.status.fail_reason,
            type_annotation=state.type_annotation,
            library=state.library,
            call_helper=lowered.call_helper,
            signature=signature,
            fndesc=lowered.fndesc,
        )
        return True


@register_pass(mutates_CFG=False, analysis_only=False)
class CreateLibrary(LoweringPass):
    """
    Create a CUDACodeLibrary for the NativeLowering pass to populate. The
    NativeLowering pass will create a code library if none exists, but we need
    to set it up with nvvm_options from the flags if they are present.
    """

    _name = "create_library"

    def __init__(self):
        LoweringPass.__init__(self)

    def run_pass(self, state):
        codegen = state.targetctx.codegen()
        name = state.func_id.func_qualname
        nvvm_options = state.flags.nvvm_options
        state.library = codegen.create_library(name, nvvm_options=nvvm_options)
        # Enable object caching upfront so that the library can be serialized.
        state.library.enable_object_caching()

        return True


@register_pass(mutates_CFG=False, analysis_only=False)
class CUDALegalization(AnalysisPass):

    _name = "cuda_legalization"

    def __init__(self):
        AnalysisPass.__init__(self)

    def run_pass(self, state):
        # Early return if NVVM 7.0 or later is in use; it supports char
        # sequence types, so no legalization check is needed.
        from numba.cuda.cudadrv.nvvm import NVVM
        if NVVM().is_nvvm70:
            return False
        # NVVM < 7.0: char sequence types are unsupported, so reject them.
        typmap = state.typemap

        def check_dtype(dtype):
            if isinstance(dtype, (types.UnicodeCharSeq, types.CharSeq)):
                msg = (f"{k} is a char sequence type. This type is not "
                       "supported with CUDA toolkit versions < 11.2. To "
                       "use this type, you need to update your CUDA "
                       "toolkit - try 'conda install cudatoolkit=11' if "
                       "you are using conda to manage your environment.")
                raise TypingError(msg)
            elif isinstance(dtype, types.Record):
                for subdtype in dtype.fields.items():
                    # subdtype is a (name, _RecordField) pair
                    check_dtype(subdtype[1].type)

        for k, v in typmap.items():
            if isinstance(v, types.Array):
                check_dtype(v.dtype)
        return False


class CUDACompiler(CompilerBase):
    def define_pipelines(self):
        dpb = DefaultPassBuilder
        pm = PassManager('cuda')

        untyped_passes = dpb.define_untyped_pipeline(self.state)
        pm.passes.extend(untyped_passes.passes)

        typed_passes = dpb.define_typed_pipeline(self.state)
        pm.passes.extend(typed_passes.passes)
        pm.add_pass(CUDALegalization, "CUDA legalization")

        lowering_passes = self.define_cuda_lowering_pipeline(self.state)
        pm.passes.extend(lowering_passes.passes)

        pm.finalize()
        return [pm]

    def define_cuda_lowering_pipeline(self, state):
        pm = PassManager('cuda_lowering')
        # legalise
        pm.add_pass(IRLegalization, "ensure IR is legal prior to lowering")
        pm.add_pass(AnnotateTypes, "annotate types")

        # lower
        pm.add_pass(CreateLibrary, "create library")
        pm.add_pass(NativeLowering, "native lowering")
        pm.add_pass(CUDABackend, "cuda backend")

        pm.finalize()
        return pm


@global_compiler_lock
def compile_cuda(pyfunc, return_type, args, debug=False, lineinfo=False,
                 inline=False, fastmath=False, nvvm_options=None,
                 cc=None):
    if cc is None:
        raise ValueError('Compute Capability must be supplied')

    from .descriptor import cuda_target
    typingctx = cuda_target.typing_context
    targetctx = cuda_target.target_context

    flags = CUDAFlags()
    # Do not compile (generate native code), just lower (to LLVM)
    flags.no_compile = True
    flags.no_cpython_wrapper = True
    flags.no_cfunc_wrapper = True

    # Both debug and lineinfo turn on debug information in the compiled code;
    # lineinfo alone restricts the output to line mapping directives only.
    if debug or lineinfo:
        flags.debuginfo = True

    if lineinfo:
        flags.dbg_directives_only = True

    if debug:
        flags.error_model = 'python'
    else:
        flags.error_model = 'numpy'

    if inline:
        flags.forceinline = True
    if fastmath:
        flags.fastmath = True
    if nvvm_options:
        flags.nvvm_options = nvvm_options
    flags.compute_capability = cc

    # Run the compilation pipeline with the CUDA target active
    from numba.core.target_extension import target_override
    with target_override('cuda'):
        cres = compiler.compile_extra(typingctx=typingctx,
                                      targetctx=targetctx,
                                      func=pyfunc,
                                      args=args,
                                      return_type=return_type,
                                      flags=flags,
                                      locals={},
                                      pipeline_class=CUDACompiler)

    library = cres.library
    library.finalize()

    return cres


@global_compiler_lock
def compile_ptx(pyfunc, sig, debug=False, lineinfo=False, device=False,
                fastmath=False, cc=None, opt=True):
    """Compile a Python function to PTX for a given set of argument types.

    :param pyfunc: The Python function to compile.
    :param sig: The signature representing the function's input and output
                types.
    :param debug: Whether to include debug info in the generated PTX.
    :type debug: bool
    :param lineinfo: Whether to include a line mapping from the generated PTX
                     to the source code. Usually this is used with optimized
                     code (since debug mode would automatically include this),
                     so we want debug info in the LLVM but only the line
                     mapping in the final PTX.
    :type lineinfo: bool
    :param device: Whether to compile a device function. Defaults to ``False``,
                   to compile global kernel functions.
    :type device: bool
    :param fastmath: Whether to enable fast math flags (ftz=1, prec_sqrt=0,
                     prec_div=0, and fma=1)
    :type fastmath: bool
    :param cc: Compute capability to compile for, as a tuple
               ``(MAJOR, MINOR)``. Defaults to ``(5, 0)``.
    :type cc: tuple
    :param opt: Enable optimizations. Defaults to ``True``.
    :type opt: bool
    :return: (ptx, resty): The PTX code and inferred return type
    :rtype: tuple
    """
    if debug and opt:
        msg = ("debug=True with opt=True (the default) is not supported by "
               "CUDA. This may result in a crash - set debug=False or "
               "opt=False.")
        warn(NumbaInvalidConfigWarning(msg))

    nvvm_options = {
        'fastmath': fastmath,
        'opt': 3 if opt else 0
    }

    args, return_type = sigutils.normalize_signature(sig)

    cc = cc or config.CUDA_DEFAULT_PTX_CC
    cres = compile_cuda(pyfunc, return_type, args, debug=debug,
                        lineinfo=lineinfo, fastmath=fastmath,
                        nvvm_options=nvvm_options, cc=cc)
    resty = cres.signature.return_type

    if resty and not device and resty != types.void:
        raise TypeError("CUDA kernel must have void return type.")

    if device:
        lib = cres.library
    else:
        tgt = cres.target_context
        code = pyfunc.__code__
        filename = code.co_filename
        linenum = code.co_firstlineno

        lib, kernel = tgt.prepare_cuda_kernel(cres.library, cres.fndesc, debug,
                                              lineinfo, nvvm_options, filename,
                                              linenum)

    ptx = lib.get_asm_str(cc=cc)
    return ptx, resty


def compile_ptx_for_current_device(pyfunc, sig, debug=False, lineinfo=False,
                                   device=False, fastmath=False, opt=True):
    """Compile a Python function to PTX for a given set of argument types for
    the current device's compute capability. This calls :func:`compile_ptx`
    with an appropriate ``cc`` value for the current device."""
    cc = get_current_device().compute_capability
    return compile_ptx(pyfunc, sig, debug=debug, lineinfo=lineinfo,
                       device=device, fastmath=fastmath, cc=cc, opt=opt)


def declare_device_function(name, restype, argtypes):
    return declare_device_function_template(name, restype, argtypes).key


def declare_device_function_template(name, restype, argtypes):
    from .descriptor import cuda_target
    typingctx = cuda_target.typing_context
    targetctx = cuda_target.target_context
    sig = typing.signature(restype, *argtypes)
    extfn = ExternFunction(name, sig)

    class device_function_template(ConcreteTemplate):
        key = extfn
        cases = [sig]

    fndesc = funcdesc.ExternalFunctionDescriptor(
        name=name, restype=restype, argtypes=argtypes)
    typingctx.insert_user_function(extfn, device_function_template)
    targetctx.insert_user_function(extfn, fndesc)

    return device_function_template


class ExternFunction(object):
    def __init__(self, name, sig):
        self.name = name
        self.sig = sig
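# ---------------------------------------------------------------------------
# Usage sketch: the entry points above are normally reached through the public
# ``numba.cuda`` namespace. The ``axpy`` kernel and the cc value below are
# hypothetical; compile_ptx only needs the CUDA toolkit's NVVM library, while
# compile_ptx_for_current_device additionally queries the attached GPU.
#
#     from numba import cuda, float32
#
#     def axpy(r, a, x, y):
#         i = cuda.grid(1)
#         if i < r.size:
#             r[i] = a * x[i] + y[i]
#
#     # Compile for an explicit compute capability; the return type is
#     # inferred, so only the argument types are given.
#     ptx, resty = cuda.compile_ptx(
#         axpy, (float32[:], float32, float32[:], float32[:]), cc=(7, 5))
#
#     # Or target whatever GPU is currently attached.
#     ptx, resty = cuda.compile_ptx_for_current_device(
#         axpy, (float32[:], float32, float32[:], float32[:]))
#
# Externally-defined device functions are exposed through
# ``cuda.declare_device``, which routes to ``declare_device_function`` above:
#
#     mul = cuda.declare_device('mul', 'float32(float32, float32)')
# ---------------------------------------------------------------------------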