o
    Td                      @   s.  d dl Z d dlmZ d dlmZ ddlmZ ddlmZ dd Z	d	d
 Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4d5d6d7d8d9d:d;d<d=d>d?d@dAZdBdC Z G dDdE dEe!Z"dFdG Z#dS )H    N)ir)cgutils   )nvvm)current_contextc                 C   sN   dt | d }tt|tt|t|t|f}t| ||S )N___numba_atomic_i	_cas_hack)strr   FunctionTypeIntTypePointerTyper   get_or_insert_function)lmodisizefnamefnty r   g/home/ncw/WWW/www-new/content/articles/pi-bbp/venv/lib/python3.10/site-packages/numba/cuda/nvvmutils.pydeclare_atomic_cas_int   s   r   c                 C   s@   t  jr| |||dd}| |dS | t|||||fS )N	monotonicr   )r   NVVM	is_nvvm70cmpxchgextract_valuecallr   )builderr   r   ptrcmpvaloutr   r   r   atomic_cmpxchg   s   
r    c                 C   s6   d}t t  t t  dt  f}t| ||S )Nz#llvm.nvvm.atomic.load.add.f32.p0f32r   r   r
   	FloatTyper   r   r   r   r   r   r   r   r   declare_atomic_add_float32   s
   
r$   c                 C   sH   t  jjdkr
d}nd}tt tt t f}t| ||S )N)   r   z#llvm.nvvm.atomic.load.add.f64.p0f64___numba_atomic_double_add)	r   devicecompute_capabilityr   r
   
DoubleTyper   r   r   r#   r   r   r   declare_atomic_add_float64!   s   
r*   c                 C   4   d}t t  t t  t  f}t| ||S )N___numba_atomic_float_subr!   r#   r   r   r   declare_atomic_sub_float32+   
   
r-   c                 C   r+   )N___numba_atomic_double_subr   r
   r)   r   r   r   r#   r   r   r   declare_atomic_sub_float642   r.   r1   c                 C   :   d}t t dt t dt df}t| ||S )Nz"llvm.nvvm.atomic.load.inc.32.p0i32    r   r
   r   r   r   r   r#   r   r   r   declare_atomic_inc_int329   
   r5   c                 C   r2   )N___numba_atomic_u64_inc@   r4   r#   r   r   r   declare_atomic_inc_int64@   r6   r9   c                 C   r2   )Nz"llvm.nvvm.atomic.load.dec.32.p0i32r3   r4   r#   r   r   r   declare_atomic_dec_int32G   r6   r:   c                 C   r2   )N___numba_atomic_u64_decr8   r4   r#   r   r   r   declare_atomic_dec_int64N   r6   r<   c                 C   r+   )N___numba_atomic_float_maxr!   r#   r   r   r   declare_atomic_max_float32U   r.   r>   c                 C   r+   )N___numba_atomic_double_maxr0   r#   r   r   r   declare_atomic_max_float64\   r.   r@   c                 C   r+   )N___numba_atomic_float_minr!   r#   r   r   r   declare_atomic_min_float32c   r.   rB   c                 C   r+   )N___numba_atomic_double_minr0   r#   r   r   r   declare_atomic_min_float64j   r.   rD   c                 C   r+   )N___numba_atomic_float_nanmaxr!   r#   r   r   r   declare_atomic_nanmax_float32q   r.   rF   c                 C   r+   )N___numba_atomic_double_nanmaxr0   r#   r   r   r   declare_atomic_nanmax_float64x   r.   rH   c                 C   r+   )N___numba_atomic_float_nanminr!   r#   r   r   r   declare_atomic_nanmin_float32   r.   rJ   c                 C   r+   )N___numba_atomic_double_nanminr0   r#   r   r   r   declare_atomic_nanmin_float64   r.   rL   c                 C   s,   d}t t dt df}t| ||S )NcudaCGGetIntrinsicHandler8   r3   r   r
   r   r   r   r#   r   r   r    declare_cudaCGGetIntrinsicHandle   s
   
rO   c                 C   s4   d}t t dt dt df}t| ||S )NcudaCGSynchronizer3   r8   rN   r#   r   r   r   declare_cudaCGSynchronize   s
   rQ   c                 C   s`   | j jj}t|dd }tj||jdtj	d}d|_
d|_||_| |ttddS )	Nzutf-8    _str)name	addrspaceinternalT   generic)basic_blockfunctionmoduler   make_bytearrayencodeadd_global_variabletyper   ADDRSPACE_CONSTANTlinkageglobal_constantinitializeraddrspacecastr   r   r   )r   valuer   cvalglr   r   r   declare_string   s   
rh   c                 C   s8   t t d}t t d||g}t| |d}|S )NrW   r3   vprintf)r   r   r   r
   r   r   )r   	voidptrty	vprintftyri   r   r   r   declare_vprint   s   rl   zllvm.nvvm.read.ptx.sreg.tid.xzllvm.nvvm.read.ptx.sreg.tid.yzllvm.nvvm.read.ptx.sreg.tid.zzllvm.nvvm.read.ptx.sreg.ntid.xzllvm.nvvm.read.ptx.sreg.ntid.yzllvm.nvvm.read.ptx.sreg.ntid.zzllvm.nvvm.read.ptx.sreg.ctaid.xzllvm.nvvm.read.ptx.sreg.ctaid.yzllvm.nvvm.read.ptx.sreg.ctaid.zz llvm.nvvm.read.ptx.sreg.nctaid.xz llvm.nvvm.read.ptx.sreg.nctaid.yz llvm.nvvm.read.ptx.sreg.nctaid.zz llvm.nvvm.read.ptx.sreg.warpsizezllvm.nvvm.read.ptx.sreg.laneid)ztid.xztid.yztid.zzntid.xzntid.yzntid.zzctaid.xzctaid.yzctaid.zznctaid.xznctaid.yznctaid.zwarpsizelaneidc                 C   s6   | j }ttdd}t||t| }| |dS )Nr3   r   )r[   r   r
   r   r   r   SREG_MAPPINGr   )r   rT   r[   r   fnr   r   r   	call_sreg   s   rq   c                   @   s<   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd ZdS )SRegBuilderc                 C   s
   || _ d S N)r   )selfr   r   r   r   __init__   s   
zSRegBuilder.__init__c                 C      t | jd| S )Nztid.%srq   r   rt   xyzr   r   r   tid      zSRegBuilder.tidc                 C   rv   )Nzctaid.%srw   rx   r   r   r   ctaid   r{   zSRegBuilder.ctaidc                 C   rv   )Nzntid.%srw   rx   r   r   r   ntid   r{   zSRegBuilder.ntidc                 C   rv   )Nz	nctaid.%srw   rx   r   r   r   nctaid   r{   zSRegBuilder.nctaidc                 C   s:   |  |}| |}| |}| j| j|||}|S rs   )rz   r}   r|   r   addmul)rt   ry   rz   r}   r~   resr   r   r   getdim   s
   


zSRegBuilder.getdimN)	__name__
__module____qualname__ru   rz   r|   r}   r~   r   r   r   r   r   rr      s    rr   c                    s@   t |   fdddD }tt|d |}|dkr|d S |S )Nc                 3   s    | ]}  |V  qd S rs   )r   ).0ry   sregr   r   	<genexpr>   s    z get_global_id.<locals>.<genexpr>ry   r   r   )rr   list	itertoolsislice)r   dimitseqr   r   r   get_global_id   s   r   )$r   llvmliter   
numba.corer   cudadrvr   apir   r   r    r$   r*   r-   r1   r5   r9   r:   r<   r>   r@   rB   rD   rF   rH   rJ   rL   rO   rQ   rh   rl   ro   rq   objectrr   r   r   r   r   r   <module>   sZ    		
