Skip to content

eval_rho2 and eval_vxc in cuda #325

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion gpu4pyscf/__config__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
# Use smaller blksize for old gaming GPUs
if props['totalGlobalMem'] < 16 * GB:
min_ao_blksize = 64
min_grid_blksize = 64*64
min_grid_blksize = 128*128

# Use 90% of the global memory for CuPy memory pool
mem_fraction = 0.9
Expand Down
6 changes: 3 additions & 3 deletions gpu4pyscf/df/df.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from cupyx.scipy.linalg import solve_triangular
from pyscf import lib
from pyscf.df import df, addons, incore
from gpu4pyscf.lib.cupy_helper import (cholesky, tag_array, get_avail_mem,
from gpu4pyscf.lib.cupy_helper import (cholesky, tag_array, get_avail_mem,
cart2sph, p2p_transfer, copy_array)
from gpu4pyscf.df import int3c2e, df_jk
from gpu4pyscf.lib import logger
Expand Down Expand Up @@ -269,7 +269,7 @@ def cholesky_eri_gpu(intopt, mol, auxmol, cd_low,

return _cderi

def _cderi_task(intopt, cd_low, task_list, _cderi, aux_blksize,
def _cderi_task(intopt, cd_low, task_list, _cderi, aux_blksize,
omega=None, sr_only=False, device_id=0):
''' Execute CDERI tasks on one device
'''
Expand Down Expand Up @@ -362,5 +362,5 @@ def _cderi_task(intopt, cd_low, task_list, _cderi, aux_blksize,
_cderi[dev_id][:,ij0:ij1] = tmp
else:
_cderi[0][:,ij0:ij1] = cderi_block
t1 = log.timer_debug1(f'transfer data for {cp_ij_id} / {nq} on Device {device_id}', *t1)
t1 = log.timer_debug1(f'transfer data for {cp_ij_id} / {nq} on Device {device_id}', *t1)
return
4 changes: 2 additions & 2 deletions gpu4pyscf/df/grad/rhf.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def get_jk(mf_grad, mol=None, dm0=None, hermi=0, with_j=True, with_k=True, omega
orbo = intopt.sort_orbitals(orbo, axis=[0])

rhoj, rhok = get_rhojk(with_df, dm, orbo, with_j=with_j, with_k=with_k)

# (d/dX P|Q) contributions
if omega and omega > 1e-10:
with auxmol.with_range_coulomb(omega):
Expand Down Expand Up @@ -151,7 +151,7 @@ def get_jk(mf_grad, mol=None, dm0=None, hermi=0, with_j=True, with_k=True, omega
cart2sph = intopt.cart2sph
orbo_cart = cart2sph @ orbo
dm_cart = cart2sph @ dm @ cart2sph.T

with_df._cderi = None # release GPU memory
vj, vk, vjaux, vkaux = get_grad_vjk(with_df, mol, auxmol, rhoj_cart, dm_cart, rhok_cart, orbo_cart,
with_j=with_j, with_k=with_k, omega=omega)
Expand Down
8 changes: 4 additions & 4 deletions gpu4pyscf/df/grad/uhf.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
FREE_CUPY_CACHE = True
BINSIZE = 128

def get_jk(mf_grad, mol=None, dm0=None, hermi=0, with_j=True, with_k=True,
def get_jk(mf_grad, mol=None, dm0=None, hermi=0, with_j=True, with_k=True,
omega=None, mo_coeff=None, mo_occ=None, dm2 = None):
'''
Computes the first-order derivatives of the energy contributions from
Expand Down Expand Up @@ -143,11 +143,11 @@ def get_jk(mf_grad, mol=None, dm0=None, hermi=0, with_j=True, with_k=True,

nao_cart = intopt._sorted_mol.nao
block_size = with_df.get_blksize(nao=nao_cart)

intopt = int3c2e.VHFOpt(mol, auxmol, 'int2e')
intopt.build(mf.direct_scf_tol, diag_block_with_triu=True, aosym=False,
group_size_aux=block_size)#, group_size=block_size)

if not mol.cart:
# Convert AO-indexed quantities from the spherical to the Cartesian basis
cart2sph = intopt.cart2sph
Expand All @@ -168,7 +168,7 @@ def get_jk(mf_grad, mol=None, dm0=None, hermi=0, with_j=True, with_k=True,
with_df._cderi = None # release GPU memory
vj, vk, vjaux, vkaux = get_grad_vjk(with_df, mol, auxmol, rhoj_cart, dm_cart, rhok_cart, orbo_cart,
with_j=with_j, with_k=with_k, omega=omega)

# NOTE: vj and vk are still in the Cartesian basis
_sorted_mol = intopt._sorted_mol
natm = _sorted_mol.natm
Expand Down
Loading
Loading