Source code for ctm.one_site_c4v.ctmrg_c4v

import time
from math import sqrt
import torch
from torch.utils.checkpoint import checkpoint
import config as cfg
from ipeps.ipeps_c4v import IPEPS_C4V
from ctm.one_site_c4v.env_c4v import *
from ctm.one_site_c4v.ctm_components_c4v import *
from ctm.one_site_c4v.fpcm_c4v import fpcm_MOVE_sl
from linalg.custom_svd import *
from linalg.custom_eig import *
import logging
log = logging.getLogger(__name__)

def run(state, env, conv_check=None, ctm_args=cfg.ctm_args, global_args=cfg.global_args):
    r"""
    :param state: wavefunction
    :param env: initial C4v symmetric environment
    :param conv_check: function which determines the convergence of CTM algorithm. If ``None``,
                       the algorithm performs ``ctm_args.ctm_max_iter`` iterations.
    :param ctm_args: CTM algorithm configuration
    :param global_args: global configuration
    :type state: IPEPS_C4V
    :type env: ENV_C4V
    :type conv_check: function(IPEPS_C4V,ENV_C4V,Object,CTMARGS)->(bool,Object)
    :type ctm_args: CTMARGS
    :type global_args: GLOBALARGS
    :return: the ``env`` object that was passed in, the last ``history`` object returned
             by ``conv_check`` (``None`` if ``conv_check`` is ``None``), the accumulated
             time spent in CTM moves and the accumulated time spent in convergence
             checks, both in seconds
    :rtype: (ENV_C4V, Object, float, float)
    :raises ValueError: if ``ctm_args.projector_svd_method`` is not one of
                        ``'DEFAULT'``, ``'SYMEIG'``, ``'SYMARP'``, ``'SYMLOBPCG'``

    Executes specialized CTM algorithm for 1-site C4v symmetric iPEPS starting from the
    initial environment ``env``. This is a single-layer version, which avoids explicitly
    building double-layer on-site tensor.

    To establish the convergence of CTM before the maximal number of iterations is
    reached a ``conv_check`` function is invoked after every CTM move. It is called as
    ``conv_check(state, env, history, ctm_args=ctm_args)`` and has to return a pair
    ``(converged, history)``, where ``history`` is an arbitrary object handed back to
    ``conv_check`` on the next iteration (``None`` on the first one). For example it can
    be a list or dict used for storing CTM data from previous steps to check convergence.

    If desired, CTM can be accelerated by fixed-point corner-matrix algorithm (FPCM)
    controlled by settings in :py:class:`CTMARGS <config.CTMARGS>`.

    .. note::

        Currently, FPCM does not support reverse-mode differentiation.
    """
    # Select the truncated spectral decomposition used to build projectors.
    # NOTE: SVD-based variants (GESDD, RSVD) are not wired in for this C4v driver.
    method = ctm_args.projector_svd_method
    if method in ('DEFAULT', 'SYMEIG'):
        def truncated_eig(M, chi):
            return truncated_eig_sym(M, chi, keep_multiplets=True,
                ad_decomp_reg=ctm_args.ad_decomp_reg,
                verbosity=ctm_args.verbosity_projectors)
    elif method == 'SYMARP':
        def truncated_eig(M, chi):
            return truncated_eig_symarnoldi(M, chi, keep_multiplets=True,
                verbosity=ctm_args.verbosity_projectors)
    elif method == 'SYMLOBPCG':
        def truncated_eig(M, chi):
            return truncated_eig_symlobpcg(M, chi, keep_multiplets=True,
                verbosity=ctm_args.verbosity_projectors)
    else:
        # Report the method of the configuration actually in use, not of the
        # module-level default configuration.
        raise ValueError(f"Projector eig/svd method \"{method}\" not implemented")

    # The first (and, for a 1-site ansatz, presumably only) on-site tensor.
    a = next(iter(state.sites.values()))

    # 1) perform CTMRG
    t_obs = t_ctm = 0.
    history = None
    past_steps_data = {}  # possibly store some data throughout the execution of CTM

    for i in range(ctm_args.ctm_max_iter):
        # FPCM acceleration
        if i >= ctm_args.fpcm_init_iter and ctm_args.fpcm_freq > 0 \
                and i % ctm_args.fpcm_freq == 0:
            t0_fpcm = time.perf_counter()
            fpcm_MOVE_sl(a, env, ctm_args=ctm_args, global_args=global_args,
                past_steps_data=past_steps_data)
            t1_fpcm = time.perf_counter()
            log.info(f"fpcm_MOVE_sl DONE t_fpcm {t1_fpcm-t0_fpcm} [s]")

        t0_ctm = time.perf_counter()
        ctm_MOVE_sl(a, env, truncated_eig, ctm_args=ctm_args, global_args=global_args,
            past_steps_data=past_steps_data)
        # Accumulate right away so the iteration that converges is counted too.
        t_ctm += time.perf_counter() - t0_ctm

        if conv_check is None:
            continue

        # evaluate convergence of the CTMRG procedure
        t0_obs = time.perf_counter()
        converged, history = conv_check(state, env, history, ctm_args=ctm_args)
        t_obs += time.perf_counter() - t0_obs
        if converged:
            if ctm_args.verbosity_ctm_convergence > 0:
                print(f"CTMRG converged at iter= {i}")
            break

    return env, history, t_ctm, t_obs
def run_dl(state, env, conv_check=None, ctm_args=cfg.ctm_args, global_args=cfg.global_args):
    r"""
    :param state: wavefunction
    :param env: initial C4v symmetric environment
    :param conv_check: function which determines the convergence of CTM algorithm. If ``None``,
                       the algorithm performs ``ctm_args.ctm_max_iter`` iterations.
    :param ctm_args: CTM algorithm configuration
    :param global_args: global configuration
    :type state: IPEPS_C4V
    :type env: ENV_C4V
    :type conv_check: function(IPEPS,ENV_C4V,Object,CTMARGS)->(bool,Object)
    :type ctm_args: CTMARGS
    :type global_args: GLOBALARGS
    :return: the ``env`` object that was passed in, the last ``history`` object returned
             by ``conv_check`` (``None`` if ``conv_check`` is ``None``), the accumulated
             time spent in CTM moves and the accumulated time spent in convergence
             checks, both in seconds
    :rtype: (ENV_C4V, Object, float, float)
    :raises ValueError: if ``ctm_args.projector_svd_method`` is not one of
                        ``'DEFAULT'``, ``'SYMEIG'``, ``'SYMARP'``, ``'SYMLOBPCG'``

    A double-layer variant (explicitly building double-layer tensor) of CTM algorithm.
    ``conv_check`` is called as ``conv_check(state, env, history, ctm_args=ctm_args)``
    after every CTM move and has to return a pair ``(converged, history)``.
    See :meth:`run`.
    """
    # Select the truncated spectral decomposition used to build projectors.
    method = ctm_args.projector_svd_method
    if method in ('DEFAULT', 'SYMEIG'):
        # NOTE(review): unlike `run`, `ctm_args.ad_decomp_reg` is not forwarded to
        # truncated_eig_sym here - confirm whether that is intentional.
        def truncated_eig(M, chi):
            return truncated_eig_sym(M, chi, keep_multiplets=True,
                verbosity=ctm_args.verbosity_projectors)
    elif method == 'SYMARP':
        def truncated_eig(M, chi):
            return truncated_eig_symarnoldi(M, chi, keep_multiplets=True,
                verbosity=ctm_args.verbosity_projectors)
    elif method == 'SYMLOBPCG':
        def truncated_eig(M, chi):
            return truncated_eig_symlobpcg(M, chi, keep_multiplets=True,
                verbosity=ctm_args.verbosity_projectors)
    else:
        # Report the method of the configuration actually in use, not of the
        # module-level default configuration.
        raise ValueError(f"Projector eig/svd method \"{method}\" not implemented")

    # The first (and, for a 1-site ansatz, presumably only) on-site tensor.
    a = next(iter(state.sites.values()))

    # 1) perform CTMRG
    t_obs = t_ctm = 0.
    history = None
    past_steps_data = {}  # possibly store some data throughout the execution of CTM

    for i in range(ctm_args.ctm_max_iter):
        # FPCM acceleration
        # NOTE(review): the single-layer FPCM move is used in this double-layer
        # driver as well - confirm this is the intended pairing.
        if i >= ctm_args.fpcm_init_iter and ctm_args.fpcm_freq > 0 \
                and i % ctm_args.fpcm_freq == 0:
            t0_fpcm = time.perf_counter()
            fpcm_MOVE_sl(a, env, ctm_args=ctm_args, global_args=global_args,
                past_steps_data=past_steps_data)
            t1_fpcm = time.perf_counter()
            log.info(f"fpcm_MOVE_sl DONE t_fpcm {t1_fpcm-t0_fpcm} [s]")

        t0_ctm = time.perf_counter()
        ctm_MOVE_dl(a, env, truncated_eig, ctm_args=ctm_args, global_args=global_args)
        # Accumulate right away so the iteration that converges is counted too.
        t_ctm += time.perf_counter() - t0_ctm

        if conv_check is None:
            continue

        # evaluate convergence of the CTMRG procedure
        t0_obs = time.perf_counter()
        converged, history = conv_check(state, env, history, ctm_args=ctm_args)
        t_obs += time.perf_counter() - t0_obs
        if converged:
            if ctm_args.verbosity_ctm_convergence > 0:
                print(f"CTMRG converged at iter= {i}")
            break

    return env, history, t_ctm, t_obs
def _log_cuda_mem(device, who="unknown", uuid=""):
    """Write the peak and current CUDA memory allocation of ``device`` to the module log.

    :param device: device passed on to ``torch.cuda.max_memory_allocated`` and
                   ``torch.cuda.memory_allocated``
    :param who: label of the caller, printed first in the message
    :param uuid: label of the measurement point, printed after ``who``
    """
    peak_part = f"{who} {uuid} GPU-MEM MAX_ALLOC {torch.cuda.max_memory_allocated(device)}"
    current_part = f" CURRENT_ALLOC {torch.cuda.memory_allocated(device)}"
    log.info(peak_part + current_part)


# performs CTM move
def ctm_MOVE_dl(a, env, f_c2x2_decomp, ctm_args=cfg.ctm_args, global_args=cfg.global_args):
    r"""
    :param a: on-site C4v symmetric tensor
    :param env: C4v symmetric environment
    :param f_c2x2_decomp: function performing the truncated spectral decomposition
                          (eigenvalue/svd) of enlarged corner. The ``f_c2x2_decomp`` returns
                          a tuple composed of leading chi spectral values and projector
                          on leading chi spectral values.
    :param ctm_args: CTM algorithm configuration
    :param global_args: global configuration
    :type a: torch.Tensor
    :type env: ENV_C4V
    :type f_c2x2_decomp: function(torch.Tensor, int)->torch.Tensor, torch.Tensor
    :type ctm_args: CTMARGS
    :type global_args: GLOBALARGS

    Executes a single step of C4v symmetric CTM algorithm for 1-site C4v symmetric iPEPS.
    This variant of CTM step does explicitly build double-layer on-site tensor.
    Hence, the leading cost in memory usage with bond dimension `D` is :math:`O(D^8)`.

    The function returns nothing: the new corner and half-row tensors are written to
    ``env.C[env.keyC]`` and ``env.T[env.keyT]``. When ``ctm_args.fwd_checkpoint_move`` is
    set, the core of the move is executed through ``torch.utils.checkpoint.checkpoint``.
    """
    who = "ctm_MOVE_dl"

    # Tensors live on the CPU but the move itself is executed on another device.
    offload = global_args.device == 'cpu' and global_args.offload_to_gpu != 'None'

    # Device whose memory statistics get logged; stays None for a pure-CPU run,
    # in which case nothing is logged.
    loc_gpu = None
    if offload:
        loc_gpu = torch.device(global_args.offload_to_gpu)
    elif global_args.device != 'cpu':
        loc_gpu = a.device
    log_gpu_mem = loc_gpu is not None and ctm_args.verbosity_ctm_move > 0

    def mem_probe(tag):
        # Log memory usage at the measurement point `tag`, if logging is enabled.
        if log_gpu_mem:
            _log_cuda_mem(loc_gpu, who=who, uuid=tag)

    # 0) double-layer on-site tensor: contract the first index of `a` with that of
    #    its conjugate and fuse each of the remaining four index pairs
    fused_dims = [d**2 for d in a.size()[1:5]]
    A = torch.einsum('sefgh,sabcd->eafbgchd', a, a.conj()).contiguous().view(*fused_dims)
    tensors = (A, env.C[env.keyC], env.T[env.keyT])

    # core of the CTM move; takes and returns plain tensors so it can be checkpointed
    def ctm_MOVE_dl_c(*tensors):
        A, C, T = tensors
        if offload:
            target = global_args.offload_to_gpu
            A, C, T = A.to(target), C.to(target), T.to(target)

        # 1) enlarged corner
        mem_probe("c2x2_dl_init")
        corner = c2x2_dl(A, C, T, verbosity=ctm_args.verbosity_projectors)
        mem_probe("c2x2_dl_end")

        # 2) leading spectral values and projector of the enlarged corner
        mem_probe("f_c2x2_decomp_init")
        spectrum, proj = f_c2x2_decomp(corner, env.chi)
        mem_probe("f_c2x2_decomp_end")

        # 3) absorb and truncate. Instead of evaluating proj^T @ corner @ proj,
        #    the new corner is the diagonal matrix of the returned spectral values,
        #    promoted to complex when the enlarged corner is complex.
        if corner.is_complex():
            corner = torch.diag((1.+0.j)*spectrum)
        else:
            corner = torch.diag(spectrum)

        mem_probe("P-view_init")
        proj = proj.view(env.chi, T.size(2), env.chi)
        mem_probe("P-view_end")

        # proj[0] with T[0]; remaining indices: (proj1, proj2, T1, T2)
        mem_probe("PT_init")
        half_row = torch.tensordot(proj, T, ([0], [0]))
        mem_probe("PT_end")

        # (proj1, T2) with A[0], A[1]; remaining indices: (proj2, T1, A2, A3)
        mem_probe("PTA_init")
        half_row = torch.tensordot(half_row, A, ([0, 3], [0, 1]))
        mem_probe("PTA_end")

        # (T1, A2) with conj(proj)[0], conj(proj)[1]; remaining: (proj2, A3, conj(proj)2)
        mem_probe("PTAP_init")
        half_row = torch.tensordot(half_row, proj.conj(), ([1, 2], [0, 1]))
        mem_probe("PTAP_end")
        # move the index inherited from A to the last position
        half_row = half_row.permute(0, 2, 1).contiguous()

        # 4) symmetrize under conjugate-exchange of the first two indices, normalize
        half_row = 0.5*(half_row + half_row.permute(1, 0, 2).conj())
        corner = corner/torch.abs(corner[0, 0])
        half_row = half_row/half_row.norm()

        if offload:
            corner = corner.cpu()
            half_row = half_row.cpu()

        return corner, half_row

    # Call the core function, allowing for checkpointing
    if ctm_args.fwd_checkpoint_move:
        new_C, new_T = checkpoint(ctm_MOVE_dl_c, *tensors)
    else:
        new_C, new_T = ctm_MOVE_dl_c(*tensors)

    env.C[env.keyC] = new_C
    env.T[env.keyT] = new_T
# performs CTM move
def ctm_MOVE_sl(a, env, f_c2x2_decomp, ctm_args=cfg.ctm_args, global_args=cfg.global_args,
    past_steps_data=None):
    r"""
    :param a: on-site C4v symmetric tensor
    :param env: C4v symmetric environment
    :param f_c2x2_decomp: function performing the truncated spectral decomposition
                          (eigenvalue/svd) of enlarged corner. The ``f_c2x2_decomp`` returns
                          a tuple composed of leading chi spectral values and projector
                          on leading chi spectral values.
    :param ctm_args: CTM algorithm configuration
    :param global_args: global configuration
    :param past_steps_data: dictionary used for recording diagnostic information during CTM.
                            It is not read or written by this function.
    :type a: torch.Tensor
    :type env: ENV_C4V
    :type f_c2x2_decomp: function(torch.Tensor, int)->torch.Tensor, torch.Tensor
    :type ctm_args: CTMARGS
    :type global_args: GLOBALARGS
    :type past_steps_data: dict or None

    Executes a single step of C4v symmetric CTM algorithm for 1-site C4v symmetric iPEPS.
    This variant of CTM step does not explicitly build double-layer on-site tensor.

    The function returns nothing: the new corner and half-row tensors are written to
    ``env.C[env.keyC]`` and ``env.T[env.keyT]``. When ``ctm_args.fwd_checkpoint_move`` is
    set, the core of the move is executed through ``torch.utils.checkpoint.checkpoint``.
    """
    # Tensors live on the CPU but the move itself is executed on another device.
    offload = global_args.device == 'cpu' and global_args.offload_to_gpu != 'None'

    # 0) raw tensors entering the move
    tensors = (a, env.C[env.keyC], env.T[env.keyT])

    # core of the CTM move; takes and returns plain tensors so it can be checkpointed
    def ctm_MOVE_sl_c(*tensors):
        a, C, T = tensors
        if offload:
            target = global_args.offload_to_gpu
            a, C, T = a.to(target), C.to(target), T.to(target)

        # 1) enlarged corner
        corner = c2x2_sl(a, C, T, verbosity=ctm_args.verbosity_projectors)

        # 2) leading spectral values and projector of the enlarged corner
        spectrum, proj = f_c2x2_decomp(corner, env.chi)

        # 3) absorb and truncate. Instead of evaluating proj^T @ corner @ proj,
        #    the new corner is the diagonal matrix of the returned spectral values,
        #    promoted to complex when the enlarged corner is complex.
        if corner.is_complex():
            corner = torch.diag((1.+0.j)*spectrum)
        else:
            corner = torch.diag(spectrum)

        proj = proj.view(env.chi, T.size(2), env.chi)

        # proj[0] with T[0]; remaining indices: (proj1, proj2, T1, T2)
        half_row = torch.tensordot(proj, T, ([0], [0]))

        # 4) double-layer tensor contraction - layer by layer
        # 4i) split the two fused indices (proj1 and T2) into one index per layer;
        #     indices afterwards: (proj1a, proj1b, proj2, T1, T2a, T2b)
        d1, d2 = a.size(1), a.size(2)
        half_row = half_row.view(d1, d1, half_row.size(1), half_row.size(2), d2, d2)

        # 4ii) first layer: (proj1a, T2a) with a[1], a[2];
        #      remaining indices: (proj1b, proj2, T1, T2b, a0, a3, a4)
        half_row = torch.tensordot(half_row, a, ([0, 4], [1, 2]))

        # 4iii) second layer: (proj1b, T2b, a0) with conj(a)[1], conj(a)[2], conj(a)[0];
        #       remaining indices: (proj2, T1, a3, a4, conj(a)3, conj(a)4)
        half_row = torch.tensordot(half_row, a.conj(), ([0, 3, 4], [1, 2, 0]))

        # 4iv) bring matching indices of the two layers next to each other and fuse
        #      them; indices afterwards: (proj2, T1, (a3 conj(a)3), (a4 conj(a)4))
        n0, n1 = half_row.size(0), half_row.size(1)
        half_row = half_row.permute(0, 1, 2, 4, 3, 5).contiguous()
        half_row = half_row.view(n0, n1, a.size(3)**2, a.size(4)**2)

        # (T1, fused a3 pair) with conj(proj)[0], conj(proj)[1], then move the
        # remaining fused index to the last position
        half_row = torch.tensordot(half_row, proj.conj(), ([1, 2], [0, 1]))
        half_row = half_row.permute(0, 2, 1).contiguous()

        # 5) symmetrize under conjugate-exchange of the first two indices, normalize
        #    (the half-row tensor by its largest absolute element)
        half_row = 0.5*(half_row + half_row.conj().permute(1, 0, 2))
        corner = corner/torch.abs(corner[0, 0])
        half_row = half_row/half_row.abs().max()

        if offload:
            corner = corner.cpu()
            half_row = half_row.cpu()

        return corner, half_row

    # Call the core function, allowing for checkpointing
    if ctm_args.fwd_checkpoint_move:
        new_C, new_T = checkpoint(ctm_MOVE_sl_c, *tensors)
    else:
        new_C, new_T = ctm_MOVE_sl_c(*tensors)

    env.C[env.keyC] = new_C
    env.T[env.keyT] = new_T