prepare for sync
commit ef4d1ddda4
parent 6da55e8f87
@@ -122,17 +122,21 @@ def attn_varlen_func(q, k, v, cu_seqlens_q, cu_seqlens_kv, max_seqlen_q, max_seq
         x = torch.nn.functional.scaled_dot_product_attention(q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2)).transpose(1, 2)
         return x
 
-    batch_size = q.shape[0]
-    q = q.view(q.shape[0] * q.shape[1], *q.shape[2:])
-    k = k.view(k.shape[0] * k.shape[1], *k.shape[2:])
-    v = v.view(v.shape[0] * v.shape[1], *v.shape[2:])
+    B, L, H, C = q.shape
+
+    q = q.flatten(0, 1)
+    k = k.flatten(0, 1)
+    v = v.flatten(0, 1)
+
     if sageattn_varlen is not None:
         x = sageattn_varlen(q, k, v, cu_seqlens_q, cu_seqlens_kv, max_seqlen_q, max_seqlen_kv)
     elif flash_attn_varlen_func is not None:
         x = flash_attn_varlen_func(q, k, v, cu_seqlens_q, cu_seqlens_kv, max_seqlen_q, max_seqlen_kv)
     else:
         raise NotImplementedError('No Attn Installed!')
-    x = x.view(batch_size, max_seqlen_q, *x.shape[2:])
+
+    x = x.unflatten(0, (B, L))
+
     return x
 
 
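Note on the q/k/v change above: flatten(0, 1) collapses the batch and sequence dimensions into one token dimension exactly like the old view-based reshape, and unflatten(0, (B, L)) restores it afterwards. A minimal sketch of that equivalence, using made-up shapes (B=2, L=16, H=4, C=8) rather than anything from this repo:

import torch

# Made-up shapes for the check; the real call sites pass transformer activations.
B, L, H, C = 2, 16, 4, 8
q = torch.randn(B, L, H, C)

# Old path: collapse batch and sequence dims with view().
q_view = q.view(q.shape[0] * q.shape[1], *q.shape[2:])

# New path: flatten(0, 1) performs the same collapse on a contiguous tensor.
q_flat = q.flatten(0, 1)
assert q_flat.shape == (B * L, H, C)
assert torch.equal(q_view, q_flat)

# After the var-len attention call returns (B * L, H, C) rows, unflatten(0, (B, L))
# restores (B, L, H, C), matching the old x.view(batch_size, max_seqlen_q, ...)
# whenever every sequence is padded to max_seqlen_q == L.
x = q_flat  # stand-in for the attention output
assert torch.equal(x.unflatten(0, (B, L)), q)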
@@ -926,7 +930,6 @@ class HunyuanVideoTransformer3DModelPacked(ModelMixin, ConfigMixin, PeftAdapterM
             encoder_hidden_states = torch.cat([extra_encoder_hidden_states, encoder_hidden_states], dim=1)
             encoder_attention_mask = torch.cat([extra_attention_mask, encoder_attention_mask], dim=1)
 
-        with torch.no_grad():
         if batch_size == 1:
             # When batch size is 1, we do not need any masks or var-len funcs since cropping is mathematically same to what we want
             # If they are not same, then their impls are wrong. Ours are always the correct one.
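Note on the deleted line above: torch.no_grad() only disabled gradient tracking inside this branch, so removing it is presumably relevant for training-style use; the cropping-equals-masking equivalence that the batch-size-1 comment relies on is untouched. A minimal sketch of that equivalence, using plain scaled_dot_product_attention and made-up shapes rather than the repo's attention wrappers:

import torch
import torch.nn.functional as F

# Made-up sizes: one sequence with 12 valid tokens and 4 padded tokens.
L_valid, L_pad, H, C = 12, 4, 4, 8
L_total = L_valid + L_pad

q = torch.randn(1, H, L_total, C)  # SDPA expects (batch, heads, tokens, head_dim)
k = torch.randn(1, H, L_total, C)
v = torch.randn(1, H, L_total, C)

# Boolean mask, True = attend; padded keys are blocked for every query.
attn_mask = torch.zeros(1, 1, L_total, L_total, dtype=torch.bool)
attn_mask[..., :L_valid] = True

masked = F.scaled_dot_product_attention(q, k, v, attn_mask=attn_mask)
cropped = F.scaled_dot_product_attention(
    q[..., :L_valid, :], k[..., :L_valid, :], v[..., :L_valid, :])

# On the valid query positions the two paths agree; only the padded query rows
# of the masked output differ, and the cropping path never computes them.
assert torch.allclose(masked[..., :L_valid, :], cropped, atol=1e-5)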