No need to change your environment:
Locate the GELU class (in my installation it is in the activation.py file) and modify it so that it no longer uses the approximate attribute.
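If you are not sure which activation.py your environment actually loads, a quick lookup (just a helper to find the file, not part of the fix itself) is:

import torch.nn.modules.activation as activation
print(activation.__file__)  # prints the full path of the activation.py file to edit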
Comment out the original definition as follows:
# class GELU(Module):
#     r"""Applies the Gaussian Error Linear Units function:
#
#     .. math:: \text{GELU}(x) = x * \Phi(x)
#
#     where :math:`\Phi(x)` is the Cumulative Distribution Function for Gaussian Distribution.
#     When the approximate argument is 'tanh', Gelu is estimated with:
#
#     .. math:: \text{GELU}(x) = 0.5 * x * (1 + \text{Tanh}(\sqrt{2 / \pi} * (x + 0.044715 * x^3)))
#
#     Args:
#         approximate (str, optional): the gelu approximation algorithm to use:
#             ``'none'`` | ``'tanh'``. Default: ``'none'``
#
#     Shape:
#         - Input: :math:`(*)`, where :math:`*` means any number of dimensions.
#         - Output: :math:`(*)`, same shape as the input.
#
#     .. image:: ../scripts/activation_images/GELU.png
#
#     Examples::
#
#         >>> m = nn.GELU()
#         >>> input = torch.randn(2)
#         >>> output = m(input)
#     """
#     __constants__ = ['approximate']
#     approximate: str
#
#     def __init__(self, approximate: str = 'none') -> None:
#         super(GELU, self).__init__()
#         self.approximate = approximate
#
#     def forward(self, input: Tensor) -> Tensor:
#         return F.gelu(input, approximate=self.approximate)
#
#     def extra_repr(self) -> str:
#         return 'approximate={}'.format(repr(self.approximate))
Replace it with:
class GELU(Module):
    r"""Applies the Gaussian Error Linear Units function:

    .. math:: \text{GELU}(x) = x * \Phi(x)

    where :math:`\Phi(x)` is the Cumulative Distribution Function for Gaussian Distribution.

    Shape:
        - Input: :math:`(*)`, where :math:`*` means any number of dimensions.
        - Output: :math:`(*)`, same shape as the input.

    .. image:: ../scripts/activation_images/GELU.png

    Examples::

        >>> m = nn.GELU()
        >>> input = torch.randn(2)
        >>> output = m(input)
    """

    def __init__(self) -> None:
        super(GELU, self).__init__()

    def forward(self, input: Tensor) -> Tensor:
        return F.gelu(input)

    def extra_repr(self) -> str:
        return 'GELU()'
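After saving activation.py, a minimal sanity check (just a sketch; the real test is re-loading the model that previously failed on the approximate attribute) looks like this:

import torch
import torch.nn as nn

m = nn.GELU()      # the patched class takes no 'approximate' argument
x = torch.randn(2)
print(m(x))        # forward pass runs without referencing self.approximate
print(m)           # repr of the patched GELU module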