Skip to content
Unverified Commit f3358838 authored by Momchil Velikov's avatar Momchil Velikov Committed by GitHub
Browse files

[AArch64][SVE2.1] Add intrinsics for quadword loads/stores with unscaled offset (#70474)

This patch adds a set of SVE2.1 quadword load/store intrinsics:

  * Contiguous zero-extending load to quadword (single vector)

    sv<type>_t svld1uwq[_<typ>](svbool_t, const <type>_t *ptr);
    sv<type>_t svld1uwq_vnum[_<typ>](svbool_t, const <type>_t *ptr, int64_t vnum);
 
    sv<type>_t svld1udq[_<typ>](svbool_t, const <type>_t *ptr);
    sv<type>_t svld1udq_vnum[_<typ>](svbool_t, const <type>_t *ptr, int64_t vnum);

  * Contiguous truncating store of single vector operand

    void svst1uwq[_<typ>](svbool_t, const <type>_t *ptr, sv<type>_t data);
    void svst1uwq_vnum[_<typ>](svbool_t, const <type>_t *ptr, int64_t vnum, sv<type>_t data);

    void svst1udq[_<typ>](svbool_t, const <type>_t *ptr, sv<type>_t data);
    void svst1udq_vnum[_<typ>](svbool_t, const <type>_t *ptr, int64_t vnum, sv<type>_t data);

  * Gather load quadword

    sv<type>_t svld1q_gather[_u64base]_<typ>(svbool_t pg, svuint64_t zn);
    sv<type>_t svld1q_gather[_u64base]_offset_<typ>(svbool_t pg, svuint64_t zn, int64_t offset);

  * Scatter store quadword

    void svst1q_scatter[_u64base][_<typ>](svbool_t pg, svuint64_t zn, sv<type>_t data);
    void svst1q_scatter[_u64base]_offset[_<typ>](svbool_t pg, svuint64_t zn, int64_t offset, sv<type>_t data);

  * Contiguous load two, three or four quadword structures.

    sv<type>x2_t svld2q[_<typ>](svbool_t pg, const <type>_t *rn);
    sv<type>x2_t svld2q_vnum[_<typ>](svbool_t pg, const <type>_t *rn, int64_t vnum);
    sv<type>x3_t svld3q[_<typ>](svbool_t pg, const <type>_t *rn);
    sv<type>x3_t svld3q_vnum[_<typ>](svbool_t pg, const <type>_t *rn, int64_t vnum);
    sv<type>x4_t svld4q[_<typ>](svbool_t pg, const <type>_t *rn);
    sv<type>x4_t svld4q_vnum[_<typ>](svbool_t pg, const <type>_t *rn, int64_t vnum);

  * Contiguous store two, three or four quadword structures.

    void svst2q[_<typ>](svbool_t pg, <type>_t *rn, sv<type>x2_t zt);
    void svst2q_vnum[_<typ>](svbool_t pg, <type>_t *rn, int64_t vnum, sv<type>x2_t zt);
    void svst3q[_<typ>](svbool_t pg, <type>_t *rn, sv<type>x3_t zt);
    void svst3q_vnum[_<typ>](svbool_t pg, <type>_t *rn, int64_t vnum, sv<type>x3_t zt);
    void svst4q[_<typ>](svbool_t pg, <type>_t *rn, sv<type>x4_t zt);
    void svst4q_vnum[_<typ>](svbool_t pg, <type>_t *rn, int64_t vnum, sv<type>x4_t zt);

ACLE spec: https://github.com/ARM-software/acle/pull/257



Co-authored-by: Caroline Concatto <caroline.concatto@arm.com>
Co-authored-by: Hassnaa Hamdi <hassnaa.hamdi@arm.com>
parent a3908d33
Loading
Loading
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment