% Perera (2022): class-conditional generative models for producing synthetic
% genotypes of a chosen ancestry.
% NOTE(review): this entry is typed as an article but carries no journal field
% (required for that type), and the editor field appears to list supervisors
% rather than editors -- confirm whether this is actually a thesis and supply
% the missing journal or school accordingly.
@article{xPerera22,
  author   = {Perera, Maria},
  editor   = {Mas-Montserrat, Daniel and {Gir{\'o}-i-Nieto}, Xavier and Ioannidis, Alexander G.},
  title    = {Ancestry-conditioned Generative Models for Genotyping},
  year     = {2022},
  abstract = {

Local ancestry inference (LAI) identifies the ancestry of each segment of an individual{\textquoteright}s genome and it is a critical step in the analysis of human genomes with applications from pharmacogenomics and personalized medicine to increase detection of genetic associations.\ 

New LAI techniques are appearing at a fast pace in both industry and academic research and large data-sets of human genomic sequences from the ancestries of interest are required to train those methods. Usually, those data-sets are protected by privacy regulations, are proprietary or accessible only when they come with restrictions due to its nature. An interesting way to overcome those difficulties is through the generation of data samples that could be similar enough to real sequences from ancestries of interest. A generalized model can be openly shared because there is no real individual information in there.\ 

Thus, we present a class-conditional Generative adversarial Model and a Conditional Generative Moment-Matching Network intended to generate new realistic genotypes of a desired ancestry. In addition, we present a privacy mechanism that extracts features from the real data to generate new realistic genotypes by using features.

},
}

% Mas-Montserrat et al. (EMBC 2022): Generative Moment Matching Networks for
% simulating SNP sequences.
% The compound surname with the lowercase particle "i" is wrapped in braces so
% that classic BibTeX keeps it as a single last name instead of splitting it at
% the hyphens.
@inproceedings{cMas-Montserrat,
  author    = {Mas-Montserrat, Daniel and Perera, Maria and Barrab{\'e}s, M{\'\i}riam and Geleta, Margarita and {Gir{\'o}-i-Nieto}, Xavier and Ioannidis, Alexander G.},
  title     = {Generative Moment Matching Networks for Genotype Simulation},
  booktitle = {44th Annual International Conference of the {IEEE} Engineering in Medicine and Biology Society ({EMBC}{\textquoteright}22)},
  year      = {2022},
  abstract  = {

The generation of synthetic genomic sequences using neural networks has potential to overcome privacy and data sharing restrictions and to mitigate potential bias within datasets due to under-representation of some population groups. However, there is not a consensus on which architectures, training procedures, and evaluation metrics should be used when simulating single nucleotide polymorphism (SNP) sequences with neural networks. In this paper, we explore the use of Generative Moment Matching Networks (GMMNs) for SNP simulation, we present some architectural and procedural changes to properly train the networks, and we introduce an evaluation scheme to qualitatively and quantitatively assess the quality of the simulated sequences.

},
}