Publication list

2022

  • Yongtian Wang, Liran Juan, Jiajie Peng, Tao Wang, Tianyi Zang, Yadong Wang. "Explore potential disease related metabolites based on latent factor model", BMC Genomics, 2022, 23: 269.(Article)
  • [Bibtex]

    @article{nwpu_index90,
      author   = {Wang, Yongtian and Juan, Liran and Peng, Jiajie and Wang, Tao and Zang, Tianyi and Wang, Yadong},
      title    = {Explore potential disease related metabolites based on latent factor model},
      journal  = {BMC Genomics},
      volume   = {23},
      pages    = {269},
      year     = {2022},
      type     = {Article},
      abstract = {Background In biological systems, metabolomics can not only contribute to the discovery of metabolic signatures for disease diagnosis, but is very helpful to illustrate the underlying molecular disease-causing mechanism. Therefore, identification of disease-related metabolites is of great significance for comprehensively understanding the pathogenesis of diseases and improving clinical medicine. Results In the paper, we propose a disease and literature driven metabolism prediction model (DLMPM) to identify the potential associations between metabolites and diseases based on latent factor model. We build the disease glossary with disease terms from different databases and an association matrix based on the mapping between diseases and metabolites. The similarity of diseases and metabolites is used to complete the association matrix. Finally, we predict potential associations between metabolites and diseases based on the matrix decomposition method. In total, 1,406 direct associations between diseases and metabolites are found. There are 119,206 unknown associations between diseases and metabolites predicted with a coverage rate of 80.88\%. Subsequently, we extract training sets and testing sets based on data increment from the database of disease-related metabolites and assess the performance of DLMPM on 19 diseases. As a result, DLMPM is proven to be successful in predicting potential metabolic signatures for human diseases with an average AUC value of 82.33\%. Conclusion In this paper, a computational model is proposed for exploring metabolite-disease pairs and has good performance in predicting potential metabolites related to diseases through adequate validation. The results show that DLMPM has a better performance in prioritizing candidate diseases-related metabolites compared with the previous methods and would be helpful for researchers to reveal more information about human diseases.},
      doi      = {10.1186/s12864-022-08504-w},
      url      = {https://bmcgenomics.biomedcentral.com/articles/10.1186/s12864-022-08504-w},
    }

  • Tao Wang, Miguel E. Rentería, Jiajie Peng. "Editorial: Data Mining and Statistical Methods for Knowledge Discovery in Diseases Based on Multimodal Omics", Frontiers in Genetics, 2022, 13: 895796.(Editorial Material)
  • [Bibtex]

    @article{nwpu_index91,
      author  = {Wang, Tao and Renter{\'\i}a, Miguel E. and Peng, Jiajie},
      title   = {Editorial: Data Mining and Statistical Methods for Knowledge Discovery in Diseases Based on Multimodal Omics},
      journal = {Frontiers in Genetics},
      volume  = {13},
      pages   = {895796},
      year    = {2022},
      type    = {Editorial Material},
      url     = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9086538/},
    }

  • Jie Liu, Xuequn Shang, Lingyun Song, Yacong Tan. "Research Progress of Graph Neural Networks on Complex Graph Mining", Journal of Software, 2022, 33: 1-42.(Article)
  • [Bibtex]

    @article{nwpu_index94,
      author   = {Liu, Jie and Shang, Xuequn and Song, Lingyun and Tan, Yacong},
      title    = {Research Progress of Graph Neural Networks on Complex Graph Mining},
      journal  = {Journal of Software},
      volume   = {33},
      pages    = {1--42},
      year     = {2022},
      type     = {Article},
      abstract = {Graph Neural Networks (GNNs) establish a deep learning framework for non-Euclidean spatial data. Compared with traditional network embedding methods, they perform deeper aggregating operations on graph structures. In recent years, GNNs have been extended to complex graphs. However, there lacks qualified surveys which gives comprehensive and systematic classification and summary on GNNs based on complex graphs. This paper divides the complex graphs into 3 categories, i.e., heterogeneous graphs, dynamic graphs and hypergraphs. GNNs based on heterogeneous graphs are divided into 2 types, i.e., edge-type aware and meta-path aware, according to the procedure the information is aggregated. Dynamic GNNs graphs are divided into three categories: RNN-based methods, autoencoder-based methods, and spatio-temporal graph neural networks. Hypergraph GNNs are divided into expansion methods and non-expansion methods, and the expansion methods are further divided into star-expansion, clique-expansion and line-expansion according to the expansion mode they use. We detailedly illustrate the core idea of every method, compare the advantages and disadvantages of different algorithms, systematically list the key procedures, (cross) application fields and commonly used data sets of different complex graph GNNs, and propose some possible research directions.},
      url      = {http://www.jos.org.cn/jos/article/abstract/6626},
    }

  • Lingyun Song, Mengzhen Yu, Xuequn Shang, Yu Lu, Jun Liu, Ying Zhang, Zhanhuai Li. "A deep grouping fusion neural network for multimedia content understanding", IET Image Processing, 2022, 16: 2398-2411.(Article)
  • [Bibtex]

    @article{nwpu_index95,
      author   = {Song, Lingyun and Yu, Mengzhen and Shang, Xuequn and Lu, Yu and Liu, Jun and Zhang, Ying and Li, Zhanhuai},
      title    = {A deep grouping fusion neural network for multimedia content understanding},
      journal  = {IET Image Processing},
      volume   = {16},
      pages    = {2398--2411},
      year     = {2022},
      type     = {Article},
      abstract = {How Deep Neural Networks (DNNs) best cope with the understanding of multimedia contents still remains an open problem, mainly due to two factors. First, conventional DNNs cannot effectively learn the representations of the images with sparse visual information. For example, the images describing knowledge concepts in textbooks. Second, existing DNNs cannot effectively capture the fine-grained interactions between the images and text descriptions. To address these issues, we propose a deep Cross-Media Grouping Fusion Network (CMGFN), which mainly has two distinctive properties: 1) CMGFN can effectively learn visual features from the images with sparse visual information. This is achieved by first progressively adjusting the attention of convolution filters to valuable visual regions, and then enhancing the use of key visual information in feature construction. 2) By a cross-media grouping co-attention mechanism, CMGFN can effectively use the interactions between visual features of different semantics and textual descriptions, to learn cross-media features representing different fine-grained semantics in different groups. Empirical studies demonstrate that CMGFN not only achieves state-of-the-art performance on the multimedia documents containing sparse visual information, but also shows superior general applicability on other multimedia data, e.g., the multimedia fake news.},
      doi      = {10.1049/ipr2.12496},
      url      = {https://ietresearch.onlinelibrary.wiley.com/doi/full/10.1049/ipr2.12496},
    }

  • Wei Wang, Ruijiang Han, Menghan Zhang, Yuxian Wang, Tao Wang, Yongtian Wang, Xuequn Shang, Jiajie Peng. "A network-based method for brain disease gene prediction by integrating brain connectome and molecular network", Briefings in Bioinformatics, 2022, 23: bbab459.(Article)
  • [Bibtex]

    @article{nwpu_index96,
      author   = {Wang, Wei and Han, Ruijiang and Zhang, Menghan and Wang, Yuxian and Wang, Tao and Wang, Yongtian and Shang, Xuequn and Peng, Jiajie},
      title    = {A network-based method for brain disease gene prediction by integrating brain connectome and molecular network},
      journal  = {Briefings in Bioinformatics},
      volume   = {23},
      number   = {1},
      pages    = {bbab459},
      year     = {2022},
      type     = {Article},
      abstract = {Brain disease gene identification is critical for revealing the biological mechanism and developing drugs for brain diseases. To enhance the identification of brain disease genes, similarity-based computational methods, especially network-based methods, have been adopted for narrowing down the searching space. However, these network-based methods only use molecular networks, ignoring brain connectome data, which have been widely used in many brain-related studies. In our study, we propose a novel framework, named brainMI, for integrating brain connectome data and molecular-based gene association networks to predict brain disease genes. For the consistent representation of molecular-based network data and brain connectome data, brainMI first constructs a novel gene network, called brain functional connectivity (BFC)-based gene network, based on resting-state functional magnetic resonance imaging data and brain region-specific gene expression data. Then, a multiple network integration method is proposed to learn low-dimensional features of genes by integrating the BFC-based gene network and existing protein–protein interaction networks. Finally, these features are utilized to predict brain disease genes based on a support vector machine-based model. We evaluate brainMI on four brain diseases, including Alzheimer’s disease, Parkinson’s disease, major depressive disorder and autism. brainMI achieves of 0.761, 0.729, 0.728 and 0.744 using the BFC-based gene network alone and enhances the molecular network-based performance by 6.3\% on average. In addition, the results show that brainMI achieves higher performance in predicting brain disease genes compared to the existing three state-of-the-art methods.},
      url      = {https://academic.oup.com/bib/article-abstract/23/1/bbab459/6415315},
    }

  • Xingyi Li, Min Li, Ju Xiang, Zhelin Zhao, Xuequn Shang. "SEPA: signaling entropy-based algorithm to evaluate personalized pathway activation for survival analysis on pan-cancer data", Bioinformatics, 2022, 38: 2536-2543.(Article)
  • [Bibtex]

    @article{nwpu_index106,
      author   = {Li, Xingyi and Li, Min and Xiang, Ju and Zhao, Zhelin and Shang, Xuequn},
      title    = {{SEPA}: signaling entropy-based algorithm to evaluate personalized pathway activation for survival analysis on pan-cancer data},
      journal  = {Bioinformatics},
      volume   = {38},
      number   = {9},
      pages    = {2536--2543},
      year     = {2022},
      type     = {Article},
      abstract = {Motivation Biomarkers with prognostic ability and biological interpretability can be used to support decision-making in the survival analysis. Genes usually form functional modules to play synergistic roles, such as pathways. Predicting significant features from the functional level can effectively reduce the adverse effects of heterogeneity and obtain more reproducible and interpretable biomarkers. Personalized pathway activation inference can quantify the dysregulation of essential pathways involved in the initiation and progression of cancers, and can contribute to the development of personalized medical treatments. Results In this study, we propose a novel method to evaluate personalized pathway activation based on signaling entropy for survival analysis (SEPA), which is a new attempt to introduce the information-theoretic entropy in generating pathway representation for each patient. SEPA effectively integrates pathway-level information into gene expression data, converting the high-dimensional gene expression data into the low-dimensional biological pathway activation scores. SEPA shows its classification power on the prognostic pan-cancer genomic data, and the potential pathway markers identified based on SEPA have statistical significance in the discrimination of high- and low-risk cohorts and are likely to be associated with the initiation and progress of cancers. The results show that SEPA scores can be used as an indicator to precisely distinguish cancer patients with different clinical outcomes, and identify important pathway features with strong discriminative power and biological interpretability.},
      url      = {https://academic.oup.com/bioinformatics/article-abstract/38/9/2536/6535231},
    }

  • Lingyun Song, Xuequn Shang, Chen Yang, Mingxuan Sun. "Attribute-Guided Multiple Instance Hashing Network for Cross-Modal Zero-Shot Hashing", IEEE Transactions on Multimedia, 2022, Early Access.(Article)
  • [Bibtex]

    @article{nwpu_index119,
      author   = {Song, Lingyun and Shang, Xuequn and Yang, Chen and Sun, Mingxuan},
      title    = {Attribute-Guided Multiple Instance Hashing Network for Cross-Modal Zero-Shot Hashing},
      journal  = {{IEEE} Transactions on Multimedia},
      year     = {2022},
      note     = {Early Access},
      type     = {Article},
      abstract = {Cross-Modal Zero-Shot Hashing (CMZSH) is an important image retrieval technique, e.g., Text Based Image Retrieval. Most of existing CMZSH methods mainly use semantic attributes as guidance to generate hash codes for both the images and texts of seen and unseen categories. However, existing CMZSH methods only focus on learning global attribute vectors and hash codes for images, which mixes up information of complex semantics and background clutters, and thus impedes the retrieval performance. To solve this issue, we propose an Attribute-Guided Multiple Instance Hashing (AG-MIH) network for CMZSH, where each instance represents one image region. Instead of generating global image hash codes, AG-MIH can effectively learn instance-level hash codes based on instance attributes. To improve the attribute learning for instances, AG-MIH can exploit a novel 2-D Category-Attribute Relation (CAR) layer, which uses different matching templates to model the relationships between each instance and the attributes for different categories. Under the guidance of semantic attributes, AG-MIH can effectively learn hash codes for each visual instance and texts by a Multi-stream Instance Hashing Refinement (MIHR) procedure. In the MIHR, the pseudo supervisions for the instance-level attributes and hash codes in each stream are from its proceeding stream. Empirical studies on benchmark datasets show that AG-MIH achieves state-of-the-art performance on both cross-modal and single-modal zero-shot image retrieval tasks.},
      url      = {https://ieeexplore.ieee.org/document/9827595},
    }

2021

  • Jiaqi Cui, Yupei Zhang, Rui An, Yue Yun, Huan Dai, Xuequn Shang. "Identifying Key Features in Student Grade Prediction", In 2021 IEEE International Conference on Progress in Informatics and Computing (PIC), 2021: 519-523.(Full Paper)
  • [Bibtex]

    @inproceedings{nwpu_index78,
      author    = {Cui, Jiaqi and Zhang, Yupei and An, Rui and Yun, Yue and Dai, Huan and Shang, Xuequn},
      title     = {Identifying Key Features in Student Grade Prediction},
      booktitle = {2021 {IEEE} International Conference on Progress in Informatics and Computing ({PIC})},
      pages     = {519--523},
      year      = {2021},
      type      = {Full Paper},
      abstract  = {With the development of education data mining and the data of academic affairs accumulated, the performance of students in school could be analyzed from different views and explore more precious aspects which influence the grades of students. Our research conducts data mining on student basic courses information, learning behavior information and admission information, which will help to find the relationship between them. This work mainly focus on exploring the key features that take the important roles in student academic performance. Then the work takes the consider of identifying the relationship between student behaviors and their grades. By using the advanced machine learning methods and feature analysis methods, LASSO, the work rated the most important features of student behaviors. We found several key relationships between student behaviors and their grades, for example, the more books one borrows, the better grade he/she will get. This work would help the educators and students to better understand the relationship between connotative factors and the student achievement.},
      url       = {https://ieeexplore.ieee.org/document/9687042},
    }

  • Yupei Zhang, Yue Yun, Rui An, Jiaqi Cui, Huan Dai, Xuequn Shang. "Educational Data Mining Techniques for Student Performance Prediction: Method Review and Comparison Analysis", Frontiers in Psychology, 2021, 12: 698490.(Review)
  • [Bibtex]

    @article{nwpu_index81,
      author   = {Zhang, Yupei and Yun, Yue and An, Rui and Cui, Jiaqi and Dai, Huan and Shang, Xuequn},
      title    = {Educational Data Mining Techniques for Student Performance Prediction: Method Review and Comparison Analysis},
      journal  = {Frontiers in Psychology},
      volume   = {12},
      pages    = {698490},
      year     = {2021},
      type     = {Review},
      abstract = {Student performance prediction (SPP) aims to evaluate the grade that a student will reach before enrolling in a course or taking an exam. This prediction problem is a kernel task toward personalized education and has attracted increasing attention in the field of artificial intelligence and educational data mining (EDM). This paper provides a systematic review of the SPP study from the perspective of machine learning and data mining. This review partitions SPP into five stages, i.e., data collection, problem formalization, model, prediction, and application. To have an intuition on these involved methods, we conducted experiments on a data set from our institute and a public data set. Our educational dataset composed of 1,325 students, and 832 courses was collected from the information system, which represents a typical higher education in China. With the experimental results, discussions on current shortcomings and interesting future works are finally summarized from data collections to practices. This work provides developments and challenges in the study task of SPP and facilitates the progress of personalized education.},
      url      = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8688359/},
    }

  • Huan Dai, Yupei Zhang, Yue Yun, Xuequn Shang. "An Improved Deep Model for Knowledge Tracing and Question-Difficulty Discovery", In PRICAI 2021: Trends in Artificial Intelligence, vol. 13032, 2021: 362-375.(Full Paper)
  • [Bibtex]

    @inproceedings{nwpu_index82,
      author    = {Dai, Huan and Zhang, Yupei and Yun, Yue and Shang, Xuequn},
      title     = {An Improved Deep Model for Knowledge Tracing and Question-Difficulty Discovery},
      booktitle = {{PRICAI} 2021: Trends in Artificial Intelligence},
      volume    = {13032},
      pages     = {362--375},
      year      = {2021},
      type      = {Full Paper},
      abstract  = {Knowledge Tracing (KT) aims to analyze a student’s acquisition of skills over time by examining the student’s performance on questions of those skills. In recent years, a recurrent neural network model called deep knowledge tracing (DKT) has been proposed to handle the knowledge tracing task and literature has shown that DKT generally outperforms traditional methods. However, DKT and its variants often lead to oscillation results on a skill’s state may due to it ignoring the skill’s difficulty or the question’s difficulty. As a result, even when a student performs well on a skill, the prediction of that skill’s mastery level decreases instead, and vice versa. This is undesirable and unreasonable because student’s performance is expected to transit gradually over time. In this paper, we propose to learn the knowledge tracing model in a “simple-to-difficult” process, leading to a method of Self-paced Deep Knowledge Tracing (SPDKT). SPDKT learns the difficulty of per question from the student’s responses to optimize the question’s order and smooth the learning process. With mitigating the cause of oscillations, SPDKT has the capability of robustness to the puzzling questions. The experiments on real-world datasets show SPDKT achieves state-of-the-art performance on question response prediction and reaches interesting interpretations in education.},
      doi       = {10.1007/978-3-030-89363-7_28},
      url       = {https://link.springer.com/chapter/10.1007/978-3-030-89363-7_28},
    }

  • Yupei Zhang, Rui An, Shuhui Liu, Jiaqi Cui, Xuequn Shang. "Predicting and Understanding Student Learning Performance Using Multi-source Sparse Attention Convolutional Neural Networks", IEEE Transactions on Big Data, 2021, online.(Article)
  • [Bibtex]

    @article{nwpu_index83,
      author   = {Zhang, Yupei and An, Rui and Liu, Shuhui and Cui, Jiaqi and Shang, Xuequn},
      title    = {Predicting and Understanding Student Learning Performance Using Multi-source Sparse Attention Convolutional Neural Networks},
      journal  = {{IEEE} Transactions on Big Data},
      year     = {2021},
      note     = {Online},
      type     = {Article},
      abstract = {Predicting and understanding student learning performance has been a long-standing task in learning science, which can benefit personalized teaching and learning. This study shows that the progress towards this task can be accelerated by using learning record data to feed a deep learning model that considers the intrinsic course association and the structured features. We proposed a multi-source sparse attention convolutional neural network (MsaCNN) to predict the course grades in a general formulation. MsaCNN adopts multi-scale convolution kernels on student grade records to capture structured features, a global attention strategy to discover the relationship between courses, and multiple input-heads to integrate multi-source features. All achieved features are then poured into a softmax classifier towards an end-to-end supervised deep learning model. Conducting insights into higher education on real-world university datasets, the results show that MsaCNN achieves better performance than traditional methods and delivers an interpretation of student performance by virtue of the resulted course relationships. Inspired by this interpretation, we created an association map for all mentioned courses, followed by evaluating the map with a questionnaire survey.},
      url      = {https://ieeexplore.ieee.org/abstract/document/9601269},
    }

  • Yue Yun, Huan Dai, Yupei Zhang, Xuequn Shang, Zhanhuai Li. "State-of-the-Art Survey of Personalized Learning Path Recommendation", Journal of Software, 2021, online.(Article)
  • [Bibtex]

    @article{nwpu_index84,
      author   = {Yun, Yue and Dai, Huan and Zhang, Yupei and Shang, Xuequn and Li, Zhanhuai},
      title    = {State-of-the-Art Survey of Personalized Learning Path Recommendation},
      journal  = {Journal of Software},
      year     = {2021},
      note     = {Online},
      type     = {Article},
      abstract = {Recently, with the rapid development of information technology, emerging technologies represented by artificial intelligence are widly applied in education, triggering profound changes in the concept and mode of learning. And, online learning transcends the limitations of time and space, providing more possibilities for learners to learn "anytime and anywhere". However, the separation of time and space of teachers and students in online learning makes teachers could not handle students' learning process, limits the quality of teaching and learning. Diversified learning targets and massive learning resources generate some new problems, i.e., how to quickly accomplish learning targets, reduce learning costs and reasonably allocate learning resources. And these problems have become the limitations of the development of individuals and the society. However, traditional "one size fitsall" educational model can no longer fit human's nedds, thus, we need one more effieient and scientific personalized education model to help learners maximize their learning targets with minimal learning costs. Based on these considerations, what we need is to new adaptive learning system which could automatically and efficiently identify learner personalized characteristics, efficiently organize and allocate learning resources, and plan a global personalized learning path. In this paper, we systematically review and analyze the current researches on personalized learning path recommendation, and we analyze different research sight from multidisciplinary perspective. Then, we summarize the most applied algorithm in current research. Finally, we highlight the main shortcomings of the current rearch, which we should pay more attention to.},
      url      = {http://www.jos.org.cn/josen/article/abstract/6518},
    }

  • Yupei Zhang, Shuhui Liu, Xuequn Shang. "An MRI Study on Effects of Math Education on Brain Development Using Multi-Instance Contrastive Learning", Frontiers in Psychology, 2021, 12: 765754.(Article)
  • [Bibtex]

    @article{nwpu_index86,
      author   = {Zhang, Yupei and Liu, Shuhui and Shang, Xuequn},
      title    = {An {MRI} Study on Effects of Math Education on Brain Development Using Multi-Instance Contrastive Learning},
      journal  = {Frontiers in Psychology},
      volume   = {12},
      pages    = {765754},
      year     = {2021},
      type     = {Article},
      abstract = {This paper explores whether mathematical education has effects on brain development from the perspective of brain MRIs. While biochemical changes in the left middle front gyrus region of the brain have been investigated, we proposed to classify students by using MRIs from the intraparietal sulcus (IPS) region that was left untouched in the previous study. On the cropped IPS regions, the proposed model developed popular contrastive learning (CL) to solve the problem of multi-instance representation learning. The resulted data representations were then fed into a linear neural network to identify whether students were in the math group or the non-math group. Experiments were conducted on 123 adolescent students, including 72 math students and 51 non-math students. The proposed model achieved an accuracy of 90.24 \% for student classification, gaining more than 5\% improvements compared to the classical CL frame. Our study provides not only a multi-instance extension to CL and but also an MRI insight into the impact of mathematical studying on brain development.},
      doi      = {10.3389/fpsyg.2021.765754},
      url      = {https://www.frontiersin.org/articles/10.3389/fpsyg.2021.765754/full},
    }

  • Shuhui Liu, Yupei Zhang, Xuequn Shang, Zhaolei Zhang. "ProTICS reveals prognostic impact of tumor infiltrating immune cells in different molecular subtypes", Briefings in Bioinformatics, 2021, 22: bbab164.(Article)
  • [Bibtex]

    @article{nwpu_index87,
      author   = {Liu, Shuhui and Zhang, Yupei and Shang, Xuequn and Zhang, Zhaolei},
      title    = {{ProTICS} reveals prognostic impact of tumor infiltrating immune cells in different molecular subtypes},
      journal  = {Briefings in Bioinformatics},
      volume   = {22},
      number   = {6},
      pages    = {bbab164},
      year     = {2021},
      type     = {Article},
      abstract = {Different subtypes of the same cancer often show distinct genomic signatures and require targeted treatments. The differences at the cellular and molecular levels of tumor microenvironment in different cancer subtypes have significant effects on tumor pathogenesis and prognostic outcomes. Although there have been significant researches on the prognostic association of tumor infiltrating lymphocytes in selected histological subtypes, few investigations have systemically reported the prognostic impacts of immune cells in molecular subtypes, as quantified by machine learning approaches on multi-omics datasets. This paper describes a new computational framework, ProTICS, to quantify the differences in the proportion of immune cells in tumor microenvironment and estimate their prognostic effects in different subtypes. First, we stratified patients into molecular subtypes based on gene expression and methylation profiles by applying nonnegative tensor factorization technique. Then we quantified the proportion of cell types in each specimen using an mRNA-based deconvolution method. For tumors in each subtype, we estimated the prognostic effects of immune cell types by applying Cox proportional hazard regression. At the molecular level, we also predicted the prognosis of signature genes for each subtype. Finally, we benchmarked the performance of ProTICS on three TCGA datasets and another independent METABRIC dataset. ProTICS successfully stratified tumors into different molecular subtypes manifested by distinct overall survival. Furthermore, the different immune cell types showed distinct prognostic patterns with respect to molecular subtypes. This study provides new insights into the prognostic association between immune cells and molecular subtypes, showing the utility of immune cells as potential prognostic markers. Availability: R code is available at https://github.com/liu-shuhui/ProTICS},
      url      = {https://academic.oup.com/bib/article-abstract/22/6/bbab164/6271999},
    }

  • Tao Wang, Yongzhuang Liu, Junpeng Ruan, Xianjun Dong, Yadong Wang, Jiajie Peng. "A pipeline for RNA-seq based eQTL analysis with automated quality control procedures", BMC Bioinformatics, 2021, 22: 403.(Article)
  • [Bibtex]

    @article{nwpu_index92,
      author   = {Wang, Tao and Liu, Yongzhuang and Ruan, Junpeng and Dong, Xianjun and Wang, Yadong and Peng, Jiajie},
      title    = {A pipeline for {RNA-seq} based {eQTL} analysis with automated quality control procedures},
      journal  = {BMC Bioinformatics},
      volume   = {22},
      pages    = {403},
      year     = {2021},
      type     = {Article},
      abstract = {Background: Advances in the expression quantitative trait loci (eQTL) studies have provided valuable insights into the mechanism of diseases and traits-associated genetic variants. However, it remains challenging to evaluate and control the quality of multi-source heterogeneous eQTL raw data for researchers with limited computational background. There is an urgent need to develop a powerful and user-friendly tool to automatically process the raw datasets in various formats and perform the eQTL mapping afterward. Results: In this work, we present a pipeline for eQTL analysis, termed eQTLQC, featured with automated data preprocessing for both genotype data and gene expression data. Our pipeline provides a set of quality control and normalization approaches, and utilizes automated techniques to reduce manual intervention. We demonstrate the utility and robustness of this pipeline by performing eQTL case studies using multiple independent real-world datasets with RNA-seq data and whole genome sequencing (WGS) based genotype data. Conclusions: eQTLQC provides a reliable computational workflow for eQTL analysis. It provides standard quality control and normalization as well as eQTL mapping procedures for eQTL raw data in multiple formats. The source code, demo data, and instructions are freely available at https://github.com/stormlovetao/eQTLQC.},
      doi      = {10.1186/s12859-021-04307-0},
      url      = {https://link.springer.com/article/10.1186/s12859-021-04307-0},
    }

  • Lingyun Song, Jun Liu, Mingxuan Sun, Xuequn Shang. "Weakly Supervised Group Mask Network for Object Detection", International Journal of Computer Vision, 2021, 129: 681-702.(Article)
  • [Bibtex]

    @article{nwpu_index93,
      author   = {Song, Lingyun and Liu, Jun and Sun, Mingxuan and Shang, Xuequn},
      title    = {Weakly Supervised Group Mask Network for Object Detection},
      journal  = {International Journal of Computer Vision},
      volume   = {129},
      pages    = {681--702},
      year     = {2021},
      type     = {Article},
      abstract = {Learning object detectors from weak image annotations is an important yet challenging problem. Many weakly supervised approaches formulate the task as a multiple instance learning problem, where each image is represented as a bag of instances. For predicting the score for each object that occurs in an image, existing MIL based approaches tend to select the instance that responds more strongly to a specific class, which, however, overlooks the contextual information. Besides, objects often exhibit dramatic variations such as scaling and transformations, which makes them hard to detect. In this paper, we propose the weakly supervised group mask network (WSGMN), which mainly has two distinctive properties: (i) it exploits the relations among regions to generate community instances, which contain context information and are robust to object variations. (ii) It generates a mask for each label group, and utilizes these masks to dynamically select the feature information of the most useful community instances for recognizing specific objects. Extensive experiments on several benchmark datasets demonstrate the effectiveness of WSGMN on the tasks of weakly supervised object detection.},
      doi      = {10.1007/s11263-020-01397-w},
      url      = {https://link.springer.com/article/10.1007/s11263-020-01397-w},
    }

  • Jiajie Peng, Yuxian Wang, Jiaojiao Guan, Jingyi Li, Ruijiang Han, Jianye Hao, Zhongyu Wei, Xuequn Shang. "An end-to-end heterogeneous graph representation learning-based framework for drug-target interaction prediction", Briefings in Bioinformatics, 2021, 22: bbaa430.(Article)
  • [Bibtex]

    @article{nwpu_index97,
      author   = {Peng, Jiajie and Wang, Yuxian and Guan, Jiaojiao and Li, Jingyi and Han, Ruijiang and Hao, Jianye and Wei, Zhongyu and Shang, Xuequn},
      title    = {An end-to-end heterogeneous graph representation learning-based framework for drug-target interaction prediction},
      journal  = {Briefings in Bioinformatics},
      volume   = {22},
      pages    = {bbaa430},
      year     = {2021},
      type     = {Article},
      abstract = {Accurately identifying potential drug--target interactions (DTIs) is a key step in drug discovery. Although many related experimental studies have been carried out for identifying DTIs in the past few decades, the biological experiment-based DTI identification is still time-consuming and expensive. Therefore, it is of great significance to develop effective computational methods for identifying DTIs. In this paper, we develop a novel `end-to-end' learning-based framework based on heterogeneous `graph' convolutional networks for `DTI' prediction called end-to-end graph (EEG)-DTI. Given a heterogeneous network containing multiple types of biological entities (i.e. drug, protein, disease, side-effect), EEG-DTI learns the low-dimensional feature representation of drugs and targets using a graph convolutional networks-based model and predicts DTIs based on the learned features. During the training process, EEG-DTI learns the feature representation of nodes in an end-to-end mode. The evaluation test shows that EEG-DTI performs better than existing state-of-art methods. The data and source code are available at: https://github.com/MedicineBiology-AI/EEG-DTI.},
      source   = {https://academic.oup.com/bib/article-abstract/22/5/bbaa430/6124914?login=false},
    }

  • Jiajie Peng, Hansheng Xue, Zhongyu Wei, Idil Tuncali, Jianye Hao, Xuequn Shang. "Integrating multi-network topology for gene function prediction using deep neural networks", Briefings in Bioinformatics, 2021, 22: 2096-2105.(Article)
  • [Bibtex]

    @article{nwpu_index98,
      author   = {Peng, Jiajie and Xue, Hansheng and Wei, Zhongyu and Tuncali, Idil and Hao, Jianye and Shang, Xuequn},
      title    = {Integrating multi-network topology for gene function prediction using deep neural networks},
      journal  = {Briefings in Bioinformatics},
      volume   = {22},
      pages    = {2096--2105},
      year     = {2021},
      type     = {Article},
      abstract = {Motivation: The emergence of abundant biological networks, which benefit from the development of advanced high-throughput techniques, contributes to describing and modeling complex internal interactions among biological entities such as genes and proteins. Multiple networks provide rich information for inferring the function of genes or proteins. To extract functional patterns of genes based on multiple heterogeneous networks, network embedding-based methods, aiming to capture non-linear and low-dimensional feature representation based on network biology, have recently achieved remarkable performance in gene function prediction. However, existing methods do not consider the shared information among different networks during the feature learning process. Results: Taking the correlation among the networks into account, we design a novel semi-supervised autoencoder method to integrate multiple networks and generate a low-dimensional feature representation. Then we utilize a convolutional neural network based on the integrated feature embedding to annotate unlabeled gene functions. We test our method on both yeast and human datasets and compare with three state-of-the-art methods. The results demonstrate the superior performance of our method. We not only provide a comprehensive analysis of the performance of the newly proposed algorithm but also provide a tool for extracting features of genes based on multiple networks, which can be used in the downstream machine learning task.},
      source   = {https://academic.oup.com/bib/article-abstract/22/2/2096/5816013?login=false},
    }

  • Yan Zheng, Yuanke Zhong, Jialu Hu, Xuequn Shang. "SCC: an accurate imputation method for scRNA-seq dropouts based on a mixture model", BMC Bioinformatics, 2021, 22: 5.(Article)
  • [Bibtex]

    @article{nwpu_index113,
      author   = {Zheng, Yan and Zhong, Yuanke and Hu, Jialu and Shang, Xuequn},
      title    = {{SCC}: an accurate imputation method for {scRNA-seq} dropouts based on a mixture model},
      journal  = {BMC Bioinformatics},
      volume   = {22},
      pages    = {5},
      year     = {2021},
      type     = {Article},
      abstract = {Background: Single-cell RNA sequencing (scRNA-seq) enables the possibility of many in-depth transcriptomic analyses at a single-cell resolution. It's already widely used for exploring the dynamic development process of life, studying the gene regulation mechanism, and discovering new cell types. However, the low RNA capture rate, which cause highly sparse expression with dropout, makes it difficult to do downstream analyses. Results: We propose a new method SCC to impute the dropouts of scRNA-seq data. Experiment results show that SCC gives competitive results compared to two existing methods while showing superiority in reducing the intra-class distance of cells and improving the clustering accuracy in both simulation and real data. Conclusions: SCC is an effective tool to resolve the dropout noise in scRNA-seq data. The code is freely accessible at https://github.com/nwpuzhengyan/SCC.},
      doi      = {10.1186/s12859-020-03878-8},
      source   = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-020-03878-8},
    }

  • Jialu Hu, Yuanke Zhong, Xuequn Shang. "A versatile and scalable single-cell data integration algorithm based on domain-adversarial and variational approximation", Briefings in Bioinformatics, 2021, 23: bbab400.(Article)
  • [Bibtex]

    @article{nwpu_index114,
      author   = {Hu, Jialu and Zhong, Yuanke and Shang, Xuequn},
      title    = {A versatile and scalable single-cell data integration algorithm based on domain-adversarial and variational approximation},
      journal  = {Briefings in Bioinformatics},
      volume   = {23},
      pages    = {bbab400},
      year     = {2021},
      type     = {Article},
      abstract = {Single-cell technologies provide us new ways to profile transcriptomic landscape, chromatin accessibility, spatial expression patterns in heterogeneous tissues at the resolution of single cell. With enormous generated single-cell datasets, a key analytic challenge is to integrate these datasets to gain biological insights into cellular compositions. Here, we developed a domain-adversarial and variational approximation, DAVAE, which can integrate multiple single-cell datasets across samples, technologies and modalities with a single strategy. Besides, DAVAE can also integrate paired data of ATAC profile and transcriptome profile that are simultaneously measured from a same cell. With a mini-batch stochastic gradient descent strategy, it is scalable for large-scale data and can be accelerated by GPUs. Results on seven real data integration applications demonstrated the effectiveness and scalability of DAVAE in batch-effect removing, transfer learning and cell-type predictions for multiple single-cell datasets across samples, technologies and modalities. Availability: DAVAE has been implemented in a toolkit package ``scbean'' in the pypi repository, and the source code can be also freely accessible at https://github.com/jhu99/scbean. All our data and source code for reproducing the results of this paper can be accessible at https://github.com/jhu99/davae_paper.},
      source   = {https://academic.oup.com/bib/article-abstract/23/1/bbab400/6377528?login=false},
    }

  • Bolin Chen, Li Gao, Xuequn Shang. "A two-way rectification method for identifying differentially expressed genes by maximizing the co-function relationship", BMC Genomics, 2021, 22: 471.(Article)
  • [Bibtex]

    @article{nwpu_index115,
      author   = {Chen, Bolin and Gao, Li and Shang, Xuequn},
      title    = {A two-way rectification method for identifying differentially expressed genes by maximizing the co-function relationship},
      journal  = {BMC Genomics},
      volume   = {22},
      pages    = {471},
      year     = {2021},
      type     = {Article},
      abstract = {Background: The identification of differentially expressed genes (DEGs) is an important task in many biological studies. The currently widely used methods often calculate a score for each gene by estimating the significance level in terms of the differential expression. However, biological experiments often have only three duplications, plus plenty of noises contain in gene expression datasets, which brings a great challenge to statistical analysis methods. Moreover, the abundance of gene expression levels are not evenly distributed. Thus, those low expressed genes are more easily to be detected by fold-change based methods, which may results in high false positives among the DEG list. Since phenotypical changes result from DEGs should be strongly related to several distinct cellular functions, a more robust method should be designed to increase the true positive rate of the functional related DEGs. Results: In this study, we propose a two-way rectification method for identifying DEGs by maximizing the co-function relationships between genes and their enriched cellular pathways. An iteration strategy is employed to sequentially narrow down the group of identified DEGs and their associated biological functions. Functional analyses reveal that the identified DEGs are well organized in the form of functional modules, and the enriched pathways are very significant with lower p-value and larger gene count. Conclusions: An integrative rectification method was proposed to identify key DEGs and their related functions simultaneously. The experimental validations demonstrate that the method has high interpretability and feasibility. It performs very well in terms of the identification of remarkable functional related genes.},
      doi      = {10.1186/s12864-021-07772-2},
      source   = {https://bmcgenomics.biomedcentral.com/articles/10.1186/s12864-021-07772-2},
    }

2020

  • Yupei Zhang, Shuhui Liu. "Integrated Sparse Coding With Graph Learning for Robust Data Representation", IEEE Access, 2020, 8: 161245-161260.(Article)
  • [Bibtex]

    @article{nwpu_index120,
      author   = {Zhang, Yupei and Liu, Shuhui},
      title    = {Integrated Sparse Coding With Graph Learning for Robust Data Representation},
      journal  = {{IEEE} Access},
      volume   = {8},
      pages    = {161245--161260},
      year     = {2020},
      type     = {Article},
      abstract = {Sparse coding is a popular technique for achieving compact data representation and has been used in many applications. However, the instability issue often causes degeneration in practice and thus attracts a lot of studies. While the traditional graph sparse coding preserves the neighborhood structure of the data, this study integrates the low-rank representation (LRR) to fix the inconsistency of sparse coding by holding the subspace structures of the high-dimensional observations. The proposed method is dubbed low-rank graph regularized sparse coding (LogSC), which learns sparse codes and low-rank representations jointly rather than the traditional two-step approach. Since the two data representations share a dictionary matrix, the resulted sparse representation on this dictionary could be benefited from LRR. We solved the optimization problem of LogSC by using the linearized alternating direction method with adaptive penalty. Experimental results show the proposed method is discriminative in feature learning and robust to various noises. This work provides a one-step approach to integrating graph embedding in representation learning.},
      source   = {https://ieeexplore.ieee.org/document/9184845},
    }

  • Yupei Zhang, Huan Dai, Yue Yun, Shuhui Liu, Andrew Lan, Xuequn Shang. "Meta-knowledge dictionary learning on 1-bit response data for student knowledge diagnosis", Knowledge-Based Systems, 2020, 205: 106290.(Article)
  • [Bibtex]

    @article{nwpu_index89,
      author   = {Zhang, Yupei and Dai, Huan and Yun, Yue and Liu, Shuhui and Lan, Andrew and Shang, Xuequn},
      title    = {Meta-knowledge dictionary learning on 1-bit response data for student knowledge diagnosis},
      journal  = {Knowledge-Based Systems},
      volume   = {205},
      pages    = {106290},
      year     = {2020},
      type     = {Article},
      abstract = {This paper focuses on the problem of student knowledge diagnosis that is a basic task of realizing personalized education. Most traditional methods rely on the question-concept matrix empirically designed by experts. However, the expert concepts are expensive and inter-overlapping in their constructions, leading to ambiguous explanations. With the intuition that each student can master a part of the knowledge involved in all questions, in this paper, we propose a novel learning-based model for student knowledge diagnosis, dubbed Meta-knowledge Dictionary Learning (metaDL). MetaDL aims to learn a meta-knowledge dictionary from student responses, where any knowledge entity (e.g., student, question or expert concept) is a linear combination of a few atoms in the meta-knowledge dictionary. The resultant problem could be effectively solved by developing the alternating direction method of multipliers. This study has three innovations: learning independent meta-knowledges instead of traditional complex concepts, sparely representing knowledge entity instead of densely weighted representation, and interpreting expert concepts with the resulting meta-knowledges. For evaluation, the diagnosis results from metaDL are used to group students and predict responses on two public datasets and a private dataset from our institution. The experiment results show that metaDL delivers an effective student knowledge diagnosis and then results in good performances on the two applications in comparison with other methods. This technique could provide significant insights into student's knowledge state and facilitate the progress on personalized education.},
      source   = {https://www.sciencedirect.com/science/article/pii/S0950705120304664},
    }

  • Jiajie Peng, Junya Lu, Donghee Hoh, Ayesha S Dina, Xuequn Shang, David M Kramer, Jin Chen. "Identifying emerging phenomenon in long temporal phenotyping experiments", Bioinformatics, 2020, 36: 568-577.(Article)
  • [Bibtex]

    @article{nwpu_index99,
      author   = {Peng, Jiajie and Lu, Junya and Hoh, Donghee and Dina, Ayesha S and Shang, Xuequn and Kramer, David M and Chen, Jin},
      title    = {Identifying emerging phenomenon in long temporal phenotyping experiments},
      journal  = {Bioinformatics},
      volume   = {36},
      pages    = {568--577},
      year     = {2020},
      type     = {Article},
      abstract = {Motivation: The rapid improvement of phenotyping capability, accuracy and throughput have greatly increased the volume and diversity of phenomics data. A remaining challenge is an efficient way to identify phenotypic patterns to improve our understanding of the quantitative variation of complex phenotypes, and to attribute gene functions. To address this challenge, we developed a new algorithm to identify emerging phenomena from large-scale temporal plant phenotyping experiments. An emerging phenomenon is defined as a group of genotypes who exhibit a coherent phenotype pattern during a relatively short time. Emerging phenomena are highly transient and diverse, and are dependent in complex ways on both environmental conditions and development. Identifying emerging phenomena may help biologists to examine potential relationships among phenotypes and genotypes in a genetically diverse population and to associate such relationships with the change of environments or development. Results: We present an emerging phenomenon identification tool called Temporal Emerging Phenomenon Finder (TEP-Finder). Using large-scale longitudinal phenomics data as input, TEP-Finder first encodes the complicated phenotypic patterns into a dynamic phenotype network. Then, emerging phenomena in different temporal scales are identified from dynamic phenotype network using a maximal clique based approach. Meanwhile, a directed acyclic network of emerging phenomena is composed to model the relationships among the emerging phenomena. The experiment that compares TEP-Finder with two state-of-art algorithms shows that the emerging phenomena identified by TEP-Finder are more functionally specific, robust and biologically significant.},
      source   = {https://academic.oup.com/bioinformatics/article/36/2/568/5532221?login=false},
    }

  • Yuanke Zhong, Jing Li, Junhao He, Yiqun Gao, Jie Liu, Jingru Wang, Xuequn Shang, Jialu Hu. "Twadn: an efficient alignment algorithm based on time warping for pairwise dynamic networks", BMC Bioinformatics, 2020, 21: 385.(Article)
  • [Bibtex]

    @article{nwpu_index112,
      author   = {Zhong, Yuanke and Li, Jing and He, Junhao and Gao, Yiqun and Liu, Jie and Wang, Jingru and Shang, Xuequn and Hu, Jialu},
      title    = {{Twadn}: an efficient alignment algorithm based on time warping for pairwise dynamic networks},
      journal  = {BMC Bioinformatics},
      volume   = {21},
      pages    = {385},
      year     = {2020},
      type     = {Article},
      abstract = {Background: Network alignment is an efficient computational framework in the prediction of protein function and phylogenetic relationships in systems biology. However, most of existing alignment methods focus on aligning PPIs based on static network model, which are actually dynamic in real-world systems. The dynamic characteristic of PPI networks is essential for understanding the evolution and regulation mechanism at the molecular level and there is still much room to improve the alignment quality in dynamic networks. Results: In this paper, we proposed a novel alignment algorithm, Twadn, to align dynamic PPI networks based on a strategy of time warping. We compare Twadn with the existing dynamic network alignment algorithm DynaMAGNA++ and DynaWAVE and use area under the receiver operating characteristic curve and area under the precision-recall curve as evaluation indicators. The experimental results show that Twadn is superior to DynaMAGNA++ and DynaWAVE. In addition, we use protein interaction network of Drosophila to compare Twadn and the static network alignment algorithm NetCoffee2 and experimental results show that Twadn is able to capture timing information compared to NetCoffee2. Conclusions: Twadn is a versatile and efficient alignment tool that can be applied to dynamic network. Hopefully, its application can benefit the research community in the fields of molecular function and evolution.},
      doi      = {10.1186/s12859-020-03672-6},
      source   = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-020-03672-6},
    }

  • Bolin Chen, Manting Yang, Li Gao, Tao Jiang, Xuequn Shang. "A functional network construction method to interpret the pathological process of colorectal cancer", International Journal of Data Mining and Bioinformatics, 2020, 23: 251-264.(Article)
  • [Bibtex]

    @article{nwpu_index116,
      author   = {Chen, Bolin and Yang, Manting and Gao, Li and Jiang, Tao and Shang, Xuequn},
      title    = {A functional network construction method to interpret the pathological process of colorectal cancer},
      journal  = {International Journal of Data Mining and Bioinformatics},
      volume   = {23},
      pages    = {251--264},
      year     = {2020},
      type     = {Article},
      abstract = {The prognosis of cancers remains a challenge due to the limited understanding of their pathogenic mechanisms and cancerous processes. To overcome this, many studies employ a set of Differentially Expressed Genes (DEGs) to analyse the significant dysfunctions alone with the pathological stages. However, those studies often ignore the fact that DEGs detected from cancer patients tend to be highly heterogeneous with each other, which could easily mislead the enriched dysfunctions toward to those less relevant functions. Hence, in this study, we propose a novel method to generate the functional evolution network to describe the transferring processes of dysfunctions of the cancer. Results interpret that the proposed network construction method has a powerful capacity in detecting the most relevant cellular functions compared with existing methods, which could be employed to explore the evolution processes of cancers and may provide a new method for therapeutic intervention.},
      doi      = {10.1504/IJDMB.2020.107879},
      source   = {https://www.inderscienceonline.com/doi/abs/10.1504/IJDMB.2020.107879},
    }

  • Chaima Aouiche, Bolin Chen, Xuequn Shang. "Predicting Stage-Specific Recurrent Aberrations From Somatic Copy Number Dataset", Frontiers in Genetics, 2020, 11: 160.(Article)
  • [Bibtex]

    @article{nwpu_index117,
      author   = {Aouiche, Chaima and Chen, Bolin and Shang, Xuequn},
      title    = {Predicting Stage-Specific Recurrent Aberrations From Somatic Copy Number Dataset},
      journal  = {Frontiers in Genetics},
      volume   = {11},
      pages    = {160},
      year     = {2020},
      type     = {Article},
      abstract = {Exploring the evolution process of cancers and its related complex molecular mechanisms at the genomic level through pathological staging angle is particularly important for providing novel therapeutic strategies most relevant to every cancer patient diagnosed at each stage. This is because the genomic level involving copy number variation (CNV) has been recognized as a critical genetic variation, which has a large influence on the progression of a variety of complex diseases. Great efforts have been devoted to the identification of recurrent aberrations, single genes and individual static pathways related to cancer progression. However, we still have little knowledge about the most important aberrant genes related to the pathology stages and their interconnected pathways from genomic profiles. In this study, we propose an identification framework that allows determining cancer-stages specific patterns dynamically. Firstly, a two-stage GAIA method is employed to identify stage-specific aberrant copy number variants segments. Secondly, stage-specific cancer genes fully located within the aberrant segments are then identified according to the reference annotation dataset. Thirdly, a pathway evolution network is constructed based on the impacted pathways functions and their overlapped genes. The involved significant functions and evolution paths uncovered by this network enabled investigation of the real progression of cancers, and thus facilitated the determination of appropriate clinical settings that will help to assess risk in cancer patients. Those findings at individual levels can be integrated to identify robust biomarkers in cancer progressions.},
      doi      = {10.3389/fgene.2020.00160},
      source   = {https://www.frontiersin.org/articles/10.3389/fgene.2020.00160/full},
    }

2019

  • Jiajie Peng, Weiwei Hui, Qianqian Li, Bolin Chen, Jianye Hao, Qinghua Jiang, Xuequn Shang, Zhongyu Wei. "A learning-based framework for miRNA-disease association identification using neural networks", Bioinformatics, 2019, 35: 4364-4371.(Article)
  • [Bibtex]

    @article{nwpu_index100,
      author   = {Peng, Jiajie and Hui, Weiwei and Li, Qianqian and Chen, Bolin and Hao, Jianye and Jiang, Qinghua and Shang, Xuequn and Wei, Zhongyu},
      title    = {A learning-based framework for {miRNA-disease} association identification using neural networks},
      journal  = {Bioinformatics},
      volume   = {35},
      pages    = {4364--4371},
      year     = {2019},
      type     = {Article},
      abstract = {Motivation: A microRNA (miRNA) is a type of non-coding RNA, which plays important roles in many biological processes. Lots of studies have shown that miRNAs are implicated in human diseases, indicating that miRNAs might be potential biomarkers for various types of diseases. Therefore, it is important to reveal the relationships between miRNAs and diseases/phenotypes. Results: We propose a novel learning-based framework, MDA-CNN, for miRNA-disease association identification. The model first captures interaction features between diseases and miRNAs based on a three-layer network including disease similarity network, miRNA similarity network and protein-protein interaction network. Then, it employs an auto-encoder to identify the essential feature combination for each pair of miRNA and disease automatically. Finally, taking the reduced feature representation as input, it uses a convolutional neural network to predict the final label. The evaluation results show that the proposed framework outperforms some state-of-the-art approaches in a large margin on both tasks of miRNA-disease association prediction and miRNA-phenotype association prediction.},
      source   = {https://academic.oup.com/bioinformatics/article/35/21/4364/5448859?login=false},
    }

  • Jiajie Peng, Xiaoyu Wang, Xuequn Shang. "Combining gene ontology with deep neural networks to enhance the clustering of single cell RNA-Seq data", BMC Bioinformatics, 2019, 20: 284.(Article)
  • [Bibtex]

    @article{nwpu_index101,
      author   = {Peng, Jiajie and Wang, Xiaoyu and Shang, Xuequn},
      title    = {Combining gene ontology with deep neural networks to enhance the clustering of single cell {RNA-Seq} data},
      journal  = {BMC Bioinformatics},
      volume   = {20},
      pages    = {284},
      year     = {2019},
      type     = {Article},
      abstract = {Background: Single cell RNA sequencing (scRNA-seq) is applied to assay the individual transcriptomes of large numbers of cells. The gene expression at single-cell level provides an opportunity for better understanding of cell function and new discoveries in biomedical areas. To ensure that the single-cell based gene expression data are interpreted appropriately, it is crucial to develop new computational methods. Results: In this article, we try to re-construct a neural network based on Gene Ontology (GO) for dimension reduction of scRNA-seq data. By integrating GO with both unsupervised and supervised models, two novel methods are proposed, named GOAE (Gene Ontology AutoEncoder) and GONN (Gene Ontology Neural Network) respectively. Conclusions: The evaluation results show that the proposed models outperform some state-of-the-art dimensionality reduction approaches. Furthermore, incorporating with GO, we provide an opportunity to interpret the underlying biological mechanism behind the neural network-based model.},
      doi      = {10.1186/s12859-019-2769-6},
      source   = {https://link.springer.com/article/10.1186/s12859-019-2769-6},
    }

  • Hansheng Xue, Jiajie Peng, Xuequn Shang. "Towards Gene Function Prediction via Multi-Networks Representation Learning", In Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, 2019.(Student Abstract Track)
  • [Bibtex]

    @inproceedings{nwpu_index103,
      author      = {Xue, Hansheng and Peng, Jiajie and Shang, Xuequn},
      title       = {Towards Gene Function Prediction via Multi-Networks Representation Learning},
      booktitle   = {Proceedings of the {AAAI} Conference on Artificial Intelligence},
      volume      = {33},
      year        = {2019},
      type        = {Student Abstract Track},
      abstract    = {Multi-networks integration methods have achieved prominent performance on many network-based tasks, but these approaches often incur information loss problem. In this paper, we propose a novel multi-networks representation learning method based on semi-supervised autoencoder, termed as DeepMNE, which captures complex topological structures of each network and takes the correlation among multi-networks into account. The experimental results on two real-world datasets indicate that DeepMNE outperforms the existing state-of-the-art algorithms.},
      source      = {https://bmcsystbiol.biomedcentral.com/articles/10.1186/s12918-019-0697-8},
      review-note = {NOTE(review): the source URL is identical to that of nwpu_index104 (a BMC Systems Biology article) and presumably does not point to this AAAI abstract; replace it with the AAAI proceedings link once confirmed},
    }

  • Hansheng Xue, Jiajie Peng, Xuequn Shang. "Predicting disease-related phenotypes using an integrated phenotype similarity measurement based on HPO", BMC Systems Biology, 2019, 13: 34.(Article)
  • [Bibtex]

    @article{nwpu_index104,
      author   = {Xue, Hansheng and Peng, Jiajie and Shang, Xuequn},
      title    = {Predicting disease-related phenotypes using an integrated phenotype similarity measurement based on {HPO}},
      journal  = {BMC Systems Biology},
      volume   = {13},
      pages    = {34},
      year     = {2019},
      type     = {Article},
      abstract = {Background: Improving efficiency of disease diagnosis based on phenotype ontology is a critical yet challenging research area. Recently, Human Phenotype Ontology (HPO)-based semantic similarity has been affectively and widely used to identify causative genes and diseases. However, current phenotype similarity measurements just consider the annotations and hierarchy structure of HPO, neglecting the definition description of phenotype terms. Results: In this paper, we propose a novel phenotype similarity measurement, termed as DisPheno, which adequately incorporates the definition of phenotype terms in addition to HPO structure and annotations to measure the similarity between phenotype terms. DisPheno also integrates phenotype term associations into phenotype-set similarity measurement using gene and disease annotations of phenotype terms. Conclusions: Compared with five existing state-of-the-art methods, DisPheno shows great performance in HPO-based phenotype semantic similarity measurement and improves the efficiency of disease identification, especially on noisy patients dataset.},
      doi      = {10.1186/s12918-019-0697-8},
      source   = {https://bmcsystbiol.biomedcentral.com/articles/10.1186/s12918-019-0697-8},
    }

  • Jiajie Peng, Jiaojiao Guan, Xuequn Shang. "Predicting Parkinson's Disease Genes Based on Node2vec and Autoencoder", Frontiers in Genetics, 2019, 10: 226.(Article)
  • [Bibtex]

    @article{nwpu_index105,
      author   = {Peng, Jiajie and Guan, Jiaojiao and Shang, Xuequn},
      title    = {Predicting {Parkinson's} Disease Genes Based on {Node2vec} and Autoencoder},
      journal  = {Frontiers in Genetics},
      volume   = {10},
      pages    = {226},
      year     = {2019},
      type     = {Article},
      abstract = {Identifying genes associated with Parkinson's disease plays an extremely important role in the diagnosis and treatment of Parkinson's disease. In recent years, based on the guilt-by-association hypothesis, many methods have been proposed to predict disease-related genes, but few of these methods are designed or used for Parkinson's disease gene prediction. In this paper, we propose a novel prediction method for Parkinson's disease gene prediction, named N2A-SVM. N2A-SVM includes three parts: extracting features of genes based on network, reducing the dimension using deep neural network, and predicting Parkinson's disease genes using a machine learning method. The evaluation test shows that N2A-SVM performs better than existing methods. Furthermore, we evaluate the significance of each step in the N2A-SVM algorithm and the influence of the hyper-parameters on the result. In addition, we train N2A-SVM on the recent dataset and used it to predict Parkinson's disease genes. The predicted top-rank genes can be verified based on literature study.},
      doi      = {10.3389/fgene.2019.00226},
      source   = {https://www.frontiersin.org/articles/10.3389/fgene.2019.00226/full},
    }

  • Jialu Hu, Jingru Wang, Jianan Lin, Tianwei Liu, Yuanke Zhong, Jie Liu, Yan Zheng, Yiqun Gao, Junhao He, Xuequn Shang. "MD-SVM: a novel SVM-based algorithm for the motif discovery of transcription factor binding sites", BMC Bioinformatics, 2019, 20: 41-48.(Article)
  • [Bibtex]

    @article{nwpu_index107,
      author   = {Hu, Jialu and Wang, Jingru and Lin, Jianan and Liu, Tianwei and Zhong, Yuanke and Liu, Jie and Zheng, Yan and Gao, Yiqun and He, Junhao and Shang, Xuequn},
      title    = {{MD-SVM}: a novel {SVM-based} algorithm for the motif discovery of transcription factor binding sites},
      journal  = {BMC Bioinformatics},
      volume   = {20},
      pages    = {41--48},
      year     = {2019},
      type     = {Article},
      abstract = {Background: Transcription factors (TFs) play important roles in the regulation of gene expression. They can activate or block transcription of downstream genes in a manner of binding to specific genomic sequences. Therefore, motif discovery of these binding preference patterns is of central significance in the understanding of molecular regulation mechanism. Many algorithms have been proposed for the identification of transcription factor binding sites. However, it remains a challengeable problem. Results: Here, we proposed a novel motif discovery algorithm based on support vector machine (MD-SVM) to learn a discriminative model for TF binding sites. MD-SVM firstly obtains position weight matrix (PWM) from a set of training datasets. Then it translates the MD problem into a computational framework of multiple instance learning (MIL). It was applied to several real biological datasets. Results show that our algorithm outperforms MI-SVM in terms of both accuracy and specificity. Conclusions: In this paper, we modeled the TF motif discovery problem as a MIL optimization problem. The SVM algorithm was adapted to discriminate positive and negative bags of instances. Compared to other svm-based algorithms, MD-SVM show its superiority over its competitors in term of ROC AUC. Hopefully, it could be of benefit to the research community in the understanding of molecular functions of DNA functional elements and transcription factors.},
      doi      = {10.1186/s12859-019-2735-3},
      source   = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-019-2735-3},
    }

  • Jialu Hu, Yiqun Gao, Jing Li, Xuequn Shang. "Deep Learning Enables Accurate Prediction of Interplay Between lncRNA and Disease", Frontiers in Genetics, 2019, 10: 937.(Article)
  • [Bibtex]

    @article{nwpu_index108,
      author   = {Jialu Hu and Yiqun Gao and Jing Li and Xuequn Shang},
      title    = {Deep Learning Enables Accurate Prediction of Interplay Between {lncRNA} and Disease},
      journal  = {Frontiers in Genetics},
      volume   = {10},
      pages    = {937},
      year     = {2019},
      type     = {Article},
      abstract = {Many studies have suggested that lncRNAs are involved in distinct and diverse biological processes. The mutation of lncRNAs plays a major role in a wide range of diseases. A comprehensive information of lncRNA-disease associations would improve our understanding of the underlying molecular mechanism that can explain the development of disease. However, the discovery of the relationship between lncRNA and disease in biological experiment is costly and time-consuming. Although many computational algorithms have been proposed in the last decade, there still exists much room to improve because of diverse computational limitations. In this paper, we proposed a deep-learning framework, NNLDA, to predict potential lncRNA-disease associations. We compared it with other two widely-used algorithms on a network with 205,959 interactions between 19,166 lncRNAs and 529 diseases. Results show that NNLDA outperforms other existing algorithm in the prediction of lncRNA-disease association. Additionally, NNLDA can be easily applied to large-scale datasets using the technique of mini-batch stochastic gradient descent. To our best knowledge, NNLDA is the first algorithm that uses deep neural networks to predict lncRNA-disease association. The source code of NNLDA can be freely accessed at https://github.com/gao793583308/NNLDA.},
      source   = {https://doi.org/10.3389/fgene.2019.00937},
    }

  • Jialu Hu, Junhao He, Jing Li, Yiqun Gao, Yan Zheng, Xuequn Shang. "A novel algorithm for alignment of multiple PPI networks based on simulated annealing", BMC Genomics, 2019, 20: 932.(Article)
  • [Bibtex]

    @article{nwpu_index109,
      author   = {Jialu Hu and Junhao He and Jing Li and Yiqun Gao and Yan Zheng and Xuequn Shang},
      title    = {A novel algorithm for alignment of multiple {PPI} networks based on simulated annealing},
      journal  = {BMC Genomics},
      volume   = {20},
      pages    = {932},
      year     = {2019},
      type     = {Article},
      abstract = {Proteins play essential roles in almost all life processes. The prediction of protein function is of significance for the understanding of molecular function and evolution. Network alignment provides a fast and effective framework to automatically identify functionally conserved proteins in a systematic way. However, due to the fast growing genomic data, interactions and annotation data, there is an increasing demand for more accurate and efficient tools to deal with multiple PPI networks. Here, we present a novel global alignment algorithm NetCoffee2 based on graph feature vectors to discover functionally conserved proteins and predict function for unknown proteins. To test the algorithm performance, NetCoffee2 and three other notable algorithms were applied on eight real biological datasets. Functional analyses were performed to evaluate the biological quality of these alignments. Results show that NetCoffee2 is superior to existing algorithms IsoRankN, NetCoffee and multiMAGNA++ in terms of both coverage and consistency. The binary and source code are freely available under the GNU GPL v3 license at https://github.com/screamer/NetCoffee2.},
      source   = {https://bmcgenomics.biomedcentral.com/articles/10.1186/s12864-019-6302-0},
    }

  • Jialu Hu, Yiqun Gao, Jing Li, Yan Zheng, Jingru Wang, Xuequn Shang. "A novel algorithm based on bi-random walks to identify disease-related lncRNAs", BMC Bioinformatics, 2019, 20: 569.(Article)
  • [Bibtex]

    @article{nwpu_index110,
      author   = {Jialu Hu and Yiqun Gao and Jing Li and Yan Zheng and Jingru Wang and Xuequn Shang},
      title    = {A novel algorithm based on bi-random walks to identify disease-related {lncRNAs}},
      journal  = {BMC Bioinformatics},
      volume   = {20},
      pages    = {569},
      year     = {2019},
      type     = {Article},
      abstract = {Backgrounds There is evidence to suggest that lncRNAs are associated with distinct and diverse biological processes. The dysfunction or mutation of lncRNAs are implicated in a wide range of diseases. An accurate computational model can benefit the diagnosis of diseases and help us to gain a better understanding of the molecular mechanism. Although many related algorithms have been proposed, there is still much room to improve the accuracy of the algorithm. Results We developed a novel algorithm, BiWalkLDA, to predict disease-related lncRNAs in three real datasets, which have 528 lncRNAs, 545 diseases and 1216 interactions in total. To compare performance with other algorithms, the leave-one-out validation test was performed for BiWalkLDA and three other existing algorithms, SIMCLDA, LDAP and LRLSLDA. Additional tests were carefully designed to analyze the parameter effects such as $\alpha$, $\beta$, l and r, which could help user to select the best choice of these parameters in their own application. In a case study of prostate cancer, eight out of the top-ten disease-related lncRNAs reported by BiWalkLDA were previously confirmed in literatures. Conclusions In this paper, we develop an algorithm, BiWalkLDA, to predict lncRNA-disease association by using bi-random walks. It constructs a lncRNA-disease network by integrating interaction profile and gene ontology information. Solving cold-start problem by using neighbors' interaction profile information. Then, bi-random walks was applied to three real biological datasets. Results show that our method outperforms other algorithms in predicting lncRNA-disease association in terms of both accuracy and specificity.},
      source   = {https://doi.org/10.1186/s12859-019-3128-3},
    }

  • Chaima Aouiche, Bolin Chen, Xuequn Shang. "Predicting stage-specific cancer related genes and their dynamic modules by integrating multiple datasets", BMC Bioinformatics, 2019, 20: 194.(Article)
  • [Bibtex]

    @article{nwpu_index118,
      author   = {Chaima Aouiche and Bolin Chen and Xuequn Shang},
      title    = {Predicting stage-specific cancer related genes and their dynamic modules by integrating multiple datasets},
      journal  = {BMC Bioinformatics},
      volume   = {20},
      pages    = {194},
      year     = {2019},
      type     = {Article},
      abstract = {Background The mechanism of many complex diseases has not been detected accurately in terms of their stage evolution. Previous studies mainly focus on the identification of associations between genes and individual diseases, but less is known about their associations with specific disease stages. Exploring biological modules through different disease stages could provide valuable knowledge to genomic and clinical research. In this study, we proposed a powerful and versatile framework to identify stage-specific cancer related genes and their dynamic modules by integrating multiple datasets. The discovered modules and their specific-signature genes were significantly enriched in many relevant known pathways. To further illustrate the dynamic evolution of these clinical-stages, a pathway network was built by taking individual pathways as vertices and the overlapping relationship between their annotated genes as edges. Conclusions The identified pathway network not only help us to understand the functional evolution of complex diseases, but also useful for clinical management to select the optimum treatment regimens and the appropriate drugs for patients.},
      source   = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-019-2740-6},
    }

  • Yupei Zhang, Huan Dai, Yue Yun, Xuequn Shang. "Student Knowledge Diagnosis on Response Data via the Model of Sparse Factor Learning", In Proceedings of The 12th International Conference on Educational Data Mining (EDM 2019),2019: 691-694.(Poster Paper)
  • [Bibtex]

    @inproceedings{nwpu_index122,
      author    = {Yupei Zhang and Huan Dai and Yue Yun and Xuequn Shang},
      title     = {Student Knowledge Diagnosis on Response Data via the Model of Sparse Factor Learning},
      booktitle = {Proceedings of The 12th International Conference on Educational Data Mining ({EDM} 2019)},
      pages     = {691--694},
      year      = {2019},
      type      = {Poster Paper},
      abstract  = {Cognitive diagnosis aims to analyze the status of knowledge mastery of student and is thus very important for personalized education. The existing methods mostly depend on the empirical Q-matrix from domain experts. However, the knowledge points in Q-matrix are unavoidably overlapping, leading to the weak performance on the practical applications. In this paper, we propose a novel model for student knowledge diagnosis, called Sparse Factor Learning (SFL). SFL learns a meta-knowledge dictionary from student response data of test questions, where the knowledge structure of any entity (e.g., student, question or others) is a sparse linear combination of dictionary atoms. Our method has three innovations for cognitive diagnosis: learning latent nonoverlapping meta-knowledge, sparely representing the entities, and removing the bias noise for guessing and slipping. To verify our method, we collected the response data from the final exam of C language program of international class and then conducted the experiments for knowledge diagnosis, student grouping and response prediction. The experiment results show that SFL works effectively and results in decent performance. Besides, it delivers that student who favors mathematics and physics can achieve higher score. All codes and data set can be available on our website.},
    }

2018

  • Guo, Yang, Liu, Shuhui, Li, Zhanhuai and Shang, Xuequn. "BCDForest: a boosting cascade deep forest model towards the classification of cancer subtypes based on gene expression data", BMC bioinformatics, 2018, 19(5): 118.(Article)
  • [Bibtex]

    @article{nwpu_index61,
      author  = {Guo, Yang and Liu, Shuhui and Li, Zhanhuai and Shang, Xuequn},
      title   = {{BCDForest}: a boosting cascade deep forest model towards the classification of cancer subtypes based on gene expression data},
      journal = {BMC Bioinformatics},
      volume  = {19},
      number  = {5},
      pages   = {118},
      year    = {2018},
      type    = {Article},
    }

  • Jialu Hu, Yiqun Gao, Yan Zheng, Xuequn Shang. "KF-finder: Identification of key factors from host-microbial networks in cervical cancer", BMC Systems Biology, 2018, 12(S4): 54.(Article)
  • [Bibtex]

    @article{nwpu_index62,
      author  = {Jialu Hu and Yiqun Gao and Yan Zheng and Xuequn Shang},
      title   = {{KF-finder}: Identification of key factors from host-microbial networks in cervical cancer},
      journal = {BMC Systems Biology},
      volume  = {12},
      number  = {S4},
      pages   = {54},
      year    = {2018},
      type    = {Article},
    }

  • Y Zhang, M Xiang, B Yang. "Hierarchical sparse coding from a Bayesian perspective", Neurocomputing, 2018, 272: 279-293.(Article)
  • [Bibtex]

    @article{nwpu_index68,
      author  = {Zhang, Y. and Xiang, M. and Yang, B.},
      title   = {Hierarchical sparse coding from a {Bayesian} perspective},
      journal = {Neurocomputing},
      volume  = {272},
      pages   = {279--293},
      year    = {2018},
      type    = {Article},
    }

  • Y Zhang, S Liu, X Shang, M Xiang. "Low-Rank Graph Regularized Sparse Coding", In Pacific Rim International Conference on Artificial Intelligence,2018: 177-190.(Conference)
  • [Bibtex]

    @inproceedings{nwpu_index70,
      author    = {Zhang, Y. and Liu, S. and Shang, X. and Xiang, M.},
      title     = {Low-Rank Graph Regularized Sparse Coding},
      booktitle = {Pacific Rim International Conference on Artificial Intelligence},
      pages     = {177--190},
      year      = {2018},
      type      = {Conference},
    }

  • Jialu Hu, Yiqun Gao, Junhao He, Yan Zheng and Xuequn Shang. "WebNetCoffee: a web-based application to identify functionally conserved proteins from Multiple PPI networks", BMC Bioinformatics, 2018, 19(1): 422.(Article)
  • [Bibtex]

    @article{nwpu_index71,
      author   = {Jialu Hu and Yiqun Gao and Junhao He and Yan Zheng and Xuequn Shang},
      title    = {{WebNetCoffee}: a web-based application to identify functionally conserved proteins from Multiple {PPI} networks},
      journal  = {BMC Bioinformatics},
      volume   = {19},
      number   = {1},
      pages    = {422},
      year     = {2018},
      type     = {Article},
      abstract = {The discovery of functionally conserved proteins is a tough and important task in system biology. Global network alignment provides a systematic framework to search for these proteins from multiple protein-protein interaction (PPI) networks. Although there exist many web servers for network alignment, no one allows to perform global multiple network alignment tasks on users' test datasets},
      source   = {https://doi.org/10.1186/s12859-018-2443-4},
    }

  • Hu, Jialu and Zheng, Yan and Shang, Xuequn. "MiteFinderII: a novel tool to identify miniature inverted-repeat transposable elements hidden in eukaryotic genomes", BMC Medical Genomics, 2018, 11(5): 101.(Article)
  • [Bibtex]

    @article{nwpu_index72,
      author   = {Hu, Jialu and Zheng, Yan and Shang, Xuequn},
      title    = {{MiteFinderII}: a novel tool to identify miniature inverted-repeat transposable elements hidden in eukaryotic genomes},
      journal  = {BMC Medical Genomics},
      volume   = {11},
      number   = {5},
      pages    = {101},
      year     = {2018},
      type     = {Article},
      abstract = {Miniature inverted-repeat transposable element (MITE) is a type of class II non-autonomous transposable element playing a crucial role in the process of evolution in biology. There is an urgent need to develop bioinformatics tools to effectively identify MITEs on a whole genome-wide scale. However, most of currently existing tools suffer from low ability to deal with large eukaryotic genomes.},
      source   = {https://doi.org/10.1186/s12920-018-0418-y},
    }

  • Hu, Jialu and He, Junhao and Gao, Yiqun and Zheng, Yan and Shang, Xuequn. "NetCoffee2: A Novel Global Alignment Algorithm for Multiple PPI Networks Based on Graph Feature Vectors", In Intelligent Computing Theories and Application,2018: 241-246, Cham.(Conference), Springer International Publishing
  • [Bibtex]

    @inproceedings{nwpu_index73,
      author    = {Hu, Jialu and He, Junhao and Gao, Yiqun and Zheng, Yan and Shang, Xuequn},
      title     = {{NetCoffee2}: A Novel Global Alignment Algorithm for Multiple {PPI} Networks Based on Graph Feature Vectors},
      booktitle = {Intelligent Computing Theories and Application},
      pages     = {241--246},
      publisher = {Springer International Publishing},
      address   = {Cham},
      year      = {2018},
      type      = {Conference},
    }

2017

  • Jiajie Peng, Kun Bai, Xuequn Shang, Guohua Wang, Hansheng Xue, Shuilin Jin, Liang Cheng, Yadong Wang, Jin Chen. "Predicting disease-related genes using integrated biomedical networks ", BMC Genomics, 2017, 18(1): 1043.(Article)
  • [Bibtex]

    @article{nwpu_index27,
      author  = {Jiajie Peng and Kun Bai and Xuequn Shang and Guohua Wang and Hansheng Xue and Shuilin Jin and Liang Cheng and Yadong Wang and Jin Chen},
      title   = {Predicting disease-related genes using integrated biomedical networks},
      journal = {BMC Genomics},
      volume  = {18},
      number  = {1},
      pages   = {1043},
      year    = {2017},
      type    = {Article},
    }

  • Danelishvili, L and Shulzhenko, N and Jjj, Chinison and Babrak, L and Hu, J. and Morgun, A and Burrows, G and Bermudez, L. E.. "Mycobacterium tuberculosis proteome response to anti-tuberculosis compounds reveals metabolic "escape" pathways that prolong bacterial survival", Antimicrobial Agents & Chemotherapy, 2017.(Article)
  • [Bibtex]

    @article{nwpu_index55,
      author  = {Danelishvili, L and Shulzhenko, N and Jjj, Chinison and Babrak, L and Hu, J. and Morgun, A and Burrows, G and Bermudez, L. E.},
      title   = {{Mycobacterium} tuberculosis proteome response to anti-tuberculosis compounds reveals metabolic ``escape'' pathways that prolong bacterial survival},
      journal = {Antimicrobial Agents \& Chemotherapy},
      year    = {2017},
      type    = {Article},
    }

  • Hu, Jialu and Shang, Xuequn. "Detection of Network Motif Based on a Novel Graph Canonization Algorithm from Transcriptional Regulation Networks", Molecules, 2017, 22(12): 2194.(Article)
  • [Bibtex]

    @article{nwpu_index56,
      author  = {Hu, Jialu and Shang, Xuequn},
      title   = {Detection of Network Motif Based on a Novel Graph Canonization Algorithm from Transcriptional Regulation Networks},
      journal = {Molecules},
      volume  = {22},
      number  = {12},
      pages   = {2194},
      year    = {2017},
      type    = {Article},
    }

  • Peng, Jiajie and Wang, Honggang and Lu, Junya and Hui, Weiwei and Wang, Yadong and Shang, Xuequn. "Identifying term relations cross different gene ontology categories", BMC Bioinformatics, 2017, 18(16): 573.(Article)
  • [Bibtex]

    @article{nwpu_index57,
      author   = {Peng, Jiajie and Wang, Honggang and Lu, Junya and Hui, Weiwei and Wang, Yadong and Shang, Xuequn},
      title    = {Identifying term relations cross different gene ontology categories},
      journal  = {BMC Bioinformatics},
      volume   = {18},
      number   = {16},
      pages    = {573},
      year     = {2017},
      type     = {Article},
      abstract = {The Gene Ontology (GO) is a community-based bioinformatics resource that employs ontologies to represent biological knowledge and describes information about gene and gene product function. GO includes three independent categories: molecular function, biological process and cellular component. For better biological reasoning, identifying the biological relationships between terms in different categories are important.},
      source   = {https://doi.org/10.1186/s12859-017-1959-3},
    }

  • Jiajie Peng, Weiwei Hui and Xuequn Shang. "Measuring phenotype-phenotype similarity through the interactome", In BIBM Workshop on Biological Ontologies and Knowledge Bases,2017.(Conference)
  • [Bibtex]

    @inproceedings{nwpu_index58,
      author    = {Jiajie Peng and Weiwei Hui and Xuequn Shang},
      title     = {Measuring phenotype-phenotype similarity through the interactome},
      booktitle = {{BIBM} Workshop on Biological Ontologies and Knowledge Bases},
      year      = {2017},
      type      = {Conference},
    }

  • Jiajie Peng, Xuanshuo Zhang, Weiwei Hui, Junya Lu, Qianqian Li and Xuequn Shang. "Improving the measurement of semantic similarity by combining gene ontology and co-functional network: a random walk based approach", In GIW / BIOINFO 2017,2017.(Conference)
  • [Bibtex]

    @inproceedings{nwpu_index59,
      author    = {Jiajie Peng and Xuanshuo Zhang and Weiwei Hui and Junya Lu and Qianqian Li and Xuequn Shang},
      title     = {Improving the measurement of semantic similarity by combining gene ontology and co-functional network: a random walk based approach},
      booktitle = {GIW / BIOINFO 2017},
      year      = {2017},
      type      = {Conference},
    }

  • Jiajie Peng and Junya Lu and Xuequn Shang and Jin Chen. "Identifying consistent disease subnetworks using DNet", Methods, 2017, 131: 104-110.(Article)
  • [Bibtex]

    @article{nwpu_index60,
      author  = {Jiajie Peng and Junya Lu and Xuequn Shang and Jin Chen},
      title   = {Identifying consistent disease subnetworks using {DNet}},
      journal = {Methods},
      volume  = {131},
      pages   = {104--110},
      year    = {2017},
      type    = {Article},
      source  = {http://www.sciencedirect.com/science/article/pii/S1046202317300610},
    }

  • Jialu Hu, Yan Zheng, and Xuequn Shang. "MiteFinder: A fast approach to identify miniature inverted-repeat transposable elements on a genome-wide scale", In IEEE International Conference on Bioinformatics and Biomedicine (BIBM),2017: 164-168, Kansas City, NOV 13-16 .(Conference)
  • [Bibtex]

    @inproceedings{nwpu_index63,
      author    = {Jialu Hu and Yan Zheng and Xuequn Shang},
      title     = {{MiteFinder}: A fast approach to identify miniature inverted-repeat transposable elements on a genome-wide scale},
      booktitle = {IEEE International Conference on Bioinformatics and Biomedicine (BIBM)},
      pages     = {164--168},
      address   = {Kansas City},
      month     = nov,
      eventdate = {2017-11-13/2017-11-16},
      year      = {2017},
      type      = {Conference},
    }

  • Y Zhang, M Xiang, B Yang. "Low-rank preserving embedding", Pattern Recognition, 2017, 70: 112-125.(Article)
  • [Bibtex]

    @article{nwpu_index66,
      author  = {Zhang, Y. and Xiang, M. and Yang, B.},
      title   = {Low-rank preserving embedding},
      journal = {Pattern Recognition},
      volume  = {70},
      pages   = {112--125},
      year    = {2017},
      type    = {Article},
    }

  • Y Zhang, M Xiang, B Yang. "Graph regularized nonnegative sparse coding using incoherent dictionary for approximate nearest neighbor search", Pattern Recognition, 2017, 70: 75-88.(Article)
  • [Bibtex]

    @article{nwpu_index67,
      author  = {Zhang, Y. and Xiang, M. and Yang, B.},
      title   = {Graph regularized nonnegative sparse coding using incoherent dictionary for approximate nearest neighbor search},
      journal = {Pattern Recognition},
      volume  = {70},
      pages   = {75--88},
      year    = {2017},
      type    = {Article},
    }

2016

  • T. Marschall and K. Reinert and others (59 authors in total). "Computational pan-genomics: status, promises and challenges", Briefings in Bioinformatics, 2016, 10: 25-32.(Article)
  • [Bibtex]

    @article{nwpu_index2,
      author   = {Marschall, T. and Reinert, K. and others},
      title    = {Computational pan-genomics: status, promises and challenges},
      journal  = {Briefings in Bioinformatics},
      volume   = {10},
      pages    = {25--32},
      year     = {2016},
      note     = {59 authors in total},
      type     = {Article},
      abstract = {Many disciplines, from human genetics and oncology to plant breeding, microbiology and virology, commonly face the challenge of analyzing rapidly increasing numbers of genomes. In case of Homo sapiens, the number of sequenced genomes will approach hundreds of thousands in the next few years. Simply scaling up established bioinformatics pipelines will not be sufficient for leveraging the full potential of such rich genomic data sets. Instead, novel, qualitatively different computational methods and paradigms are needed. We will witness the rapid extension of computational pan-genomics, a new sub-area of research in computational biology. In this article, we generalize existing definitions and understand a pan-genome as any collection of genomic sequences to be analyzed jointly or to be used as a reference. We examine already available approaches to construct and use pan-genomes, discuss the potential benefits of future technologies and methodologies and review open challenges from the v},
      source   = {http://publications.mi.fu-berlin.de/1463/},
    }

  • Jiajie Peng, Qianqian Li, Bolin Chen, Jialu Hu and Xuequn Shang. "Analyzing factors involved in the HPO-based semantic similarity calculation", In BIBM Workshop on Biological Ontologies and Knowledge Bases,2016, Shenzhen, 12 .(Conference), HIT
  • [Bibtex]

    @inproceedings{nwpu_index7,
      author       = {Jiajie Peng and Qianqian Li and Bolin Chen and Jialu Hu and Xuequn Shang},
      title        = {Analyzing factors involved in the {HPO}-based semantic similarity calculation},
      booktitle    = {{BIBM} Workshop on Biological Ontologies and Knowledge Bases},
      organization = {HIT},
      address      = {Shenzhen},
      month        = dec,
      year         = {2016},
      type         = {Conference},
    }

  • Bolin Chen, Xuequn Shang, Min Li, Jianxin Wang, Fang-Xiang Wu. "Identifying individual-cancer-related genes by rebalancing the training samples", IEEE Transactions on Nanobioscience, 2016, 15(4): 309-315.(Article)
  • [Bibtex]

    @article{nwpu_index16,
      author   = {Bolin Chen and Xuequn Shang and Min Li and Jianxin Wang and Fang-Xiang Wu},
      title    = {Identifying individual-cancer-related genes by rebalancing the training samples},
      journal  = {IEEE Transactions on Nanobioscience},
      volume   = {15},
      number   = {4},
      pages    = {309--315},
      year     = {2016},
      type     = {Article},
      abstract = {The identification of individual-cancer-related genes typically is an imbalanced classification issue. The number of known cancer-related genes is far less than the number of all unknown genes, which makes it very hard to detect novel predictions from such imbalanced training samples. A regular machine learning method can either only detect genes related to all cancers or add clinical knowledge to circumvent this issue. In this study, we introduce a training sample rebalancing strategy to overcome this issue by using a two-step logistic regression and a random resampling method. The two-step logistic regression is to select a set of genes that related to all cancers. While the random resampling method is performed to further classify those genes associated with individual cancers. The issue of imbalanced classification is circumvented by randomly adding positive instances related to other cancers at first, and then excluding those unrelated predictions according to the overall performance at the following step. Numerical experiments show that the proposed resampling method is able to identify cancer-related genes even when the number of known genes related to it is small. The final predictions for all individual cancers achieve AUC values around 0.93 by using the leave-one-out cross validation method, which is very promising, compared with existing methods.},
      source   = {http://ieeexplore.ieee.org/document/7451278/authors?ctx=authors},
    }

  • Xuequn Shang, Yu Wang, Bolin Chen. "Identifying essential proteins based on dynamic PPI networks and RNA-Seq datasets", SCIENCE CHINA Information Science, 2016, 59: 070106.(Article)
  • [Bibtex]

    @article{nwpu_index17,
      author   = {Xuequn Shang and Yu Wang and Bolin Chen},
      title    = {Identifying essential proteins based on dynamic {PPI} networks and {RNA-Seq} datasets},
      journal  = {SCIENCE CHINA Information Science},
      volume   = {59},
      pages    = {070106},
      year     = {2016},
      type     = {Article},
      abstract = {The identification of essential proteins is not only important for understanding organism structure on the molecular level, but also beneficial to drug-target detection and genetic disease prevention. Traditional methods often employ various centrality indices of static protein-protein interaction (PPI) networks and/or gene expression profiles to predict essential proteins. However, the prediction accuracy of most methods still has room to be further improved. In this study, we propose a strategy to increase the prediction accuracy of essential protein identification in three ways. Firstly, RNA-Seq datasets are employed to construct integrated dynamic PPI networks. Using a RNA-Seq dataset is expected to give more accurate predictions than using microarray gene expression profiles. Secondly, a novel integrated dynamic PPI network is constructed by considering both the co-expression pattern and the co-expression level of the RNA-Seq data. Thirdly, a novel two-step strategy is proposed to identify essential proteins from two known centrality indices. Numerical experiments have shown that the proposed strategy can increase the prediction accuracy dramatically, which can be generalized to many existing methods and centrality indices.},
      source   = {http://link.springer.com/article/10.1007/s11432-016-5583-z},
    }

  • Jiajie Peng, Hongxiang Li, Yongzhuang Liu, Liran Juan, Qinghua Jiang, Yadong Wang and Jin Chen. "InteGO2: a web tool for measuring and visualizing gene semantic similarities using Gene Ontology", BMC Genomics, 2016, 17(5): 530.(Article)
  • [Bibtex]

    @article{nwpu_index28,
      author  = {Jiajie Peng and Hongxiang Li and Yongzhuang Liu and Liran Juan and Qinghua Jiang and Yadong Wang and Jin Chen},
      title   = {{InteGO2}: a web tool for measuring and visualizing gene semantic similarities using {Gene Ontology}},
      journal = {BMC Genomics},
      volume  = {17},
      number  = {5},
      pages   = {530},
      year    = {2016},
      type    = {Article},
    }

  • Jiajie Peng, Hansheng Xue, Yukai Shao, Xuequn Shang, Yadong Wang, and Jin Chen. "Measuring Phenotype Semantic Similarity using Human Phenotype Ontology", In Bioinformatics and Biomedicine (BIBM), 2016 IEEE International Conference on,2016.(Conference), IEEE
  • [Bibtex]

    @inproceedings{nwpu_index29,
      author       = {Jiajie Peng and Hansheng Xue and Yukai Shao and Xuequn Shang and Yadong Wang and Jin Chen},
      title        = {Measuring Phenotype Semantic Similarity using Human Phenotype Ontology},
      booktitle    = {Bioinformatics and Biomedicine (BIBM), 2016 IEEE International Conference on},
      organization = {IEEE},
      year         = {2016},
      type         = {Conference},
    }

  • Jiajie Peng, Tao Wang, Jianping Hu, Yadong Wang, Jin Chen. "Constructing Networks of Organelle Functional Modules in Arabidopsis ", Current Genomics, 2016, 17(5): 427-438.(Article)
  • [Bibtex]

    @article{nwpu_index30,
      author  = {Jiajie Peng and Tao Wang and Jianping Hu and Yadong Wang and Jin Chen},
      title   = {Constructing Networks of Organelle Functional Modules in {Arabidopsis}},
      journal = {Current Genomics},
      volume  = {17},
      number  = {5},
      pages   = {427--438},
      year    = {2016},
      type    = {Article},
    }

  • Jiajie Peng, Tao Wang, Jixuan Wang, Yadong Wang, Jin Chen. "Extending gene ontology with gene association networks", Bioinformatics, 2016, 32(8): 1185-1194.(Article)
  • [Bibtex]

    @article{nwpu_index33,
      author  = {Jiajie Peng and Tao Wang and Jixuan Wang and Yadong Wang and Jin Chen},
      title   = {Extending gene ontology with gene association networks},
      journal = {Bioinformatics},
      volume  = {32},
      number  = {8},
      pages   = {1185--1194},
      year    = {2016},
      type    = {Article},
    }

  • Cieply B., Park JW., Nakauka-Ddamba A., Bebee TW., Guo Y., Shang X., Lengner CJ., Xing Y., Carstens RP. "Multiphasic and dynamic changes in alternative splicing during induction of pluripotency are coordinated by numerous RNA binding proteins", Cell Reports, 2016, 15: 1-9.(Article)
  • [Bibtex]

    @article{nwpu_index48,
      author  = {Cieply, B. and Park, J. W. and Nakauka-Ddamba, A. and Bebee, T. W. and Guo, Y. and Shang, X. and Lengner, C. J. and Xing, Y. and Carstens, R. P.},
      title   = {Multiphasic and dynamic changes in alternative splicing during induction of pluripotency are coordinated by numerous {RNA} binding proteins},
      journal = {Cell Reports},
      volume  = {15},
      pages   = {1--9},
      year    = {2016},
      type    = {Article},
    }

  • Yang Y., Park JW., Bebee TW., Warzecha CC., Guo Y., Shang X., Xing Y., Carstens RP. "Determination of a comprehensive alternative splicing regulatory network and the combinatorial regulation by key factors during the epithelial to mesenchymal transition", Molecular and Cellular Biology, 2016, 36(11): 1704-1719.(Article)
  • [Bibtex]

    @article{nwpu_index49,
      author  = {Yang, Y. and Park, J. W. and Bebee, T. W. and Warzecha, C. C. and Guo, Y. and Shang, X. and Xing, Y. and Carstens, R. P.},
      title   = {Determination of a comprehensive alternative splicing regulatory network and the combinatorial regulation by key factors during the epithelial to mesenchymal transition},
      journal = {Molecular and Cellular Biology},
      year    = {2016},
      volume  = {36},
      number  = {11},
      pages   = {1704--1719},
      type    = {Article},
    }

  • M Hu, L Shen, X Zan, X Shang, W Liu. "An efficient algorithm to identify the optimal one-bit perturbation based on the basin-of-state size of Boolean networks", Scientific Reports, 2016: PMC4872544.(Article)
  • [Bibtex]

    % Reviewer note: the former "article number" value is a PubMed Central ID,
    % so it is kept in "note" rather than presented as a page or article number.
    @article{nwpu_index50,
      author  = {Hu, M. and Shen, L. and Zan, X. and Shang, X. and Liu, W.},
      title   = {An efficient algorithm to identify the optimal one-bit perturbation based on the basin-of-state size of {Boolean} networks},
      journal = {Scientific Reports},
      year    = {2016},
      note    = {PMCID: PMC4872544},
      type    = {Article},
    }

  • Jiang, Tao and Li, Zhanhuai and Shang, Xuequn and Chen, Bolin and Li, Weibang and Yin, Zhilei. "Constrained query of order-preserving submatrix in gene expression data", Frontiers of Computer Science, 2016, 10(6): 1-15.(Article)
  • [Bibtex]

    % Reviewer note: "Zhanhuai, L. I." and "Weibang, L. I." look like the surname
    % "LI" misread as initials; rewritten as "Li, Zhanhuai" and "Li, Weibang".
    % TODO confirm against the published author list.
    @article{nwpu_index51,
      author  = {Jiang, Tao and Li, Zhanhuai and Shang, Xuequn and Chen, Bolin and Li, Weibang and Yin, Zhilei},
      title   = {Constrained query of order-preserving submatrix in gene expression data},
      journal = {Frontiers of Computer Science},
      year    = {2016},
      volume  = {10},
      number  = {6},
      pages   = {1--15},
      type    = {Article},
    }

  • B Yang, M Xiang, Y Zhang. "Multi-manifold discriminant Isomap for visualization and classification", Pattern Recognition, 2016, 55: 215-230.(Article)
  • [Bibtex]

    @article{nwpu_index64,
      author  = {Yang, B. and Xiang, M. and Zhang, Y.},
      title   = {Multi-manifold discriminant {Isomap} for visualization and classification},
      journal = {Pattern Recognition},
      year    = {2016},
      volume  = {55},
      pages   = {215--230},
      type    = {Article},
    }

  • Y Zhang, M Xiang, B Yang. "Linear dimensionality reduction based on Hybrid structure preserving projections", Neurocomputing, 2016, 173: 518-529.(Article)
  • [Bibtex]

    @article{nwpu_index65,
      author  = {Zhang, Y. and Xiang, M. and Yang, B.},
      title   = {Linear dimensionality reduction based on Hybrid structure preserving projections},
      journal = {Neurocomputing},
      year    = {2016},
      volume  = {173},
      pages   = {518--529},
      type    = {Article},
    }

2015

  • Jialu Hu and Knut Reinert. "LocalAli: an evolutionary-based local alignment approach to identify functionally conserved modules in multiple networks", Bioinformatics, 2015, 31(3): 363-372.(Article)
  • [Bibtex]

    @article{nwpu_index14,
      author  = {Hu, Jialu and Reinert, Knut},
      title   = {{LocalAli}: an evolutionary-based local alignment approach to identify functionally conserved modules in multiple networks},
      journal = {Bioinformatics},
      year    = {2015},
      volume  = {31},
      number  = {3},
      pages   = {363--372},
      type    = {Article},
    }

  • Bolin Chen, Min Li, Jianxin Wang, Xuequn Shang, Fang-Xiang Wu. "A fast and high performance multiple data integration algorithm for identifying human disease genes", BMC Medical Genomics, 2015, 8(Suppl 3): S2.(Article)
  • [Bibtex]

    % Reviewer note: the article number S2 is stored in "pages" because a field
    % name containing a space ("article number") cannot be parsed by BibTeX.
    @article{nwpu_index18,
      author   = {Chen, Bolin and Li, Min and Wang, Jianxin and Shang, Xuequn and Wu, Fang-Xiang},
      title    = {A fast and high performance multiple data integration algorithm for identifying human disease genes},
      journal  = {BMC Medical Genomics},
      year     = {2015},
      volume   = {8},
      number   = {Suppl 3},
      pages    = {S2},
      type     = {Article},
      abstract = {Background Integrating multiple data sources is indispensable in improving disease gene identification. It is not only due to the fact that disease genes associated with similar genetic diseases tend to lie close with each other in various biological networks, but also due to the fact that gene-disease associations are complex. Although various algorithms have been proposed to identify disease genes, their prediction performances and the computational time still should be further improved. Results In this study, we propose a fast and high performance multiple data integration algorithm for identifying human disease genes. A posterior probability of each candidate gene associated with individual diseases is calculated by using a Bayesian analysis method and a binary logistic regression model. Two prior probability estimation strategies and two feature vector construction methods are developed to test the performance of the proposed algorithm. Conclusions The proposed algorithm is not only generated predictions with high AUC scores, but also runs very fast. When only a single PPI network is employed, the AUC score is 0.769 by using F2 as feature vectors. The average running time for each leave-one-out experiment is only around 1.5 seconds. When three biological networks are integrated, the AUC score using F3 as feature vectors increases to 0.830, and the average running time for each leave-one-out experiment takes only about 12.54 seconds. It is better than many existing algorithms.},
      url      = {http://www.biomedcentral.com/1755-8794/8/S3/S2},
    }

  • Bolin Chen, Xuequn Shang, Min Li, Jianxin Wang, Fang-Xiang Wu. "A two-step logistic regression based algorithm for identifying individual-cancer-related genes", In Bioinformatics and Biomedicine (BIBM), 2015 IEEE International Conference on,2015.(Conference)
  • [Bibtex]

    @inproceedings{nwpu_index23,
      author    = {Chen, Bolin and Shang, Xuequn and Li, Min and Wang, Jianxin and Wu, Fang-Xiang},
      title     = {A two-step logistic regression based algorithm for identifying individual-cancer-related genes},
      booktitle = {2015 {IEEE} International Conference on Bioinformatics and Biomedicine ({BIBM})},
      year      = {2015},
      type      = {Conference},
    }

  • Jiajie Peng, Sahra Uygun, Taehyong Kim, Yadong Wang, Seung Y. Rhee, Jin Chen. "Measuring semantic similarities by combining gene ontology annotations and gene co-function networks", BMC Bioinformatics, 2015, 16(1): 44.(Article)
  • [Bibtex]

    @article{nwpu_index34,
      author  = {Peng, Jiajie and Uygun, Sahra and Kim, Taehyong and Wang, Yadong and Rhee, Seung Y. and Chen, Jin},
      title   = {Measuring semantic similarities by combining gene ontology annotations and gene co-function networks},
      journal = {BMC Bioinformatics},
      year    = {2015},
      volume  = {16},
      number  = {1},
      pages   = {44},
      type    = {Article},
    }

  • Jialu Hu, Khiem Lam, Xiaoxi Dong, Heidi Lyng, Natalia Shulzhenko, and Andrey Morgun. "Identification of bacterial pathogens from host-microbe interaction networks", In CGRB fall conference,2015, Corvallis, Sep 18 .(Conference)
  • [Bibtex]

    @inproceedings{nwpu_index40,
      author    = {Hu, Jialu and Lam, Khiem and Dong, Xiaoxi and Lyng, Heidi and Shulzhenko, Natalia and Morgun, Andrey},
      title     = {Identification of bacterial pathogens from host-microbe interaction networks},
      booktitle = {{CGRB} fall conference},
      year      = {2015},
      month     = sep # "~18",
      address   = {Corvallis},
      type      = {Conference},
    }

  • Khiem Lam, Jialu Hu, Xiaoxi Dong, Heidi Lyng, Natalia Shulzhenko, and Andrey Morgun. "The Microbiome of Cervical Cancer", In Symposium on Host-Microbe Systems Biology: Synthesis and Selection of Host-Microbe Systems, 2015, Eugene, July 31-August 2.(Conference)
  • [Bibtex]

    % Reviewer note: the title was entered twice ("The Microbiome of Cervical
    % Cancer, Microbiome of cervical cancer") and is collapsed to one copy.
    % "Eugen" is read as a typo for Eugene -- TODO confirm the venue city.
    @inproceedings{nwpu_index41,
      author    = {Lam, Khiem and Hu, Jialu and Dong, Xiaoxi and Lyng, Heidi and Shulzhenko, Natalia and Morgun, Andrey},
      title     = {The Microbiome of Cervical Cancer},
      booktitle = {Symposium on Host-Microbe Systems Biology: Synthesis and Selection of Host-Microbe Systems},
      year      = {2015},
      month     = jul # "~31--" # aug # "~2",
      address   = {Eugene},
      type      = {Conference},
    }

  • Xiaoxi Dong, Jialu Hu, Ekaterina Peremyslova, Ivan J. Fuss, Michael Yao, Warren Strober, Natalia Shulzhenko, Andrey Morgun. "Shotgun sequencing reveals transkingdom alterations in immunodeficiency associated enteropathy", In Symposium on Host-Microbe Systems Biology: Synthesis and Selection of Host-Microbe Systems, 2015, Eugene, July 31-August 2.(Conference)
  • [Bibtex]

    % Reviewer note: corrected the run-together "inimmunodeficiency" in the title.
    % "Ekaterian" -> "Ekaterina" and "Eugen" -> "Eugene" are presumed typos;
    % TODO confirm against the symposium programme.
    @inproceedings{nwpu_index42,
      author    = {Dong, Xiaoxi and Hu, Jialu and Peremyslova, Ekaterina and Fuss, Ivan J. and Yao, Michael and Strober, Warren and Shulzhenko, Natalia and Morgun, Andrey},
      title     = {Shotgun sequencing reveals transkingdom alterations in immunodeficiency associated enteropathy},
      booktitle = {Symposium on Host-Microbe Systems Biology: Synthesis and Selection of Host-Microbe Systems},
      year      = {2015},
      month     = jul # "~31--" # aug # "~2",
      address   = {Eugene},
      type      = {Conference},
    }

  • Li, X. and Shen, L. and Shang, X. and Liu, W. "Subpathway Analysis based on Signaling-Pathway Impact Analysis of Signaling Pathway", PLOS ONE, 2015, 10(7): e0132813.(Article)
  • [Bibtex]

    @article{nwpu_index52,
      author  = {Li, X. and Shen, L. and Shang, X. and Liu, W.},
      title   = {Subpathway Analysis based on Signaling-Pathway Impact Analysis of Signaling Pathway},
      journal = {PLOS ONE},
      year    = {2015},
      volume  = {10},
      number  = {7},
      pages   = {e0132813},
      type    = {Article},
    }

  • Qiben Zheng, Liangzhong Shen, Xuequn Shang, Wenbin Liu. "Detecting small attractors of large boolean networks by function-reduction-based strategy", IET Systems Biology, 2015(27): 1-8.(Article)
  • [Bibtex]

    @article{nwpu_index53,
      author  = {Zheng, Qiben and Shen, Liangzhong and Shang, Xuequn and Liu, Wenbin},
      title   = {Detecting small attractors of large {Boolean} networks by function-reduction-based strategy},
      journal = {IET Systems Biology},
      year    = {2015},
      number  = {27},
      pages   = {1--8},
      type    = {Article},
    }

  • B Yang, M Xiang, Y Zhang. "Learning discriminant Isomap for dimensionality reduction", In Neural Networks (IJCNN),2015: 1-8.(Conference)
  • [Bibtex]

    % Reviewer note: booktitle expanded from the truncated "Neural Networks (IJCNN)".
    @inproceedings{nwpu_index69,
      author    = {Yang, B. and Xiang, M. and Zhang, Y.},
      title     = {Learning discriminant {Isomap} for dimensionality reduction},
      booktitle = {2015 International Joint Conference on Neural Networks ({IJCNN})},
      year      = {2015},
      pages     = {1--8},
      type      = {Conference},
    }

2014

  • Bolin Chen, Weiwei Fan, Juan Liu and Fangxiang Wu. "Identifying protein complexes and functional modules—from static PPI networks to dynamic PPI networks", Briefings in Bioinformatics, 2014, 15(2): 177-194.(Article)
  • [Bibtex]

    % Reviewer note: the stored abstract was cut off at "dynamic P"; its final
    % words are completed as "dynamic PPI networks." to match the title.
    % TODO confirm against the published abstract.
    @article{nwpu_index8,
      author   = {Chen, Bolin and Fan, Weiwei and Liu, Juan and Wu, Fangxiang},
      title    = {Identifying protein complexes and functional modules---from static {PPI} networks to dynamic {PPI} networks},
      journal  = {Briefings in Bioinformatics},
      year     = {2014},
      volume   = {15},
      number   = {2},
      pages    = {177--194},
      type     = {Article},
      abstract = {Cellular processes are typically carried out by protein complexes and functional modules. Identifying them plays an important role for our attempt to reveal principles of cellular organizations and functions. In this article, we review computational algorithms for identifying protein complexes and/or functional modules from protein–protein interaction (PPI) networks. We first describe issues and pitfalls when interpreting PPI networks. Then based on types of data used and main ideas involved, we briefly describe protein complex and/or functional module identification algorithms in four categories: (i) those based on topological structures of unweighted PPI networks; (ii) those based on characters of weighted PPI networks; (iii) those based on multiple data integrations; and (iv) those based on dynamic PPI networks. The PPI networks are modelled increasingly precise when integrating more types of data, and the study of protein complexes would benefit by shifting from static to dynamic PPI networks.},
      url      = {http://bib.oxfordjournals.org/content/15/2/177.long},
    }

  • Jialu Hu, Birte Kehr and Knut Reinert. "NetCoffee: a fast and accurate global alignment approach to identify functionally conserved proteins in multiple networks", Bioinformatics, 2014, 30(4): 540.(Article)
  • [Bibtex]

    @article{nwpu_index15,
      author  = {Hu, Jialu and Kehr, Birte and Reinert, Knut},
      title   = {{NetCoffee}: a fast and accurate global alignment approach to identify functionally conserved proteins in multiple networks},
      journal = {Bioinformatics},
      year    = {2014},
      volume  = {30},
      number  = {4},
      pages   = {540},
      type    = {Article},
    }

  • Bolin Chen, Min Li, Jianxin Wang, Fang-Xiang Wu. "Disease gene identification by using graph kernels and Markov random fields", SCIENCE CHINA Life Science, 2014, 57(11): 1054-1063.(Article)
  • [Bibtex]

    % Reviewer note: the DOI is taken from the original "source" URL, where its
    % slash was percent-encoded as %2F; the URL is stored in decoded form.
    @article{nwpu_index19,
      author   = {Chen, Bolin and Li, Min and Wang, Jianxin and Wu, Fang-Xiang},
      title    = {Disease gene identification by using graph kernels and {Markov} random fields},
      journal  = {Science China Life Sciences},
      year     = {2014},
      volume   = {57},
      number   = {11},
      pages    = {1054--1063},
      type     = {Article},
      abstract = {Genes associated with similar diseases are often functionally related. This principle is largely supported by many biological data sources, such as disease phenotype similarities, protein complexes, protein-protein interactions, pathways and gene expression profiles. Integrating multiple types of biological data is an effective method to identify disease genes for many genetic diseases. To capture the gene-disease associations based on biological networks, a kernel-based MRF method is proposed by combining graph kernels and the Markov random field (MRF) method. In the proposed method, three kinds of kernels are employed to describe the overall relationships of vertices in five biological networks, respectively, and a novel weighted MRF method is developed to integrate those data. In addition, an improved Gibbs sampling procedure and a novel parameter estimation method are proposed to generate predictions from the kernel-based MRF method. Numerical experiments are carried out by integrating known gene-disease associations, protein complexes, protein-protein interactions, pathways and gene expression profiles. The proposed kernel-based MRF method is evaluated by the leave-one-out cross validation paradigm, achieving an AUC score of 0.771 when integrating all those biological data in our experiments, which indicates that our proposed method is very promising compared with many existing methods.},
      doi      = {10.1007/s11427-014-4745-8},
      url      = {http://link.springer.com/article/10.1007/s11427-014-4745-8},
    }

  • Bolin Chen, Jianxin Wang, Min Li, Fang-Xiang Wu. "Identifying disease genes by integrating multiple data sources", BMC Medical Genomics, 2014, 7(Suppl 2): S2.(Article)
  • [Bibtex]

    % Reviewer note: the DOI is taken from the original "source" URL; the article
    % number S2 is stored in "pages" so that classic styles print it.
    @article{nwpu_index20,
      author   = {Chen, Bolin and Wang, Jianxin and Li, Min and Wu, Fang-Xiang},
      title    = {Identifying disease genes by integrating multiple data sources},
      journal  = {BMC Medical Genomics},
      year     = {2014},
      volume   = {7},
      number   = {Suppl 2},
      pages    = {S2},
      type     = {Article},
      abstract = {Background Now multiple types of data are available for identifying disease genes. Those data include gene-disease associations, disease phenotype similarities, protein-protein interactions, pathways, gene expression profiles, etc. It is believed that integrating different kinds of biological data is an effective method to identify disease genes. Results In this paper, we propose a multiple data integration method based on the theory of Markov random field (MRF) and the method of Bayesian analysis for identifying human disease genes. The proposed method is not only flexible in easily incorporating different kinds of data, but also reliable in predicting candidate disease genes. Conclusions Numerical experiments are carried out by integrating known gene-disease associations, protein complexes, protein-protein interactions, pathways and gene expression profiles. Predictions are evaluated by the leave-one-out method. The proposed method achieves an AUC score of 0.743 when integrating all those biological data in our experiments.},
      doi      = {10.1186/1755-8794-7-S2-S2},
      url      = {http://bmcmedgenomics.biomedcentral.com/articles/10.1186/1755-8794-7-S2-S2},
    }

  • Jiajie Peng, Hongxiang Li, Qinghua Jiang, Yadong Wang and Jin Chen. "An Integrative Approach for Measuring Semantic Similarities using Gene Ontology ", BMC systems biology, 2014, 8(5): S8.(Article)
  • [Bibtex]

    % Reviewer note: the article number S8 suggests a supplement issue, so
    % "number" is written as "Suppl 5" to match the other BMC supplement entries
    % in this list. TODO confirm against the publisher record.
    @article{nwpu_index31,
      author  = {Peng, Jiajie and Li, Hongxiang and Jiang, Qinghua and Wang, Yadong and Chen, Jin},
      title   = {An Integrative Approach for Measuring Semantic Similarities using {Gene Ontology}},
      journal = {BMC Systems Biology},
      year    = {2014},
      volume  = {8},
      number  = {Suppl 5},
      pages   = {S8},
      type    = {Article},
    }

2013

  • Bolin Chen, Fang-Xiang Wu. "Identifying protein complexes based on multiple topological structures in PPI networks", IEEE Transactions on Nanobioscience, 2013, 12(3): 165-172.(Article)
  • [Bibtex]

    % Reviewer note: the redundant query string (?reload=true&arnumber=...) is
    % dropped from the URL; an unescaped ampersand breaks LaTeX under many styles.
    @article{nwpu_index21,
      author   = {Chen, Bolin and Wu, Fang-Xiang},
      title    = {Identifying protein complexes based on multiple topological structures in {PPI} networks},
      journal  = {IEEE Transactions on Nanobioscience},
      year     = {2013},
      volume   = {12},
      number   = {3},
      pages    = {165--172},
      type     = {Article},
      abstract = {Various computational algorithms are developed to identify protein complexes based on only one of specific topological structures in protein-protein interaction (PPI) networks, such as cliques, dense subgraphs, core-attachment structures and starlike structures. However, protein complexes exhibit intricate connections in a PPI network. They cannot be fully detected by only single topological structure. In this paper, we propose an algorithm based on multiple topological structures to identify protein complexes from PPI networks. In the proposed algorithm, four single topological structure based algorithms are first employed to identify raw predictions with specific topological structures, respectively. Those raw predictions are trimmed according to their topological information or GO annotations. Similar results are carefully merged before generating final predictions. Numerical experiments are conducted on a yeast PPI network of DIP and a human PPI network of HPRD. The predicted results show that the multiple topological structure based algorithm can not only obtain a more number of predictions, but also generate results with high accuracy in terms of f-score, matching with known protein complexes and functional enrichments with GO.},
      url      = {http://ieeexplore.ieee.org/document/6583247/},
    }

  • Bolin Chen, Jinhong Shi, Shenggui Zhang, Fang-Xiang Wu. "Identifying protein complexes in protein-protein interaction networks by using clique seeds and graph entropy", Proteomics, 2013, 13(2): 269-277.(Article)
  • [Bibtex]

    % Reviewer note: the DOI is taken from the original "source" URL.
    @article{nwpu_index22,
      author   = {Chen, Bolin and Shi, Jinhong and Zhang, Shenggui and Wu, Fang-Xiang},
      title    = {Identifying protein complexes in protein-protein interaction networks by using clique seeds and graph entropy},
      journal  = {Proteomics},
      year     = {2013},
      volume   = {13},
      number   = {2},
      pages    = {269--277},
      type     = {Article},
      abstract = {The identification of protein complexes plays a key role in understanding major cellular processes and biological functions. Various computational algorithms have been proposed to identify protein complexes from protein–protein interaction (PPI) networks. In this paper, we first introduce a new seed-selection strategy for seed-growth style algorithms. Cliques rather than individual vertices are employed as initial seeds. After that, a result-modification approach is proposed based on this seed-selection strategy. Predictions generated by higher order clique seeds are employed to modify results that are generated by lower order ones. The performance of this seed-selection strategy and the result-modification approach are tested by using the entropy-based algorithm, which is currently the best seed-growth style algorithm to detect protein complexes from PPI networks. In addition, we investigate four pairs of strategies for this algorithm in order to improve its accuracy. The numerical experiments are conducted on a Saccharomyces cerevisiae PPI network. The group of best predictions consists of 1711 clusters, with the average f-score at 0.68 after removing all similar and redundant clusters. We conclude that higher order clique seeds can generate predictions with higher accuracy and that our improved entropy-based algorithm outputs more reasonable predictions than the original one.},
      doi      = {10.1002/pmic.201200336},
      url      = {http://onlinelibrary.wiley.com/doi/10.1002/pmic.201200336/abstract},
    }

  • Jiajie Peng, Jin Chen, Yadong Wang. "Identifying cross-category relations in gene ontology and constructing genome-specific term association networks ", BMC bioinformatics, 2013, 14(2): S15.(Article)
  • [Bibtex]

    % Reviewer note: the article number S15 suggests a supplement issue, so
    % "number" is written as "Suppl 2" to match the other BMC supplement entries
    % in this list. TODO confirm against the publisher record.
    @article{nwpu_index32,
      author  = {Peng, Jiajie and Chen, Jin and Wang, Yadong},
      title   = {Identifying cross-category relations in gene ontology and constructing genome-specific term association networks},
      journal = {BMC Bioinformatics},
      year    = {2013},
      volume  = {14},
      number  = {Suppl 2},
      pages   = {S15},
      type    = {Article},
    }

  • Jialu Hu, Birte Kehr, and Knut Reinert. "M-NetAligner: a novel global alignment approach to identify functional orthologs in multiple networks", In 17th Annual International Conference on Research in Computational Molecular Biology (RECOMB), 2013: P207, Beijing, Apr 7-10.(Conference)
  • [Bibtex]

    % Reviewer note: "M-NetAligner" had been split off the title and appended to
    % the author list; it is restored as the leading word of the title.
    % P207 is kept in "pages"; its exact meaning is not shown in the source.
    @inproceedings{nwpu_index39,
      author    = {Hu, Jialu and Kehr, Birte and Reinert, Knut},
      title     = {{M-NetAligner}: a novel global alignment approach to identify functional orthologs in multiple networks},
      booktitle = {17th Annual International Conference on Research in Computational Molecular Biology ({RECOMB})},
      year      = {2013},
      month     = apr # "~7--10",
      pages     = {P207},
      address   = {Beijing},
      type      = {Conference},
    }

2011

  • Jialu Hu, Ling Sun, Liang Yu, Lin Gao. "A Novel Graph Isomorphism Algorithm Based on Feature Selection in Network Motif Discovery", Sciencepaper online, 2011.(Article)
  • [Bibtex]

    @article{nwpu_index37,
      author  = {Hu, Jialu and Sun, Ling and Yu, Liang and Gao, Lin},
      title   = {A Novel Graph Isomorphism Algorithm Based on Feature Selection in Network Motif Discovery},
      journal = {Sciencepaper online},
      year    = {2011},
      type    = {Article},
    }

2009

  • Qin Gui-min, Hu Jia-lu, Gao Lin. "A review on algorithms for network motif discovery in biological networks", Chinese Journal of Electronics, 2009, 37(10): 2258-2265.(Article)
  • [Bibtex]

    % Reviewer note: these names are written surname-first ("Qin Gui-min"), so
    % they are entered in "Last, First" form to stop BibTeX taking the given
    % name as the surname.
    @article{nwpu_index36,
      author  = {Qin, Gui-min and Hu, Jia-lu and Gao, Lin},
      title   = {A review on algorithms for network motif discovery in biological networks},
      journal = {Chinese Journal of Electronics},
      year    = {2009},
      volume  = {37},
      number  = {10},
      pages   = {2258--2265},
      type    = {Article},
    }

  • Jialu Hu, Lin Gao, and Guimin Qin. "Evaluation of subgraph searching algorithms detecting network motif in biological networks", In Frontiers of Computer Science,2009: 412-416, China.(Conference)
  • [Bibtex]

    @inproceedings{nwpu_index38,
      author    = {Hu, Jialu and Gao, Lin and Qin, Guimin},
      title     = {Evaluation of subgraph searching algorithms detecting network motif in biological networks},
      booktitle = {Frontiers of Computer Science},
      year      = {2009},
      pages     = {412--416},
      address   = {China},
      type      = {Conference},
    }