diff --git a/README.md b/README.md index 78fc981..0db200a 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -

Updated on 2024-11-24

+

Updated on 2024-11-25

Brain

diff --git a/data_store/papers_2024-11-25.json b/data_store/papers_2024-11-25.json new file mode 100644 index 0000000..d60861e --- /dev/null +++ b/data_store/papers_2024-11-25.json @@ -0,0 +1,519 @@ +{ + "Brain": { + "2411.14418v1": { + "title": "Multimodal 3D Brain Tumor Segmentation with Adversarial Training and Conditional Random Field", + "url": "http://arxiv.org/abs/2411.14418v1", + "authors": "Lan Jiang, Yuchao Zheng, Miao Yu, Haiqing Zhang, Fatemah Aladwani, Alessandro Perelli", + "update_time": "2024-11-21", + "abstract": "Accurate brain tumor segmentation remains a challenging task due to structural complexity and great individual differences of gliomas. Leveraging the pre-eminent detail resilience of CRF and spatial feature extraction capacity of V-net, we propose a multimodal 3D Volume Generative Adversarial Network (3D-vGAN) for precise segmentation. The model utilizes Pseudo-3D for V-net improvement, adds conditional random field after generator and use original image as supplemental guidance. Results, using the BraTS-2018 dataset, show that 3D-vGAN outperforms classical segmentation models, including U-net, Gan, FCN and 3D V-net, reaching specificity over 99.8%." + }, + "2411.14196v1": { + "title": "Uncertainty Quantification in Working Memory via Moment Neural Networks", + "url": "http://arxiv.org/abs/2411.14196v1", + "authors": "Hengyuan Ma, Wenlian Lu, Jianfeng Feng", + "update_time": "2024-11-21", + "abstract": "Humans possess a finely tuned sense of uncertainty that helps anticipate potential errors, vital for adaptive behavior and survival. However, the underlying neural mechanisms remain unclear. This study applies moment neural networks (MNNs) to explore the neural mechanism of uncertainty quantification in working memory (WM). The MNN captures nonlinear coupling of the first two moments in spiking neural networks (SNNs), identifying firing covariance as a key indicator of uncertainty in encoded information. Trained on a WM task, the model demonstrates coding precision and uncertainty quantification comparable to human performance. Analysis reveals a link between the probabilistic and sampling-based coding for uncertainty representation. Transferring the MNN's weights to an SNN replicates these results. Furthermore, the study provides testable predictions demonstrating how noise and heterogeneity enhance WM performance, highlighting their beneficial role rather than being mere biological byproducts. These findings offer insights into how the brain effectively manages uncertainty with exceptional accuracy.", + "code_url": "https://github.com/awakermhy/mnn_wm_uq" + }, + "2411.14147v1": { + "title": "Spiking neural networks: Towards bio-inspired multimodal perception in robotics", + "url": "http://arxiv.org/abs/2411.14147v1", + "authors": "Katerina Maria Oikonomou, Vasiliki Balaska, Konstantinos A. Tsintotas, Christos N. Mavridis, Ioannis Kansizoglou, Antonios Gasteratos", + "update_time": "2024-11-21", + "abstract": "Spiking neural networks (SNNs) have captured apparent interest over the recent years, stemming from neuroscience and reaching the field of artificial intelligence. However, due to their nature SNNs remain far behind in achieving the exceptional performance of deep neural networks (DNNs). As a result, many scholars are exploring ways to enhance SNNs by using learning techniques from DNNs. While this approach has been proven to achieve considerable improvements in SNN performance, we propose another perspective: enhancing the biological plausibility of the models to leverage the advantages of SNNs fully. Our approach aims to propose a brain-like combination of audio-visual signal processing for recognition tasks, intended to succeed in more bio-plausible human-robot interaction applications." + }, + "2411.14017v1": { + "title": "Automatic brain tumor segmentation in 2D intra-operative ultrasound images using MRI tumor annotations", + "url": "http://arxiv.org/abs/2411.14017v1", + "authors": "Mathilde Faanes, Ragnhild Holden Helland, Ole Solheim, Ingerid Reinertsen", + "update_time": "2024-11-21", + "abstract": "Automatic segmentation of brain tumors in intra-operative ultrasound (iUS) images could facilitate localization of tumor tissue during resection surgery. The lack of large annotated datasets limits the current models performances. In this paper, we investigate the use of tumor annotations in pre-operative MRI images, which are more easily accessible than annotations in iUS images, for training of deep learning models for iUS brain tumor segmentation. We used 180 annotated pre-operative MRI images with corresponding unannotated iUS images, and 29 annotated iUS images. Image registration was performed to transfer the MRI annotations to the corresponding iUS images before training models with the nnU-Net framework. To validate the use of MRI labels, the models were compared to a model trained with only US annotated tumors, and a model with both US and MRI annotated tumors. In addition, the results were compared to annotations validated by an expert neurosurgeon on the same test set to measure inter-observer variability. The results showed similar performance for a model trained with only MRI annotated tumors, compared to a model trained with only US annotated tumors. The model trained using both modalities obtained slightly better results with an average Dice score of 0.62, where external expert annotations achieved a score of 0.67. The results also showed that the deep learning models were comparable to expert annotation for larger tumors (> 200 mm2), but perform clearly worse for smaller tumors (< 200 mm2). This shows that MRI tumor annotations can be used as a substitute for US tumor annotations to train a deep learning model for automatic brain tumor segmentation in intra-operative ultrasound images. Small tumors is a limitation for the current models and will be the focus of future work. The main models are available here: https://github.com/mathildefaanes/us_brain_tumor_segmentation.", + "code_url": "https://github.com/mathildefaanes/us_brain_tumor_segmentation" + }, + "2411.13962v1": { + "title": "Hybrid-Neuromorphic Approach for Underwater Robotics Applications: A Conceptual Framework", + "url": "http://arxiv.org/abs/2411.13962v1", + "authors": "Vidya Sudevan, Fakhreddine Zayer, Sajid Javed, Hamad Karki, Giulia De Masi, Jorge Dias", + "update_time": "2024-11-21", + "abstract": "This paper introduces the concept of employing neuromorphic methodologies for task-oriented underwater robotics applications. In contrast to the increasing computational demands of conventional deep learning algorithms, neuromorphic technology, leveraging spiking neural network architectures, promises sophisticated artificial intelligence with significantly reduced computational requirements and power consumption, emulating human brain operational principles. Despite documented neuromorphic technology applications in various robotic domains, its utilization in marine robotics remains largely unexplored. Thus, this article proposes a unified framework for integrating neuromorphic technologies for perception, pose estimation, and haptic-guided conditional control of underwater vehicles, customized to specific user-defined objectives. This conceptual framework stands to revolutionize underwater robotics, enhancing efficiency and autonomy while reducing energy consumption. By enabling greater adaptability and robustness, this advancement could facilitate applications such as underwater exploration, environmental monitoring, and infrastructure maintenance, thereby contributing to significant progress in marine science and technology." + }, + "2411.13490v1": { + "title": "Efficient Brain Imaging Analysis for Alzheimer's and Dementia Detection Using Convolution-Derivative Operations", + "url": "http://arxiv.org/abs/2411.13490v1", + "authors": "Yasmine Mustafa, Mohamed Elmahallawy, Tie Luo", + "update_time": "2024-11-20", + "abstract": "Alzheimer's disease (AD) is characterized by progressive neurodegeneration and results in detrimental structural changes in human brains. Detecting these changes is crucial for early diagnosis and timely intervention of disease progression. Jacobian maps, derived from spatial normalization in voxel-based morphometry (VBM), have been instrumental in interpreting volume alterations associated with AD. However, the computational cost of generating Jacobian maps limits its clinical adoption. In this study, we explore alternative methods and propose Sobel kernel angle difference (SKAD) as a computationally efficient alternative. SKAD is a derivative operation that offers an optimized approach to quantifying volumetric alterations through localized analysis of the gradients. By efficiently extracting gradient amplitude changes at critical spatial regions, this derivative operation captures regional volume variations Evaluation of SKAD over various medical datasets demonstrates that it is 6.3x faster than Jacobian maps while still maintaining comparable accuracy. This makes it an efficient and competitive approach in neuroimaging research and clinical practice." + }, + "2411.13378v1": { + "title": "Quantum-Brain: Quantum-Inspired Neural Network Approach to Vision-Brain Understanding", + "url": "http://arxiv.org/abs/2411.13378v1", + "authors": "Hoang-Quan Nguyen, Xuan-Bac Nguyen, Hugh Churchill, Arabinda Kumar Choudhary, Pawan Sinha, Samee U. Khan, Khoa Luu", + "update_time": "2024-11-20", + "abstract": "Vision-brain understanding aims to extract semantic information about brain signals from human perceptions. Existing deep learning methods for vision-brain understanding are usually introduced in a traditional learning paradigm missing the ability to learn the connectivities between brain regions. Meanwhile, the quantum computing theory offers a new paradigm for designing deep learning models. Motivated by the connectivities in the brain signals and the entanglement properties in quantum computing, we propose a novel Quantum-Brain approach, a quantum-inspired neural network, to tackle the vision-brain understanding problem. To compute the connectivity between areas in brain signals, we introduce a new Quantum-Inspired Voxel-Controlling module to learn the impact of a brain voxel on others represented in the Hilbert space. To effectively learn connectivity, a novel Phase-Shifting module is presented to calibrate the value of the brain signals. Finally, we introduce a new Measurement-like Projection module to present the connectivity information from the Hilbert space into the feature space. The proposed approach can learn to find the connectivities between fMRI voxels and enhance the semantic information obtained from human perceptions. Our experimental results on the Natural Scene Dataset benchmarks illustrate the effectiveness of the proposed method with Top-1 accuracies of 95.1% and 95.6% on image and brain retrieval tasks and an Inception score of 95.3% on fMRI-to-image reconstruction task. Our proposed quantum-inspired network brings a potential paradigm to solving the vision-brain problems via the quantum computing theory." + }, + "2411.13217v1": { + "title": "Energy-based features and bi-LSTM neural network for EEG-based music and voice classification", + "url": "http://arxiv.org/abs/2411.13217v1", + "authors": "Isaac Ariza, Ana M. Barbancho, Lorenzo J. Tardon, Isabel Barbancho", + "update_time": "2024-11-20", + "abstract": "The human brain receives stimuli in multiple ways; among them, audio constitutes an important source of relevant stimuli for the brain regarding communication, amusement, warning, etc. In this context, the aim of this manuscript is to advance in the classification of brain responses to music of diverse genres and to sounds of different nature: speech and music. For this purpose, two different experiments have been designed to acquiere EEG signals from subjects listening to songs of different musical genres and sentences in various languages. With this, a novel scheme is proposed to characterize brain signals for their classification; this scheme is based on the construction of a feature matrix built on relations between energy measured at the different EEG channels and the usage of a bi-LSTM neural network. With the data obtained, evaluations regarding EEG-based classification between speech and music, different musical genres, and whether the subject likes the song listened to or not are carried out. The experiments unveil satisfactory performance to the proposed scheme. The results obtained for binary audio type classification attain 98.66% of success. In multi-class classification between 4 musical genres, the accuracy attained is 61.59%, and results for binary classification of musical taste rise to 96.96%." + }, + "2411.13203v1": { + "title": "A computational framework for integrating Predictive processes with evidence Accumulation Models (PAM)", + "url": "http://arxiv.org/abs/2411.13203v1", + "authors": "Antonino Visalli, Francesco Maria Calistroni, Margherita Calderan, Francesco Donnarumma, Marco Zorzi, Ettore Ambrosini", + "update_time": "2024-11-20", + "abstract": "Evidence Accumulation Models (EAMs) have been widely used to investigate speeded decision-making processes, but they have largely neglected the role of predictive processes emphasized by theories of the predictive brain. In this paper, we present the Predictive evidence Accumulation Models (PAM), a novel computational framework that integrates predictive processes into EAMs. Grounded in the \"observing the observer\" framework, PAM combines models of Bayesian perceptual inference, such as the Hierarchical Gaussian Filter, with three established EAMs (the Diffusion Decision Model, Lognormal Race Model, and Race Diffusion Model) to model decision-making under uncertainty. We validate PAM through parameter recovery simulations, demonstrating its accuracy and computational efficiency across various decision-making scenarios. Additionally, we provide a step-by-step tutorial using real data to illustrate PAM's application and discuss its theoretical implications. PAM represents a significant advancement in the computational modeling of decision-making, bridging the gap between predictive brain theories and EAMs, and offers a promising tool for future empirical research." + }, + "2411.12919v1": { + "title": "Enhancing Deep Learning-Driven Multi-Coil MRI Reconstruction via Self-Supervised Denoising", + "url": "http://arxiv.org/abs/2411.12919v1", + "authors": "Asad Aali, Marius Arvinte, Sidharth Kumar, Yamin I. Arefeen, Jonathan I. Tamir", + "update_time": "2024-11-19", + "abstract": "We examine the effect of incorporating self-supervised denoising as a pre-processing step for training deep learning (DL) based reconstruction methods on data corrupted by Gaussian noise. K-space data employed for training are typically multi-coil and inherently noisy. Although DL-based reconstruction methods trained on fully sampled data can enable high reconstruction quality, obtaining large, noise-free datasets is impractical. We leverage Generalized Stein's Unbiased Risk Estimate (GSURE) for denoising. We evaluate two DL-based reconstruction methods: Diffusion Probabilistic Models (DPMs) and Model-Based Deep Learning (MoDL). We evaluate the impact of denoising on the performance of these DL-based methods in solving accelerated multi-coil magnetic resonance imaging (MRI) reconstruction. The experiments were carried out on T2-weighted brain and fat-suppressed proton-density knee scans. We observed that self-supervised denoising enhances the quality and efficiency of MRI reconstructions across various scenarios. Specifically, employing denoised images rather than noisy counterparts when training DL networks results in lower normalized root mean squared error (NRMSE), higher structural similarity index measure (SSIM) and peak signal-to-noise ratio (PSNR) across different SNR levels, including 32dB, 22dB, and 12dB for T2-weighted brain data, and 24dB, 14dB, and 4dB for fat-suppressed knee data. Overall, we showed that denoising is an essential pre-processing technique capable of improving the efficacy of DL-based MRI reconstruction methods under diverse conditions. By refining the quality of input data, denoising can enable the training of more effective DL networks, potentially bypassing the need for noise-free reference MRI scans.", + "code_url": "https://github.com/utcsilab/gsure-diffusion-mri" + } + }, + "EEG": { + "2411.13931v1": { + "title": "Implementation of tools for lessening the influence of artifacts in EEG signal analysis", + "url": "http://arxiv.org/abs/2411.13931v1", + "authors": "Mario Molina-Molina, Lorenzo J. Tardon, Ana M. Barbancho, Isabel Barbancho", + "update_time": "2024-11-21", + "abstract": "This manuscript describes and implementation of scripts of code aimed at reducing the influence of artifacts, specifically focused on ocular artifacts, in the measurement and processing of electroencephalogram (EEG) signals. This process is of importance because it benefits the analysis and study of long trial samples when the appearance of ocular artifacts cannot be avoided by simply discarding trials. The implementations provided to the reader illustrate, with slight modifications, previously proposed methods aimed at the partial or complete elimination of EEG channels or components are those that resemble the electro-oculogram (EOG) signals in which artifacts are detected. In addition to the description of each of the provided functions, examples of utilization and illustrative figures will be included to show the expected results and processing pipeline." + }, + "2411.13288v1": { + "title": "EEG Signal Denoising Using pix2pix GAN: Enhancing Neurological Data Analysis", + "url": "http://arxiv.org/abs/2411.13288v1", + "authors": "Haoyi Wang, Xufang Chen, Yue Yang, Kewei Zhou, Meining Lv, Dongrui Wang, Wenjie Zhang", + "update_time": "2024-11-20", + "abstract": "Electroencephalography (EEG) is essential in neuroscience and clinical practice, yet it suffers from physiological artifacts, particularly electromyography (EMG), which distort signals. We propose a deep learning model using pix2pixGAN to remove such noise and generate reliable EEG signals. Leveraging the EEGdenoiseNet dataset, we created synthetic datasets with controlled EMG noise levels for model training and testing across a signal-to-noise ratio (SNR) from -7 to 2. Our evaluation metrics included RRMSE and Pearson's CC, assessing both time and frequency domains, and compared our model with others. The pix2pixGAN model excelled, especially under high noise conditions, showing significant improvements in lower RRMSE and higher CC values. This demonstrates the model's superior accuracy and stability in purifying EEG signals, offering a robust solution for EEG analysis challenges and advancing clinical and neuroscience applications." + }, + "2411.13217v1": { + "title": "Energy-based features and bi-LSTM neural network for EEG-based music and voice classification", + "url": "http://arxiv.org/abs/2411.13217v1", + "authors": "Isaac Ariza, Ana M. Barbancho, Lorenzo J. Tardon, Isabel Barbancho", + "update_time": "2024-11-20", + "abstract": "The human brain receives stimuli in multiple ways; among them, audio constitutes an important source of relevant stimuli for the brain regarding communication, amusement, warning, etc. In this context, the aim of this manuscript is to advance in the classification of brain responses to music of diverse genres and to sounds of different nature: speech and music. For this purpose, two different experiments have been designed to acquiere EEG signals from subjects listening to songs of different musical genres and sentences in various languages. With this, a novel scheme is proposed to characterize brain signals for their classification; this scheme is based on the construction of a feature matrix built on relations between energy measured at the different EEG channels and the usage of a bi-LSTM neural network. With the data obtained, evaluations regarding EEG-based classification between speech and music, different musical genres, and whether the subject likes the song listened to or not are carried out. The experiments unveil satisfactory performance to the proposed scheme. The results obtained for binary audio type classification attain 98.66% of success. In multi-class classification between 4 musical genres, the accuracy attained is 61.59%, and results for binary classification of musical taste rise to 96.96%." + }, + "2411.13172v1": { + "title": "Enhanced average for event-related potential analysis using dynamic time warping", + "url": "http://arxiv.org/abs/2411.13172v1", + "authors": "Mario Molina, Lorenzo J. Tardon, Ana M. Barbancho, Irene De-Torres, Isabel Barbancho", + "update_time": "2024-11-20", + "abstract": "Electroencephalography (EEG) provides a way to understand, and evaluate neurotransmission. In this context, time-locked EEG activity or event-related potentials (ERPs) are often used to capture neural activity related to specific mental processes. Normally, they are considered on the basis of averages across a number of trials. However, there exist notable variability in latency jitter, jitter, and amplitude, across trials, and, also, across users; this causes the average ERP waveform to blur, and, furthermore, diminish the amplitude of underlying waves. For these reasons, a strategy is proposed for obtaining ERP waveforms based on dynamic time warping (DTW) to adapt, and adjust individual trials to the averaged ERP, previously calculated, to build an enhanced average by making use of these warped signals. At the sight of the experiments carried out on the behaviour of the proposed scheme using publicly available datasets, this strategy reduces the attenuation in amplitude of ERP components thanks to the reduction of the influence of variability of latency and jitter, and, thus, improves the averaged ERP waveforms." + }, + "2411.12852v1": { + "title": "Enhanced Cross-Dataset Electroencephalogram-based Emotion Recognition using Unsupervised Domain Adaptation", + "url": "http://arxiv.org/abs/2411.12852v1", + "authors": "Md Niaz Imtiaz, Naimul Khan", + "update_time": "2024-11-19", + "abstract": "Emotion recognition has significant potential in healthcare and affect-sensitive systems such as brain-computer interfaces (BCIs). However, challenges such as the high cost of labeled data and variability in electroencephalogram (EEG) signals across individuals limit the applicability of EEG-based emotion recognition models across domains. These challenges are exacerbated in cross-dataset scenarios due to differences in subject demographics, recording devices, and presented stimuli. To address these issues, we propose a novel approach to improve cross-domain EEG-based emotion classification. Our method, Gradual Proximity-guided Target Data Selection (GPTDS), incrementally selects reliable target domain samples for training. By evaluating their proximity to source clusters and the models confidence in predicting them, GPTDS minimizes negative transfer caused by noisy and diverse samples. Additionally, we introduce Prediction Confidence-aware Test-Time Augmentation (PC-TTA), a cost-effective augmentation technique. Unlike traditional TTA methods, which are computationally intensive, PC-TTA activates only when model confidence is low, improving inference performance while drastically reducing computational costs. Experiments on the DEAP and SEED datasets validate the effectiveness of our approach. When trained on DEAP and tested on SEED, our model achieves 67.44% accuracy, a 7.09% improvement over the baseline. Conversely, training on SEED and testing on DEAP yields 59.68% accuracy, a 6.07% improvement. Furthermore, PC-TTA reduces computational time by a factor of 15 compared to traditional TTA methods. Our method excels in detecting both positive and negative emotions, demonstrating its practical utility in healthcare applications. Code available at: https://github.com/RyersonMultimediaLab/EmotionRecognitionUDA", + "code_url": "https://github.com/ryersonmultimedialab/emotionrecognitionuda" + }, + "2411.12400v1": { + "title": "Bi-LSTM neural network for EEG-based error detection in musicians' performance", + "url": "http://arxiv.org/abs/2411.12400v1", + "authors": "Isaac Ariza, Lorenzo J. Tardon, Ana M. Barbancho, Irene De-Torres, Isabel Barbancho", + "update_time": "2024-11-19", + "abstract": "Electroencephalography (EEG) is a tool that allows us to analyze brain activity with high temporal resolution. These measures, combined with deep learning and digital signal processing, are widely used in neurological disorder detection and emotion and mental activity recognition. In this paper, a new method for mental activity recognition is presented; instantaneous frequency, spectral entropy and Mel-frequency cepstral coefficients (MFCC) are used to classify EEG signals using bidirectional LSTM neural networks. It is shown that this method can be used for intra-subject or inter-subject analysis and has been applied to error detection in musician performance reaching compelling accuracy." + }, + "2411.12248v2": { + "title": "Neuro-3D: Towards 3D Visual Decoding from EEG Signals", + "url": "http://arxiv.org/abs/2411.12248v2", + "authors": "Zhanqiang Guo, Jiamin Wu, Yonghao Song, Jiahui Bu, Weijian Mai, Qihao Zheng, Wanli Ouyang, Chunfeng Song", + "update_time": "2024-11-21", + "abstract": "Human's perception of the visual world is shaped by the stereo processing of 3D information. Understanding how the brain perceives and processes 3D visual stimuli in the real world has been a longstanding endeavor in neuroscience. Towards this goal, we introduce a new neuroscience task: decoding 3D visual perception from EEG signals, a neuroimaging technique that enables real-time monitoring of neural dynamics enriched with complex visual cues. To provide the essential benchmark, we first present EEG-3D, a pioneering dataset featuring multimodal analysis data and extensive EEG recordings from 12 subjects viewing 72 categories of 3D objects rendered in both videos and images. Furthermore, we propose Neuro-3D, a 3D visual decoding framework based on EEG signals. This framework adaptively integrates EEG features derived from static and dynamic stimuli to learn complementary and robust neural representations, which are subsequently utilized to recover both the shape and color of 3D objects through the proposed diffusion-based colored point cloud decoder. To the best of our knowledge, we are the first to explore EEG-based 3D visual decoding. Experiments indicate that Neuro-3D not only reconstructs colored 3D objects with high fidelity, but also learns effective neural representations that enable insightful brain region analysis. The dataset and associated code will be made publicly available." + }, + "2411.12145v1": { + "title": "Hierarchical Trait-State Model for Decoding Dyadic Social Interactions", + "url": "http://arxiv.org/abs/2411.12145v1", + "authors": "Qianying Wu, Shigeki Nakauchi, Mohammad Shehata, Shinsuke Shimojo", + "update_time": "2024-11-19", + "abstract": "Traits are patterns of brain signals and behaviors that are stable over time but differ across individuals, whereas states are phasic patterns that vary over time, are influenced by the environment, yet oscillate around the traits. The quality of a social interaction depends on the traits and states of the interacting agents. However, it remains unclear how to decipher both traits and states from the same set of brain signals. To explore the hidden neural traits and states in relation to the behavioral ones during social interactions, we developed a pipeline to extract latent dimensions of the brain from electroencephalogram (EEG) data collected during a team flow task. Our pipeline involved two stages of dimensionality reduction: first, non-negative matrix factorization (NMF), followed by linear discriminant analysis (LDA). This pipeline resulted in an interpretable, seven-dimensional EEG latent space that revealed a trait-state hierarchical structure, with macro-segregation capturing neural traits and micro-segregation capturing neural states. Out of the seven latent dimensions, we found that three that significantly contributed to variations across individuals and task states. Using representational similarity analysis, we mapped the EEG latent space to a skill-cognition space, establishing a connection between hidden neural signatures and social interaction behaviors. Our method demonstrates the feasibility of representing both traits and states within a single model that correlates with changes in social behavior." + }, + "2411.12092v1": { + "title": "Preprocessing for lessening the influence of eye artifacts in eeg analysis", + "url": "http://arxiv.org/abs/2411.12092v1", + "authors": "Alejandro Villena, Lorenzo J. Tardon, Isabel Barbancho, Ana M. Barbancho, Elvira Brattico, Niels T. Haumann", + "update_time": "2024-11-18", + "abstract": "We dealt with the problem of artifacts in eeg signals in relation to the usage of lengthy trials. Specifically, we considered eye artifacts found in eeg signals,their influence in the analysis of the data and alternatives to diminish their impact on later studies of brain activity on lengthy tasks. We proposed a scheme of partial rejection on independent signal components, providesd a method to extract eeg signal components with diministhed influence of eye artifacts, and assess the importance of using artifact free signal excerpts to extract signal components in order to analyze brain activity in a musical context." + }, + "2411.11991v1": { + "title": "Spectral Coarse-Graining and Rescaling for Preserving Structural and Dynamical Properties in Graphs", + "url": "http://arxiv.org/abs/2411.11991v1", + "authors": "M. Schmidt, F. Caccioli, T. Aste", + "update_time": "2024-11-18", + "abstract": "We introduce a graph renormalization procedure based on the coarse-grained Laplacian, which generates reduced-complexity representations for characteristic scales identified through the spectral gap. This method retains both diffusion probabilities and large-scale topological structures, while reducing redundant information, facilitating the analysis of large graphs by decreasing the number of vertices. Applied to graphs derived from EEG recordings of human brain activity, our approach reveals macroscopic properties emerging from neuronal interactions, such as collective behavior in the form of coordinated neuronal activity. Additionally, it shows dynamic reorganization of brain activity across scales, with more generalized patterns during rest and more specialized and scale-invariant activity in the occipital lobe during attention-focused tasks." + } + }, + "BCI": { + "2411.12852v1": { + "title": "Enhanced Cross-Dataset Electroencephalogram-based Emotion Recognition using Unsupervised Domain Adaptation", + "url": "http://arxiv.org/abs/2411.12852v1", + "authors": "Md Niaz Imtiaz, Naimul Khan", + "update_time": "2024-11-19", + "abstract": "Emotion recognition has significant potential in healthcare and affect-sensitive systems such as brain-computer interfaces (BCIs). However, challenges such as the high cost of labeled data and variability in electroencephalogram (EEG) signals across individuals limit the applicability of EEG-based emotion recognition models across domains. These challenges are exacerbated in cross-dataset scenarios due to differences in subject demographics, recording devices, and presented stimuli. To address these issues, we propose a novel approach to improve cross-domain EEG-based emotion classification. Our method, Gradual Proximity-guided Target Data Selection (GPTDS), incrementally selects reliable target domain samples for training. By evaluating their proximity to source clusters and the models confidence in predicting them, GPTDS minimizes negative transfer caused by noisy and diverse samples. Additionally, we introduce Prediction Confidence-aware Test-Time Augmentation (PC-TTA), a cost-effective augmentation technique. Unlike traditional TTA methods, which are computationally intensive, PC-TTA activates only when model confidence is low, improving inference performance while drastically reducing computational costs. Experiments on the DEAP and SEED datasets validate the effectiveness of our approach. When trained on DEAP and tested on SEED, our model achieves 67.44% accuracy, a 7.09% improvement over the baseline. Conversely, training on SEED and testing on DEAP yields 59.68% accuracy, a 6.07% improvement. Furthermore, PC-TTA reduces computational time by a factor of 15 compared to traditional TTA methods. Our method excels in detecting both positive and negative emotions, demonstrating its practical utility in healthcare applications. Code available at: https://github.com/RyersonMultimediaLab/EmotionRecognitionUDA", + "code_url": "https://github.com/ryersonmultimedialab/emotionrecognitionuda" + }, + "2411.11302v1": { + "title": "Towards Personalized Brain-Computer Interface Application Based on Endogenous EEG Paradigms", + "url": "http://arxiv.org/abs/2411.11302v1", + "authors": "Heon-Gyu Kwak, Gi-Hwan Shin, Yeon-Woo Choi, Dong-Hoon Lee, Yoo-In Jeon, Jun-Su Kang, Seong-Whan Lee", + "update_time": "2024-11-18", + "abstract": "In this paper, we propose a conceptual framework for personalized brain-computer interface (BCI) applications, which can offer an enhanced user experience by customizing services to individual preferences and needs, based on endogenous electroencephalography (EEG) paradigms including motor imagery (MI), speech imagery (SI), and visual imagery. The framework includes two essential components: user identification and intention classification, which enable personalized services by identifying individual users and recognizing their intended actions through EEG signals. We validate the feasibility of our framework using a private EEG dataset collected from eight subjects, employing the ShallowConvNet architecture to decode EEG features. The experimental results demonstrate that user identification achieved an average classification accuracy of 0.995, while intention classification achieved 0.47 accuracy across all paradigms, with MI demonstrating the best performance. These findings indicate that EEG signals can effectively support personalized BCI applications, offering robust identification and reliable intention decoding, especially for MI and SI." + }, + "2411.09400v1": { + "title": "Imagined Speech and Visual Imagery as Intuitive Paradigms for Brain-Computer Interfaces", + "url": "http://arxiv.org/abs/2411.09400v1", + "authors": "Seo-Hyun Lee, Ji-Ha Park, Deok-Seon Kim", + "update_time": "2024-11-14", + "abstract": "Recent advancements in brain-computer interface (BCI) technology have emphasized the promise of imagined speech and visual imagery as effective paradigms for intuitive communication. This study investigates the classification performance and brain connectivity patterns associated with these paradigms, focusing on decoding accuracy across selected word classes. Sixteen participants engaged in tasks involving thirteen imagined speech and visual imagery classes, revealing above-chance classification accuracy for both paradigms. Variability in classification accuracy across individual classes highlights the influence of sensory and motor associations in imagined speech and vivid visual associations in visual imagery. Connectivity analysis further demonstrated increased functional connectivity in language-related and sensory regions for imagined speech, whereas visual imagery activated spatial and visual processing networks. These findings suggest the potential of imagined speech and visual imagery as an intuitive and scalable paradigm for BCI communication when selecting optimal word classes. Further exploration of the decoding outcomes for these two paradigms could provide insights for practical BCI communication." + }, + "2411.07652v1": { + "title": "On the BCI Problem", + "url": "http://arxiv.org/abs/2411.07652v1", + "authors": "Ted Dobson, Gregory Robson", + "update_time": "2024-11-12", + "abstract": "Let $G$ be a group. The BCI problem asks whether two Haar graphs of $G$ are isomorphic if and only if they are isomorphic by an element of an explicit list of isomorphisms. We first generalize this problem in a natural way and give a theoretical way to solve the isomorphism problem for the natural generalization. We then restrict our attention to abelian groups and, with an exception, reduce the problem to the isomorphism problem for a related quotient, component, or corresponding Cayley digraph. For Haar graphs of an abelian group of odd order with connection sets $S$ those of Cayley graphs (i.e. $S = -S$), the exception does not exist. For Haar graphs of cyclic groups of odd order with connection sets those of a Cayley graph, among others, we solve the isomorphism problem." + }, + "2411.10469v1": { + "title": "User-wise Perturbations for User Identity Protection in EEG-Based BCIs", + "url": "http://arxiv.org/abs/2411.10469v1", + "authors": "Xiaoqing Chen, Siyang Li, Yunlu Tu, Ziwei Wang, Dongrui Wu", + "update_time": "2024-11-04", + "abstract": "Objective: An electroencephalogram (EEG)-based brain-computer interface (BCI) is a direct communication pathway between the human brain and a computer. Most research so far studied more accurate BCIs, but much less attention has been paid to the ethics of BCIs. Aside from task-specific information, EEG signals also contain rich private information, e.g., user identity, emotion, disorders, etc., which should be protected. Approach: We show for the first time that adding user-wise perturbations can make identity information in EEG unlearnable. We propose four types of user-wise privacy-preserving perturbations, i.e., random noise, synthetic noise, error minimization noise, and error maximization noise. After adding the proposed perturbations to EEG training data, the user identity information in the data becomes unlearnable, while the BCI task information remains unaffected. Main results: Experiments on six EEG datasets using three neural network classifiers and various traditional machine learning models demonstrated the robustness and practicability of the proposed perturbations. Significance: Our research shows the feasibility of hiding user identity information in EEG data without impacting the primary BCI task information." + }, + "2411.02094v1": { + "title": "Alignment-Based Adversarial Training (ABAT) for Improving the Robustness and Accuracy of EEG-Based BCIs", + "url": "http://arxiv.org/abs/2411.02094v1", + "authors": "Xiaoqing Chen, Ziwei Wang, Dongrui Wu", + "update_time": "2024-11-04", + "abstract": "Machine learning has achieved great success in electroencephalogram (EEG) based brain-computer interfaces (BCIs). Most existing BCI studies focused on improving the decoding accuracy, with only a few considering the adversarial security. Although many adversarial defense approaches have been proposed in other application domains such as computer vision, previous research showed that their direct extensions to BCIs degrade the classification accuracy on benign samples. This phenomenon greatly affects the applicability of adversarial defense approaches to EEG-based BCIs. To mitigate this problem, we propose alignment-based adversarial training (ABAT), which performs EEG data alignment before adversarial training. Data alignment aligns EEG trials from different domains to reduce their distribution discrepancies, and adversarial training further robustifies the classification boundary. The integration of data alignment and adversarial training can make the trained EEG classifiers simultaneously more accurate and more robust. Experiments on five EEG datasets from two different BCI paradigms (motor imagery classification, and event related potential recognition), three convolutional neural network classifiers (EEGNet, ShallowCNN and DeepCNN) and three different experimental settings (offline within-subject cross-block/-session classification, online cross-session classification, and pre-trained classifiers) demonstrated its effectiveness. It is very intriguing that adversarial attacks, which are usually used to damage BCI systems, can be used in ABAT to simultaneously improve the model accuracy and robustness.", + "code_url": "https://github.com/xqchen914/abat" + }, + "2411.11874v1": { + "title": "Personalized Continual EEG Decoding Framework for Knowledge Retention and Transfer", + "url": "http://arxiv.org/abs/2411.11874v1", + "authors": "Dan Li, Hye-Bin Shin, Kang Yin", + "update_time": "2024-11-04", + "abstract": "The significant inter-subject variability in electroencephalogram (EEG) signals often leads to knowledge being overwritten as new tasks are introduced in continual EEG decoding. While retraining on the entire dataset with each new input can prevent forgetting, this approach incurs high computational costs. An ideal brain-computer interface (BCI) model should continuously learn new information without retraining from scratch, thus reducing these costs. Most transfer learning models rely on large source-domain datasets for pre-training, yet data availability is frequently limited in real-world applications due to privacy concerns. Furthermore, such models are prone to catastrophic forgetting in continual EEG decoding tasks. To address these challenges, we propose a personalized subject-incremental learning (SIL) framework for continual EEG decoding that integrates Euclidean Alignment for fast domain adaptation, an exemplar replay mechanism to retain prior knowledge, and reservoir sampling-based memory management to handle memory constraints in long-term learning. Validated on the OpenBMI dataset with 54 subjects, our framework effectively balances knowledge retention with classification performance in continual MI-EEG tasks, offering a scalable solution for real-world BCI applications." + }, + "2410.23639v1": { + "title": "Biologically-Inspired Technologies: Integrating Brain-Computer Interface and Neuromorphic Computing for Human Digital Twins", + "url": "http://arxiv.org/abs/2410.23639v1", + "authors": "Chen Shang, Jiadong Yu, Dinh Thai Hoang", + "update_time": "2024-10-31", + "abstract": "The integration of the Metaverse into a human-centric ecosystem has intensified the need for sophisticated Human Digital Twins (HDTs) that are driven by the multifaceted human data. However, the effective construction of HDTs faces significant challenges due to the heterogeneity of data collection devices, the high energy demands associated with processing intricate data, and concerns over the privacy of sensitive information. This work introduces a novel biologically-inspired (bio-inspired) HDT framework that leverages Brain-Computer Interface (BCI) sensor technology to capture brain signals as the data source for constructing HDT. By collecting and analyzing these signals, the framework not only minimizes device heterogeneity and enhances data collection efficiency, but also provides richer and more nuanced physiological and psychological data for constructing personalized HDTs. To this end, we further propose a bio-inspired neuromorphic computing learning model based on the Spiking Neural Network (SNN). This model utilizes discrete neural spikes to emulate the way of human brain processes information, thereby enhancing the system's ability to process data effectively while reducing energy consumption. Additionally, we integrate a Federated Learning (FL) strategy within the model to strengthen data privacy. We then conduct a case study to demonstrate the performance of our proposed twofold bio-inspired scheme. Finally, we present several challenges and promising directions for future research of HDTs driven by bio-inspired technologies." + }, + "2411.09709v1": { + "title": "Feature Selection via Dynamic Graph-based Attention Block in MI-based EEG Signals", + "url": "http://arxiv.org/abs/2411.09709v1", + "authors": "Hyeon-Taek Han, Dae-Hyeok Lee, Heon-Gyu Kwak", + "update_time": "2024-10-31", + "abstract": "Brain-computer interface (BCI) technology enables direct interaction between humans and computers by analyzing brain signals. Electroencephalogram (EEG) is one of the non-invasive tools used in BCI systems, providing high temporal resolution for real-time applications. However, EEG signals are often affected by a low signal-to-noise ratio, physiological artifacts, and individual variability, representing challenges in extracting distinct features. Also, motor imagery (MI)-based EEG signals could contain features with low correlation to MI characteristics, which might cause the weights of the deep model to become biased towards those features. To address these problems, we proposed the end-to-end deep preprocessing method that effectively enhances MI characteristics while attenuating features with low correlation to MI characteristics. The proposed method consisted of the temporal, spatial, graph, and similarity blocks to preprocess MI-based EEG signals, aiming to extract more discriminative features and improve the robustness. We evaluated the proposed method using the public dataset 2a of BCI Competition IV to compare the performances when integrating the proposed method into the conventional models, including the DeepConvNet, the M-ShallowConvNet, and the EEGNet. The experimental results showed that the proposed method could achieve the improved performances and lead to more clustered feature distributions of MI tasks. Hence, we demonstrated that our proposed method could enhance discriminative features related to MI characteristics." + }, + "2411.05811v1": { + "title": "Neurophysiological Analysis in Motor and Sensory Cortices for Improving Motor Imagination", + "url": "http://arxiv.org/abs/2411.05811v1", + "authors": "Si-Hyun Kim, Sung-Jin Kim, Dae-Hyeok Lee", + "update_time": "2024-10-31", + "abstract": "Brain-computer interface (BCI) enables direct communication between the brain and external devices by decoding neural signals, offering potential solutions for individuals with motor impairments. This study explores the neural signatures of motor execution (ME) and motor imagery (MI) tasks using EEG signals, focusing on four conditions categorized as sense-related (hot and cold) and motor-related (pull and push) conditions. We conducted scalp topography analysis to examine activation patterns in the sensorimotor cortex, revealing distinct regional differences: sense--related conditions primarily activated the posterior region of the sensorimotor cortex, while motor--related conditions activated the anterior region of the sensorimotor cortex. These spatial distinctions align with neurophysiological principles, suggesting condition-specific functional subdivisions within the sensorimotor cortex. We further evaluated the performances of three neural network models-EEGNet, ShallowConvNet, and DeepConvNet-demonstrating that ME tasks achieved higher classification accuracies compared to MI tasks. Specifically, in sense-related conditions, the highest accuracy was observed in the cold condition. In motor-related conditions, the pull condition showed the highest performance, with DeepConvNet yielding the highest results. These findings provide insights into optimizing BCI applications by leveraging specific condition-induced neural activations." + } + }, + "fMRI": { + "2411.13378v1": { + "title": "Quantum-Brain: Quantum-Inspired Neural Network Approach to Vision-Brain Understanding", + "url": "http://arxiv.org/abs/2411.13378v1", + "authors": "Hoang-Quan Nguyen, Xuan-Bac Nguyen, Hugh Churchill, Arabinda Kumar Choudhary, Pawan Sinha, Samee U. Khan, Khoa Luu", + "update_time": "2024-11-20", + "abstract": "Vision-brain understanding aims to extract semantic information about brain signals from human perceptions. Existing deep learning methods for vision-brain understanding are usually introduced in a traditional learning paradigm missing the ability to learn the connectivities between brain regions. Meanwhile, the quantum computing theory offers a new paradigm for designing deep learning models. Motivated by the connectivities in the brain signals and the entanglement properties in quantum computing, we propose a novel Quantum-Brain approach, a quantum-inspired neural network, to tackle the vision-brain understanding problem. To compute the connectivity between areas in brain signals, we introduce a new Quantum-Inspired Voxel-Controlling module to learn the impact of a brain voxel on others represented in the Hilbert space. To effectively learn connectivity, a novel Phase-Shifting module is presented to calibrate the value of the brain signals. Finally, we introduce a new Measurement-like Projection module to present the connectivity information from the Hilbert space into the feature space. The proposed approach can learn to find the connectivities between fMRI voxels and enhance the semantic information obtained from human perceptions. Our experimental results on the Natural Scene Dataset benchmarks illustrate the effectiveness of the proposed method with Top-1 accuracies of 95.1% and 95.6% on image and brain retrieval tasks and an Inception score of 95.3% on fMRI-to-image reconstruction task. Our proposed quantum-inspired network brings a potential paradigm to solving the vision-brain problems via the quantum computing theory." + }, + "2411.12321v1": { + "title": "Enhancing Blind Source Separation with Dissociative Principal Component Analysis", + "url": "http://arxiv.org/abs/2411.12321v1", + "authors": "Muhammad Usman Khalid", + "update_time": "2024-11-19", + "abstract": "Sparse principal component analysis (sPCA) enhances the interpretability of principal components (PCs) by imposing sparsity constraints on loading vectors (LVs). However, when used as a precursor to independent component analysis (ICA) for blind source separation (BSS), sPCA may underperform due to its focus on simplicity, potentially disregarding some statistical information essential for effective ICA. To overcome this limitation, a sophisticated approach is proposed that preserves the interpretability advantages of sPCA while significantly enhancing its source extraction capabilities. This consists of two tailored algorithms, dissociative PCA (DPCA1 and DPCA2), which employ adaptive and firm thresholding alongside gradient and coordinate descent approaches to optimize the proposed model dynamically. These algorithms integrate left and right singular vectors from singular value decomposition (SVD) through dissociation matrices (DMs) that replace traditional singular values, thus capturing latent interdependencies effectively to model complex source relationships. This leads to refined PCs and LVs that more accurately represent the underlying data structure. The proposed approach avoids focusing on individual eigenvectors, instead, it collaboratively combines multiple eigenvectors to disentangle interdependencies within each SVD variate. The superior performance of the proposed DPCA algorithms is demonstrated across four varied imaging applications including functional magnetic resonance imaging (fMRI) source retrieval, foreground-background separation, image reconstruction, and image inpainting. They outperformed traditional methods such as PCA+ICA, PPCA+ICA, SPCA+ICA, PMD, and GPower." + }, + "2411.10872v1": { + "title": "In silico discovery of representational relationships across visual cortex", + "url": "http://arxiv.org/abs/2411.10872v1", + "authors": "Alessandro T. Gifford, Maya A. Jastrz\u0119bowska, Johannes J. D. Singer, Radoslaw M. Cichy", + "update_time": "2024-11-16", + "abstract": "Human vision is mediated by a complex interconnected network of cortical brain areas jointly representing visual information. While these areas are increasingly understood in isolation, their representational relationships remain elusive. Here we developed relational neural control (RNC), and used it to investigate the representational relationships for univariate and multivariate fMRI responses of early- and mid-level visual areas. RNC generated and explored in silico fMRI responses for large amounts of images, discovering controlling images that align or disentangle responses across areas, thus indicating their shared or unique representational content. A large portion of representational content was shared across areas, unique representational content increased with cortical distance, and we isolated the visual features determining these effects. Closing the empirical cycle, we validated the in silico discoveries on in vivo fMRI responses from independent subjects. Together, this reveals how visual areas jointly represent the world as an interconnected network." + }, + "2411.09723v1": { + "title": "Towards Neural Foundation Models for Vision: Aligning EEG, MEG, and fMRI Representations for Decoding, Encoding, and Modality Conversion", + "url": "http://arxiv.org/abs/2411.09723v1", + "authors": "Matteo Ferrante, Tommaso Boccato, Grigorii Rashkov, Nicola Toschi", + "update_time": "2024-11-14", + "abstract": "This paper presents a novel approach towards creating a foundational model for aligning neural data and visual stimuli across multimodal representationsof brain activity by leveraging contrastive learning. We used electroencephalography (EEG), magnetoencephalography (MEG), and functional magnetic resonance imaging (fMRI) data. Our framework's capabilities are demonstrated through three key experiments: decoding visual information from neural data, encoding images into neural representations, and converting between neural modalities. The results highlight the model's ability to accurately capture semantic information across different brain imaging techniques, illustrating its potential in decoding, encoding, and modality conversion tasks." + }, + "2411.09098v1": { + "title": "Interdependent scaling exponents in the human brain", + "url": "http://arxiv.org/abs/2411.09098v1", + "authors": "Daniel M. Castro, Ernesto P. Raposo, Mauro Copelli, Fernando A. N. Santos", + "update_time": "2024-11-14", + "abstract": "We apply the phenomenological renormalization group to resting-state fMRI time series of brain activity in a large population. By recursively coarse-graining the data, we compute scaling exponents for the series variance, log probability of silence, and largest covariance eigenvalue. The exponents clearly exhibit linear interdependencies, which we derive analytically in a mean-field approach. We find a significant correlation of exponent values with the gray matter volume and cognitive performance. Akin to scaling relations near critical points in thermodynamics, our findings suggest scaling interdependencies are intrinsic to brain organization and may also exist in other complex systems." + }, + "2411.08973v1": { + "title": "Somatosensory and motor contributions to emotion representation", + "url": "http://arxiv.org/abs/2411.08973v1", + "authors": "Marianne C. Reddan, Luke Chang, Philip Kragel, Tor D. Wager", + "update_time": "2024-11-13", + "abstract": "Emotion is often described as something people 'feel' in their bodies. Embodied emotion theorists propose that this connection is not purely linguistic; perceiving an emotion may require somatosensory and motor re-experiencing. However, it remains unclear whether self-reports of emotion-related bodily sensations (i.e., 'lump in my throat') are related to neural simulations of bodily action and sensation or whether they can be explained by cognitive appraisals or the visual features of socioemotional signals. To investigate this, participants (N = 21) were shown arousing emotional images that varied in valence, complexity, and content while undergoing fMRI scans. Participants then rated the images on a set of emotion appraisal scales and indicated where, on a body map, they experienced sensation in response to the image. To derive normative models of responses on these scales, a separate larger online sample online (N = 56 - 128) also rated these images. Representational similarity analysis (RSA) was used to compare the emotional content in the body maps with appraisals and visual features. A pairwise distance matrix between the body maps generated for each stimulus was then used in a whole brain voxel-wise searchlight analysis to identify brain regions which reflect the representational geometry of embodied emotion. This analysis revealed a network including bilateral primary somatosensory and motor cortices, precuneus, insula, and medial prefrontal cortex. The results of this study suggest that the relationship between emotion and the body is not purely conceptual: It is supported by sensorimotor cortical activations." + }, + "2411.08424v1": { + "title": "A Heterogeneous Graph Neural Network Fusing Functional and Structural Connectivity for MCI Diagnosis", + "url": "http://arxiv.org/abs/2411.08424v1", + "authors": "Feiyu Yin, Yu Lei, Siyuan Dai, Wenwen Zeng, Guoqing Wu, Liang Zhan, Jinhua Yu", + "update_time": "2024-11-13", + "abstract": "Brain connectivity alternations associated with brain disorders have been widely reported in resting-state functional imaging (rs-fMRI) and diffusion tensor imaging (DTI). While many dual-modal fusion methods based on graph neural networks (GNNs) have been proposed, they generally follow homogenous fusion ways ignoring rich heterogeneity of dual-modal information. To address this issue, we propose a novel method that integrates functional and structural connectivity based on heterogeneous graph neural networks (HGNNs) to better leverage the rich heterogeneity in dual-modal images. We firstly use blood oxygen level dependency and whiter matter structure information provided by rs-fMRI and DTI to establish homo-meta-path, capturing node relationships within the same modality. At the same time, we propose to establish hetero-meta-path based on structure-function coupling and brain community searching to capture relations among cross-modal nodes. Secondly, we further introduce a heterogeneous graph pooling strategy that automatically balances homo- and hetero-meta-path, effectively leveraging heterogeneous information and preventing feature confusion after pooling. Thirdly, based on the flexibility of heterogeneous graphs, we propose a heterogeneous graph data augmentation approach that can conveniently address the sample imbalance issue commonly seen in clinical diagnosis. We evaluate our method on ADNI-3 dataset for mild cognitive impairment (MCI) diagnosis. Experimental results indicate the proposed method is effective and superior to other algorithms, with a mean classification accuracy of 93.3%." + }, + "2411.07506v1": { + "title": "FM-TS: Flow Matching for Time Series Generation", + "url": "http://arxiv.org/abs/2411.07506v1", + "authors": "Yang Hu, Xiao Wang, Lirong Wu, Huatian Zhang, Stan Z. Li, Sheng Wang, Tianlong Chen", + "update_time": "2024-11-12", + "abstract": "Time series generation has emerged as an essential tool for analyzing temporal data across numerous fields. While diffusion models have recently gained significant attention in generating high-quality time series, they tend to be computationally demanding and reliant on complex stochastic processes. To address these limitations, we introduce FM-TS, a rectified Flow Matching-based framework for Time Series generation, which simplifies the time series generation process by directly optimizing continuous trajectories. This approach avoids the need for iterative sampling or complex noise schedules typically required in diffusion-based models. FM-TS is more efficient in terms of training and inference. Moreover, FM-TS is highly adaptive, supporting both conditional and unconditional time series generation. Notably, through our novel inference design, the model trained in an unconditional setting can seamlessly generalize to conditional tasks without the need for retraining. Extensive benchmarking across both settings demonstrates that FM-TS consistently delivers superior performance compared to existing approaches while being more efficient in terms of training and inference. For instance, in terms of discriminative score, FM-TS achieves 0.005, 0.019, 0.011, 0.005, 0.053, and 0.106 on the Sines, Stocks, ETTh, MuJoCo, Energy, and fMRI unconditional time series datasets, respectively, significantly outperforming the second-best method which achieves 0.006, 0.067, 0.061, 0.008, 0.122, and 0.167 on the same datasets. We have achieved superior performance in solar forecasting and MuJoCo imputation tasks, significantly enhanced by our innovative $t$ power sampling method. The code is available at https://github.com/UNITES-Lab/FMTS.", + "code_url": "https://github.com/unites-lab/fmts" + }, + "2411.07121v2": { + "title": "Decoding Visual Experience and Mapping Semantics through Whole-Brain Analysis Using fMRI Foundation Models", + "url": "http://arxiv.org/abs/2411.07121v2", + "authors": "Yanchen Wang, Adam Turnbull, Tiange Xiang, Yunlong Xu, Sa Zhou, Adnan Masoud, Shekoofeh Azizi, Feng Vankee Lin, Ehsan Adeli", + "update_time": "2024-11-17", + "abstract": "Neural decoding, the process of understanding how brain activity corresponds to different stimuli, has been a primary objective in cognitive sciences. Over the past three decades, advancements in functional Magnetic Resonance Imaging and machine learning have greatly improved our ability to map visual stimuli to brain activity, especially in the visual cortex. Concurrently, research has expanded into decoding more complex processes like language and memory across the whole brain, utilizing techniques to handle greater variability and improve signal accuracy. We argue that \"seeing\" involves more than just mapping visual stimuli onto the visual cortex; it engages the entire brain, as various emotions and cognitive states can emerge from observing different scenes. In this paper, we develop algorithms to enhance our understanding of visual processes by incorporating whole-brain activation maps while individuals are exposed to visual stimuli. We utilize large-scale fMRI encoders and Image generative models pre-trained on large public datasets, which are then fine-tuned through Image-fMRI contrastive learning. Our models hence can decode visual experience across the entire cerebral cortex, surpassing the traditional confines of the visual cortex. We first compare our method with state-of-the-art approaches to decoding visual processing and show improved predictive semantic accuracy by 43%. A network ablation analysis suggests that beyond the visual cortex, the default mode network contributes most to decoding stimuli, in line with the proposed role of this network in sense-making and semantic processing. Additionally, we implemented zero-shot imagination decoding on an extra validation dataset, achieving a p-value of 0.0206 for mapping the reconstructed images and ground-truth text stimuli, which substantiates the model's capability to capture semantic meanings across various scenarios.", + "code_url": "https://github.com/ppwangyc/wave" + }, + "2411.02949v1": { + "title": "A scalable generative model for dynamical system reconstruction from neuroimaging data", + "url": "http://arxiv.org/abs/2411.02949v1", + "authors": "Eric Volkmann, Alena Br\u00e4ndle, Daniel Durstewitz, Georgia Koppe", + "update_time": "2024-11-05", + "abstract": "Data-driven inference of the generative dynamics underlying a set of observed time series is of growing interest in machine learning and the natural sciences. In neuroscience, such methods promise to alleviate the need to handcraft models based on biophysical principles and allow to automatize the inference of inter-individual differences in brain dynamics. Recent breakthroughs in training techniques for state space models (SSMs) specifically geared toward dynamical systems (DS) reconstruction (DSR) enable to recover the underlying system including its geometrical (attractor) and long-term statistical invariants from even short time series. These techniques are based on control-theoretic ideas, like modern variants of teacher forcing (TF), to ensure stable loss gradient propagation while training. However, as it currently stands, these techniques are not directly applicable to data modalities where current observations depend on an entire history of previous states due to a signal's filtering properties, as common in neuroscience (and physiology more generally). Prominent examples are the blood oxygenation level dependent (BOLD) signal in functional magnetic resonance imaging (fMRI) or Ca$^{2+}$ imaging data. Such types of signals render the SSM's decoder model non-invertible, a requirement for previous TF-based methods. Here, exploiting the recent success of control techniques for training SSMs, we propose a novel algorithm that solves this problem and scales exceptionally well with model dimensionality and filter length. We demonstrate its efficiency in reconstructing dynamical systems, including their state space geometry and long-term temporal properties, from just short BOLD time series.", + "code_url": "https://github.com/humml-lab/GTF-ConvSSM" + } + }, + "MEG": { + "2411.09723v1": { + "title": "Towards Neural Foundation Models for Vision: Aligning EEG, MEG, and fMRI Representations for Decoding, Encoding, and Modality Conversion", + "url": "http://arxiv.org/abs/2411.09723v1", + "authors": "Matteo Ferrante, Tommaso Boccato, Grigorii Rashkov, Nicola Toschi", + "update_time": "2024-11-14", + "abstract": "This paper presents a novel approach towards creating a foundational model for aligning neural data and visual stimuli across multimodal representationsof brain activity by leveraging contrastive learning. We used electroencephalography (EEG), magnetoencephalography (MEG), and functional magnetic resonance imaging (fMRI) data. Our framework's capabilities are demonstrated through three key experiments: decoding visual information from neural data, encoding images into neural representations, and converting between neural modalities. The results highlight the model's ability to accurately capture semantic information across different brain imaging techniques, illustrating its potential in decoding, encoding, and modality conversion tasks." + }, + "2411.07994v1": { + "title": "Search for the X17 particle in $^{7}\\mathrm{Li}(\\mathrm{p},\\mathrm{e}^+ \\mathrm{e}^{-}) ^{8}\\mathrm{Be}$ processes with the MEG II detector", + "url": "http://arxiv.org/abs/2411.07994v1", + "authors": "The MEG II collaboration, K. Afanaciev, A. M. Baldini, S. Ban, H. Benmansour, G. Boca, P. W. Cattaneo, G. Cavoto, F. Cei, M. Chiappini, A. Corvaglia, G. Dal Maso, A. De Bari, M. De Gerone, L. Ferrari Barusso, M. Francesconi, L. Galli, G. Gallucci, F. Gatti, L. Gerritzen, F. Grancagnolo, E. G. Grandoni, M. Grassi, D. N. Grigoriev, M. Hildebrandt, F. Ignatov, F. Ikeda, T. Iwamoto, S. Karpov, P. -R. Kettle, N. Khomutov, A. Kolesnikov, N. Kravchuk, V. Krylov, N. Kuchinskiy, F. Leonetti, W. Li, V. Malyshev, A. Matsushita, M. Meucci, S. Mihara, W. Molzon, T. Mori, D. Nicol\u00f2, H. Nishiguchi, A. Ochi, W. Ootani, A. Oya, D. Palo, M. Panareo, A. Papa, V. Pettinacci, A. Popov, F. Renga, S. Ritt, M. Rossella, A. Rozhdestvensky. S. Scarpellini, P. Schwendimann, G. Signorelli, M. Takahashi, Y. Uchiyama, A. Venturini, B. Vitali, C. Voena, K. Yamamoto, R. Yokota, T. Yonemoto", + "update_time": "2024-11-12", + "abstract": "The observation of a resonance structure in the opening angle of the electron-positron pairs in the $^{7}$Li(p,\\ee) $^{8}$Be reaction was claimed and interpreted as the production and subsequent decay of a hypothetical particle (X17). Similar excesses, consistent with this particle, were later observed in processes involving $^{4}$He and $^{12}$C nuclei with the same experimental technique. The MEG II apparatus at PSI, designed to search for the $\\mu^+ \\rightarrow \\mathrm{e}^+ \\gamma$ decay, can be exploited to investigate the existence of this particle and study its nature. Protons from a Cockroft-Walton accelerator, with an energy up to 1.1 MeV, were delivered on a dedicated Li-based target. The $\\gamma$ and the e$^{+}$e$^{-}$ pair emerging from the $^8\\mathrm{Be}^*$ transitions were studied with calorimeters and a spectrometer, featuring a broader angular acceptance than previous experiments. We present in this paper the analysis of a four-week data-taking in 2023 with a beam energy of 1080 keV, resulting in the excitation of two different resonances with Q-value \\SI{17.6}{\\mega\\electronvolt} and \\SI{18.1}{\\mega\\electronvolt}. No significant signal was found, and limits at \\SI{90}{\\percent} C.L. on the branching ratios (relative to the $\\gamma$ emission) of the two resonances to X17 were set, $R_{17.6} < 1.8 \\times 10^{-6} $ and $R_{18.1} < 1.2 \\times 10^{-5} $." + }, + "2411.03883v2": { + "title": "MEG: Medical Knowledge-Augmented Large Language Models for Question Answering", + "url": "http://arxiv.org/abs/2411.03883v2", + "authors": "Laura Cabello, Carmen Martin-Turrero, Uchenna Akujuobi, Anders S\u00f8gaard, Carlos Bobed", + "update_time": "2024-11-07", + "abstract": "Question answering is a natural language understanding task that involves reasoning over both explicit context and unstated, relevant domain knowledge. Large language models (LLMs), which underpin most contemporary question answering systems, struggle to induce how concepts relate in specialized domains such as medicine. Existing medical LLMs are also costly to train. In this work, we present MEG, a parameter-efficient approach for medical knowledge-augmented LLMs. MEG uses a lightweight mapping network to integrate graph embeddings into the LLM, enabling it to leverage external knowledge in a cost-effective way. We evaluate our method on four popular medical multiple-choice datasets and show that LLMs greatly benefit from the factual grounding provided by knowledge graph embeddings. MEG attains an average of +10.2% accuracy over the Mistral-Instruct baseline, and +6.7% over specialized models like BioMistral. We also show results based on Llama-3. Finally, we show that MEG's performance remains robust to the choice of graph encoder.", + "code_url": "https://github.com/lautel/meg" + }, + "2410.23386v1": { + "title": "STIED: A deep learning model for the SpatioTemporal detection of focal Interictal Epileptiform Discharges with MEG", + "url": "http://arxiv.org/abs/2410.23386v1", + "authors": "Raquel Fern\u00e1ndez-Mart\u00edn, Alfonso Gij\u00f3n, Odile Feys, Elodie Juven\u00e9, Alec Aeby, Charline Urbain, Xavier De Ti\u00e8ge, Vincent Wens", + "update_time": "2024-10-30", + "abstract": "Magnetoencephalography (MEG) allows the non-invasive detection of interictal epileptiform discharges (IEDs). Clinical MEG analysis in epileptic patients traditionally relies on the visual identification of IEDs, which is time consuming and partially subjective. Automatic, data-driven detection methods exist but show limited performance. Still, the rise of deep learning (DL)-with its ability to reproduce human-like abilities-could revolutionize clinical MEG practice. Here, we developed and validated STIED, a simple yet powerful supervised DL algorithm combining two convolutional neural networks with temporal (1D time-course) and spatial (2D topography) features of MEG signals inspired from current clinical guidelines. Our DL model enabled both temporal and spatial localization of IEDs in patients suffering from focal epilepsy with frequent and high amplitude spikes (FE group), with high-performance metrics-accuracy, specificity, and sensitivity all exceeding 85%-when learning from spatiotemporal features of IEDs. This performance can be attributed to our handling of input data, which mimics established clinical MEG practice. Reverse engineering further revealed that STIED encodes fine spatiotemporal features of IEDs rather than their mere amplitude. The model trained on the FE group also showed promising results when applied to a separate group of presurgical patients with different types of refractory focal epilepsy, though further work is needed to distinguish IEDs from physiological transients. This study paves the way of incorporating STIED and DL algorithms into the routine clinical MEG evaluation of epilepsy." + }, + "2410.20916v1": { + "title": "NeuGPT: Unified multi-modal Neural GPT", + "url": "http://arxiv.org/abs/2410.20916v1", + "authors": "Yiqian Yang, Yiqun Duan, Hyejeong Jo, Qiang Zhang, Renjing Xu, Oiwi Parker Jones, Xuming Hu, Chin-teng Lin, Hui Xiong", + "update_time": "2024-10-28", + "abstract": "This paper introduces NeuGPT, a groundbreaking multi-modal language generation model designed to harmonize the fragmented landscape of neural recording research. Traditionally, studies in the field have been compartmentalized by signal type, with EEG, MEG, ECoG, SEEG, fMRI, and fNIRS data being analyzed in isolation. Recognizing the untapped potential for cross-pollination and the adaptability of neural signals across varying experimental conditions, we set out to develop a unified model capable of interfacing with multiple modalities. Drawing inspiration from the success of pre-trained large models in NLP, computer vision, and speech processing, NeuGPT is architected to process a diverse array of neural recordings and interact with speech and text data. Our model mainly focus on brain-to-text decoding, improving SOTA from 6.94 to 12.92 on BLEU-1 and 6.93 to 13.06 on ROUGE-1F. It can also simulate brain signals, thereby serving as a novel neural interface. Code is available at \\href{https://github.com/NeuSpeech/NeuGPT}{NeuSpeech/NeuGPT (https://github.com/NeuSpeech/NeuGPT) .}", + "code_url": "https://github.com/neuspeech/neugpt" + }, + "2410.19986v1": { + "title": "Resolving Domain Shift For Representations Of Speech In Non-Invasive Brain Recordings", + "url": "http://arxiv.org/abs/2410.19986v1", + "authors": "Jeremiah Ridge, Oiwi Parker Jones", + "update_time": "2024-10-25", + "abstract": "Machine learning techniques have enabled researchers to leverage neuroimaging data to decode speech from brain activity, with some amazing recent successes achieved by applications built using invasive devices. However, research requiring surgical implants has a number of practical limitations. Non-invasive neuroimaging techniques provide an alternative but come with their own set of challenges, the limited scale of individual studies being among them. Without the ability to pool the recordings from different non-invasive studies, data on the order of magnitude needed to leverage deep learning techniques to their full potential remains out of reach. In this work, we focus on non-invasive data collected using magnetoencephalography (MEG). We leverage two different, leading speech decoding models to investigate how an adversarial domain adaptation framework augments their ability to generalize across datasets. We successfully improve the performance of both models when training across multiple datasets. To the best of our knowledge, this study is the first ever application of feature-level, deep learning based harmonization for MEG neuroimaging data. Our analysis additionally offers further evidence of the impact of demographic features on neuroimaging data, demonstrating that participant age strongly affects how machine learning models solve speech decoding tasks using MEG data. Lastly, in the course of this study we produce a new open-source implementation of one of these models to the benefit of the broader scientific community." + }, + "2410.19838v1": { + "title": "Non-invasive Neural Decoding in Source Reconstructed Brain Space", + "url": "http://arxiv.org/abs/2410.19838v1", + "authors": "Yonatan Gideoni, Ryan Charles Timms, Oiwi Parker Jones", + "update_time": "2024-10-20", + "abstract": "Non-invasive brainwave decoding is usually done using Magneto/Electroencephalography (MEG/EEG) sensor measurements as inputs. This makes combining datasets and building models with inductive biases difficult as most datasets use different scanners and the sensor arrays have a nonintuitive spatial structure. In contrast, fMRI scans are acquired directly in brain space, a voxel grid with a typical structured input representation. By using established techniques to reconstruct the sensors' sources' neural activity it is possible to decode from voxels for MEG data as well. We show that this enables spatial inductive biases, spatial data augmentations, better interpretability, zero-shot generalisation between datasets, and data harmonisation." + }, + "2410.14971v1": { + "title": "BrainECHO: Semantic Brain Signal Decoding through Vector-Quantized Spectrogram Reconstruction for Whisper-Enhanced Text Generation", + "url": "http://arxiv.org/abs/2410.14971v1", + "authors": "Jilong Li, Zhenxi Song, Jiaqi Wang, Min Zhang, Zhiguo Zhang", + "update_time": "2024-10-19", + "abstract": "Recent advances in decoding language from brain signals (EEG and MEG) have been significantly driven by pre-trained language models, leading to remarkable progress on publicly available non-invasive EEG/MEG datasets. However, previous works predominantly utilize teacher forcing during text generation, leading to significant performance drops without its use. A fundamental issue is the inability to establish a unified feature space correlating textual data with the corresponding evoked brain signals. Although some recent studies attempt to mitigate this gap using an audio-text pre-trained model, Whisper, which is favored for its signal input modality, they still largely overlook the inherent differences between audio signals and brain signals in directly applying Whisper to decode brain signals. To address these limitations, we propose a new multi-stage strategy for semantic brain signal decoding via vEctor-quantized speCtrogram reconstruction for WHisper-enhanced text generatiOn, termed BrainECHO. Specifically, BrainECHO successively conducts: 1) Discrete autoencoding of the audio spectrogram; 2) Brain-audio latent space alignment; and 3) Semantic text generation via Whisper finetuning. Through this autoencoding--alignment--finetuning process, BrainECHO outperforms state-of-the-art methods under the same data split settings on two widely accepted resources: the EEG dataset (Brennan) and the MEG dataset (GWilliams). The innovation of BrainECHO, coupled with its robustness and superiority at the sentence, session, and subject-independent levels across public datasets, underscores its significance for language-based brain-computer interfaces." + }, + "2410.08718v1": { + "title": "Determining sensor geometry and gain in a wearable MEG system", + "url": "http://arxiv.org/abs/2410.08718v1", + "authors": "Ryan M. Hill, Gonzalo Reina Rivero, Ashley J. Tyler, Holly Schofield, Cody Doyle, James Osborne, David Bobela, Lukas Rier, Joseph Gibson, Zoe Tanner, Elena Boto, Richard Bowtell, Matthew J. Brookes, Vishal Shah, Niall Holmes", + "update_time": "2024-10-11", + "abstract": "Optically pumped magnetometers (OPMs) are compact and lightweight sensors that can measure magnetic fields generated by current flow in neuronal assemblies in the brain. Such sensors enable construction of magnetoencephalography (MEG) instrumentation, with significant advantages over conventional MEG devices including adaptability to head size, enhanced movement tolerance, lower complexity and improved data quality. However, realising the potential of OPMs depends on our ability to perform system calibration, which means finding sensor locations, orientations, and the relationship between the sensor output and magnetic field (termed sensor gain). Such calibration is complex in OPMMEG since, for example, OPM placement can change from subject to subject (unlike in conventional MEG where sensor locations or orientations are fixed). Here, we present two methods for calibration, both based on generating well-characterised magnetic fields across a sensor array. Our first device (the HALO) is a head mounted system that generates dipole like fields from a set of coils. Our second (the matrix coil (MC)) generates fields using coils embedded in the walls of a magnetically shielded room. Our results show that both methods offer an accurate means to calibrate an OPM array (e.g. sensor locations within 2 mm of the ground truth) and that the calibrations produced by the two methods agree strongly with each other. When applied to data from human MEG experiments, both methods offer improved signal to noise ratio after beamforming suggesting that they give calibration parameters closer to the ground truth than factory settings and presumed physical sensor coordinates and orientations. Both techniques are practical and easy to integrate into real world MEG applications. This advances the field significantly closer to the routine use of OPMs for MEG recording." + }, + "2410.03191v2": { + "title": "Nested Deep Learning Model Towards A Foundation Model for Brain Signal Data", + "url": "http://arxiv.org/abs/2410.03191v2", + "authors": "Fangyi Wei, Jiajie Mo, Kai Zhang, Haipeng Shen, Srikantan Nagarajan, Fei Jiang", + "update_time": "2024-10-09", + "abstract": "Epilepsy affects over 50 million people globally, with EEG/MEG-based spike detection playing a crucial role in diagnosis and treatment. Manual spike identification is time-consuming and requires specialized training, limiting the number of professionals available to analyze EEG/MEG data. To address this, various algorithmic approaches have been developed. However, current methods face challenges in handling varying channel configurations and in identifying the specific channels where spikes originate. This paper introduces a novel Nested Deep Learning (NDL) framework designed to overcome these limitations. NDL applies a weighted combination of signals across all channels, ensuring adaptability to different channel setups, and allows clinicians to identify key channels more accurately. Through theoretical analysis and empirical validation on real EEG/MEG datasets, NDL demonstrates superior accuracy in spike detection and channel localization compared to traditional methods. The results show that NDL improves prediction accuracy, supports cross-modality data integration, and can be fine-tuned for various neurophysiological applications." + } + }, + "neuroAI": { + "2410.19315v1": { + "title": "A prescriptive theory for brain-like inference", + "url": "http://arxiv.org/abs/2410.19315v1", + "authors": "Hadi Vafaii, Dekel Galor, Jacob L. Yates", + "update_time": "2024-10-25", + "abstract": "The Evidence Lower Bound (ELBO) is a widely used objective for training deep generative models, such as Variational Autoencoders (VAEs). In the neuroscience literature, an identical objective is known as the variational free energy, hinting at a potential unified framework for brain function and machine learning. Despite its utility in interpreting generative models, including diffusion models, ELBO maximization is often seen as too broad to offer prescriptive guidance for specific architectures in neuroscience or machine learning. In this work, we show that maximizing ELBO under Poisson assumptions for general sequence data leads to a spiking neural network that performs Bayesian posterior inference through its membrane potential dynamics. The resulting model, the iterative Poisson VAE (iP-VAE), has a closer connection to biological neurons than previous brain-inspired predictive coding models based on Gaussian assumptions. Compared to amortized and iterative VAEs, iP-VAElearns sparser representations and exhibits superior generalization to out-of-distribution samples. These findings suggest that optimizing ELBO, combined with Poisson assumptions, provides a solid foundation for developing prescriptive theories in NeuroAI." + }, + "2409.05771v1": { + "title": "Evidence from fMRI Supports a Two-Phase Abstraction Process in Language Models", + "url": "http://arxiv.org/abs/2409.05771v1", + "authors": "Emily Cheng, Richard J. Antonello", + "update_time": "2024-09-09", + "abstract": "Research has repeatedly demonstrated that intermediate hidden states extracted from large language models are able to predict measured brain response to natural language stimuli. Yet, very little is known about the representation properties that enable this high prediction performance. Why is it the intermediate layers, and not the output layers, that are most capable for this unique and highly general transfer task? In this work, we show that evidence from language encoding models in fMRI supports the existence of a two-phase abstraction process within LLMs. We use manifold learning methods to show that this abstraction process naturally arises over the course of training a language model and that the first \"composition\" phase of this abstraction process is compressed into fewer layers as training continues. Finally, we demonstrate a strong correspondence between layerwise encoding performance and the intrinsic dimensionality of representations from LLMs. We give initial evidence that this correspondence primarily derives from the inherent compositionality of LLMs and not their next-word prediction properties." + }, + "2407.04117v2": { + "title": "Predictive Coding Networks and Inference Learning: Tutorial and Survey", + "url": "http://arxiv.org/abs/2407.04117v2", + "authors": "Bj\u00f6rn van Zwol, Ro Jefferson, Egon L. van den Broek", + "update_time": "2024-07-22", + "abstract": "Recent years have witnessed a growing call for renewed emphasis on neuroscience-inspired approaches in artificial intelligence research, under the banner of NeuroAI. A prime example of this is predictive coding networks (PCNs), based on the neuroscientific framework of predictive coding. This framework views the brain as a hierarchical Bayesian inference model that minimizes prediction errors through feedback connections. Unlike traditional neural networks trained with backpropagation (BP), PCNs utilize inference learning (IL), a more biologically plausible algorithm that explains patterns of neural activity that BP cannot. Historically, IL has been more computationally intensive, but recent advancements have demonstrated that it can achieve higher efficiency than BP with sufficient parallelization. Furthermore, PCNs can be mathematically considered a superset of traditional feedforward neural networks (FNNs), significantly extending the range of trainable architectures. As inherently probabilistic (graphical) latent variable models, PCNs provide a versatile framework for both supervised learning and unsupervised (generative) modeling that goes beyond traditional artificial neural networks. This work provides a comprehensive review and detailed formal specification of PCNs, particularly situating them within the context of modern ML methods. Additionally, we introduce a Python library (PRECO) for practical implementation. This positions PC as a promising framework for future ML innovations." + }, + "2306.10168v3": { + "title": "Beyond Geometry: Comparing the Temporal Structure of Computation in Neural Circuits with Dynamical Similarity Analysis", + "url": "http://arxiv.org/abs/2306.10168v3", + "authors": "Mitchell Ostrow, Adam Eisen, Leo Kozachkov, Ila Fiete", + "update_time": "2023-10-29", + "abstract": "How can we tell whether two neural networks utilize the same internal processes for a particular computation? This question is pertinent for multiple subfields of neuroscience and machine learning, including neuroAI, mechanistic interpretability, and brain-machine interfaces. Standard approaches for comparing neural networks focus on the spatial geometry of latent states. Yet in recurrent networks, computations are implemented at the level of dynamics, and two networks performing the same computation with equivalent dynamics need not exhibit the same geometry. To bridge this gap, we introduce a novel similarity metric that compares two systems at the level of their dynamics, called Dynamical Similarity Analysis (DSA). Our method incorporates two components: Using recent advances in data-driven dynamical systems theory, we learn a high-dimensional linear system that accurately captures core features of the original nonlinear dynamics. Next, we compare different systems passed through this embedding using a novel extension of Procrustes Analysis that accounts for how vector fields change under orthogonal transformation. In four case studies, we demonstrate that our method disentangles conjugate and non-conjugate recurrent neural networks (RNNs), while geometric methods fall short. We additionally show that our method can distinguish learning rules in an unsupervised manner. Our method opens the door to comparative analyses of the essential temporal structure of computation in neural circuits.", + "code_url": "https://github.com/mitchellostrow/dsa" + }, + "2305.11275v2": { + "title": "Explaining V1 Properties with a Biologically Constrained Deep Learning Architecture", + "url": "http://arxiv.org/abs/2305.11275v2", + "authors": "Galen Pogoncheff, Jacob Granley, Michael Beyeler", + "update_time": "2023-05-25", + "abstract": "Convolutional neural networks (CNNs) have recently emerged as promising models of the ventral visual stream, despite their lack of biological specificity. While current state-of-the-art models of the primary visual cortex (V1) have surfaced from training with adversarial examples and extensively augmented data, these models are still unable to explain key neural properties observed in V1 that arise from biological circuitry. To address this gap, we systematically incorporated neuroscience-derived architectural components into CNNs to identify a set of mechanisms and architectures that comprehensively explain neural activity in V1. We show drastic improvements in model-V1 alignment driven by the integration of architectural components that simulate center-surround antagonism, local receptive fields, tuned normalization, and cortical magnification. Upon enhancing task-driven CNNs with a collection of these specialized components, we uncover models with latent representations that yield state-of-the-art explanation of V1 neural activity and tuning properties. Our results highlight an important advancement in the field of NeuroAI, as we systematically establish a set of architectural components that contribute to unprecedented explanation of V1. The neuroscience insights that could be gleaned from increasingly accurate in-silico models of the brain have the potential to greatly advance the fields of both neuroscience and artificial intelligence." + }, + "2302.07243v4": { + "title": "A Deep Probabilistic Spatiotemporal Framework for Dynamic Graph Representation Learning with Application to Brain Disorder Identification", + "url": "http://arxiv.org/abs/2302.07243v4", + "authors": "Sin-Yee Yap, Junn Yong Loo, Chee-Ming Ting, Fuad Noman, Raphael C. -W. Phan, Adeel Razi, David L. Dowe", + "update_time": "2024-11-09", + "abstract": "Recent applications of pattern recognition techniques on brain connectome classification using functional connectivity (FC) are shifting towards acknowledging the non-Euclidean topology and dynamic aspects of brain connectivity across time. In this paper, a deep spatiotemporal variational Bayes (DSVB) framework is proposed to learn time-varying topological structures in dynamic FC networks for identifying autism spectrum disorder (ASD) in human participants. The framework incorporates a spatial-aware recurrent neural network with an attention-based message passing scheme to capture rich spatiotemporal patterns across dynamic FC networks. To overcome model overfitting on limited training datasets, an adversarial training strategy is introduced to learn graph embedding models that generalize well to unseen brain networks. Evaluation on the ABIDE resting-state functional magnetic resonance imaging dataset shows that our proposed framework substantially outperforms state-of-the-art methods in identifying patients with ASD. Dynamic FC analyses with DSVB-learned embeddings reveal apparent group differences between ASD and healthy controls in brain network connectivity patterns and switching dynamics of brain states. The code is available at https://github.com/Monash-NeuroAI/Deep-Spatiotemporal-Variational-Bayes.", + "code_url": "https://github.com/Monash-NeuroAI/Deep-Spatiotemporal-Variational-Bayes" + }, + "2301.09245v2": { + "title": "Towards NeuroAI: Introducing Neuronal Diversity into Artificial Neural Networks", + "url": "http://arxiv.org/abs/2301.09245v2", + "authors": "Feng-Lei Fan, Yingxin Li, Hanchuan Peng, Tieyong Zeng, Fei Wang", + "update_time": "2023-03-11", + "abstract": "Throughout history, the development of artificial intelligence, particularly artificial neural networks, has been open to and constantly inspired by the increasingly deepened understanding of the brain, such as the inspiration of neocognitron, which is the pioneering work of convolutional neural networks. Per the motives of the emerging field: NeuroAI, a great amount of neuroscience knowledge can help catalyze the next generation of AI by endowing a network with more powerful capabilities. As we know, the human brain has numerous morphologically and functionally different neurons, while artificial neural networks are almost exclusively built on a single neuron type. In the human brain, neuronal diversity is an enabling factor for all kinds of biological intelligent behaviors. Since an artificial network is a miniature of the human brain, introducing neuronal diversity should be valuable in terms of addressing those essential problems of artificial networks such as efficiency, interpretability, and memory. In this Primer, we first discuss the preliminaries of biological neuronal diversity and the characteristics of information transmission and processing in a biological neuron. Then, we review studies of designing new neurons for artificial networks. Next, we discuss what gains can neuronal diversity bring into artificial networks and exemplary applications in several important fields. Lastly, we discuss the challenges and future directions of neuronal diversity to explore the potential of NeuroAI." + }, + "2212.04401v1": { + "title": "A Rubric for Human-like Agents and NeuroAI", + "url": "http://arxiv.org/abs/2212.04401v1", + "authors": "Ida Momennejad", + "update_time": "2022-12-08", + "abstract": "Researchers across cognitive, neuro-, and computer sciences increasingly reference human-like artificial intelligence and neuroAI. However, the scope and use of the terms are often inconsistent. Contributed research ranges widely from mimicking behaviour, to testing machine learning methods as neurally plausible hypotheses at the cellular or functional levels, or solving engineering problems. However, it cannot be assumed nor expected that progress on one of these three goals will automatically translate to progress in others. Here a simple rubric is proposed to clarify the scope of individual contributions, grounded in their commitments to human-like behaviour, neural plausibility, or benchmark/engineering goals. This is clarified using examples of weak and strong neuroAI and human-like agents, and discussing the generative, corroborate, and corrective ways in which the three dimensions interact with one another. The author maintains that future progress in artificial intelligence will need strong interactions across the disciplines, with iterative feedback loops and meticulous validity tests, leading to both known and yet-unknown advances that may span decades to come." + }, + "2210.08340v3": { + "title": "Toward Next-Generation Artificial Intelligence: Catalyzing the NeuroAI Revolution", + "url": "http://arxiv.org/abs/2210.08340v3", + "authors": "Anthony Zador, Sean Escola, Blake Richards, Bence \u00d6lveczky, Yoshua Bengio, Kwabena Boahen, Matthew Botvinick, Dmitri Chklovskii, Anne Churchland, Claudia Clopath, James DiCarlo, Surya Ganguli, Jeff Hawkins, Konrad Koerding, Alexei Koulakov, Yann LeCun, Timothy Lillicrap, Adam Marblestone, Bruno Olshausen, Alexandre Pouget, Cristina Savin, Terrence Sejnowski, Eero Simoncelli, Sara Solla, David Sussillo, Andreas S. Tolias, Doris Tsao", + "update_time": "2023-02-22", + "abstract": "Neuroscience has long been an essential driver of progress in artificial intelligence (AI). We propose that to accelerate progress in AI, we must invest in fundamental research in NeuroAI. A core component of this is the embodied Turing test, which challenges AI animal models to interact with the sensorimotor world at skill levels akin to their living counterparts. The embodied Turing test shifts the focus from those capabilities like game playing and language that are especially well-developed or uniquely human to those capabilities, inherited from over 500 million years of evolution, that are shared with all animals. Building models that can pass the embodied Turing test will provide a roadmap for the next generation of AI." + }, + "2112.15459v3": { + "title": "Social Neuro AI: Social Interaction as the \"dark matter\" of AI", + "url": "http://arxiv.org/abs/2112.15459v3", + "authors": "Samuele Bolotta, Guillaume Dumas", + "update_time": "2022-04-11", + "abstract": "This article introduces a three-axis framework indicating how AI can be informed by biological examples of social learning mechanisms. We argue that the complex human cognitive architecture owes a large portion of its expressive power to its ability to engage in social and cultural learning. However, the field of AI has mostly embraced a solipsistic perspective on intelligence. We thus argue that social interactions not only are largely unexplored in this field but also are an essential element of advanced cognitive ability, and therefore constitute metaphorically the dark matter of AI. In the first section, we discuss how social learning plays a key role in the development of intelligence. We do so by discussing social and cultural learning theories and empirical findings from social neuroscience. Then, we discuss three lines of research that fall under the umbrella of Social NeuroAI and can contribute to developing socially intelligent embodied agents in complex environments. First, neuroscientific theories of cognitive architecture, such as the global workspace theory and the attention schema theory, can enhance biological plausibility and help us understand how we could bridge individual and social theories of intelligence. Second, intelligence occurs in time as opposed to over time, and this is naturally incorporated by dynamical systems. Third, embodiment has been demonstrated to provide more sophisticated array of communicative signals. To conclude, we discuss the example of active inference, which offers powerful insights for developing agents that possess biological realism, can self-organize in time, and are socially embodied." + } + }, + "medical": { + "2411.14418v1": { + "title": "Multimodal 3D Brain Tumor Segmentation with Adversarial Training and Conditional Random Field", + "url": "http://arxiv.org/abs/2411.14418v1", + "authors": "Lan Jiang, Yuchao Zheng, Miao Yu, Haiqing Zhang, Fatemah Aladwani, Alessandro Perelli", + "update_time": "2024-11-21", + "abstract": "Accurate brain tumor segmentation remains a challenging task due to structural complexity and great individual differences of gliomas. Leveraging the pre-eminent detail resilience of CRF and spatial feature extraction capacity of V-net, we propose a multimodal 3D Volume Generative Adversarial Network (3D-vGAN) for precise segmentation. The model utilizes Pseudo-3D for V-net improvement, adds conditional random field after generator and use original image as supplemental guidance. Results, using the BraTS-2018 dataset, show that 3D-vGAN outperforms classical segmentation models, including U-net, Gan, FCN and 3D V-net, reaching specificity over 99.8%." + }, + "2411.14407v1": { + "title": "Exploring Methods for Integrating and Augmenting Multimodal Data to Improve Prognostic Accuracy in Imbalanced Datasets for Intraoperative Aneurysm Occlusion", + "url": "http://arxiv.org/abs/2411.14407v1", + "authors": "Parisa Naghdi, Mohammad Mahdi Shiraz Bhurwani, Ahmad Rahmatpour, Parmita Mondal, Michael Udin, Kyle A Williams, Swetadri Vasan Setlur Nagesh, Ciprian N Ionita", + "update_time": "2024-11-21", + "abstract": "This study evaluates a multimodal machine learning framework for predicting treatment outcomes in intracranial aneurysms (IAs). Combining angiographic parametric imaging (API), patient biomarkers, and disease morphology, the framework aims to enhance prognostic accuracy. Data from 340 patients were analyzed, with separate deep neural networks processing quantitative and categorical data. These networks' pre decision layers were concatenated and inputted into a final predictive network. Various data augmentation strategies, including Synthetic Minority Oversampling Technique for Nominal and Continuous data (SMOTE NC), addressed dataset imbalances. Performance metrics, evaluated through Monte Carlo cross validation, showed significant improvements with augmentation, particularly in intermediate fusion models. This study validates the framework's efficacy in accurately predicting IA treatment outcomes, demonstrating that data augmentation techniques can substantially enhance model performance." + }, + "2411.14385v1": { + "title": "Enhancing Diagnostic Precision in Gastric Bleeding through Automated Lesion Segmentation: A Deep DuS-KFCM Approach", + "url": "http://arxiv.org/abs/2411.14385v1", + "authors": "Xian-Xian Liu, Mingkun Xu, Yuanyuan Wei, Huafeng Qin, Qun Song, Simon Fong, Feng Tien, Wei Luo, Juntao Gao, Zhihua Zhang, Shirley Siu", + "update_time": "2024-11-21", + "abstract": "Timely and precise classification and segmentation of gastric bleeding in endoscopic imagery are pivotal for the rapid diagnosis and intervention of gastric complications, which is critical in life-saving medical procedures. Traditional methods grapple with the challenge posed by the indistinguishable intensity values of bleeding tissues adjacent to other gastric structures. Our study seeks to revolutionize this domain by introducing a novel deep learning model, the Dual Spatial Kernelized Constrained Fuzzy C-Means (Deep DuS-KFCM) clustering algorithm. This Hybrid Neuro-Fuzzy system synergizes Neural Networks with Fuzzy Logic to offer a highly precise and efficient identification of bleeding regions. Implementing a two-fold coarse-to-fine strategy for segmentation, this model initially employs the Spatial Kernelized Fuzzy C-Means (SKFCM) algorithm enhanced with spatial intensity profiles and subsequently harnesses the state-of-the-art DeepLabv3+ with ResNet50 architecture to refine the segmentation output. Through extensive experiments across mainstream gastric bleeding and red spots datasets, our Deep DuS-KFCM model demonstrated unprecedented accuracy rates of 87.95%, coupled with a specificity of 96.33%, outperforming contemporary segmentation methods. The findings underscore the model's robustness against noise and its outstanding segmentation capabilities, particularly for identifying subtle bleeding symptoms, thereby presenting a significant leap forward in medical image processing." + }, + "2411.14353v1": { + "title": "Enhancing Medical Image Segmentation with Deep Learning and Diffusion Models", + "url": "http://arxiv.org/abs/2411.14353v1", + "authors": "Houze Liu, Tong Zhou, Yanlin Xiang, Aoran Shen, Jiacheng Hu, Junliang Du", + "update_time": "2024-11-21", + "abstract": "Medical image segmentation is crucial for accurate clinical diagnoses, yet it faces challenges such as low contrast between lesions and normal tissues, unclear boundaries, and high variability across patients. Deep learning has improved segmentation accuracy and efficiency, but it still relies heavily on expert annotations and struggles with the complexities of medical images. The small size of medical image datasets and the high cost of data acquisition further limit the performance of segmentation networks. Diffusion models, with their iterative denoising process, offer a promising alternative for better detail capture in segmentation. However, they face difficulties in accurately segmenting small targets and maintaining the precision of boundary details. This article discusses the importance of medical image segmentation, the limitations of current deep learning approaches, and the potential of diffusion models to address these challenges." + }, + "2411.14250v1": { + "title": "CP-UNet: Contour-based Probabilistic Model for Medical Ultrasound Images Segmentation", + "url": "http://arxiv.org/abs/2411.14250v1", + "authors": "Ruiguo Yu, Yiyang Zhang, Yuan Tian, Zhiqiang Liu, Xuewei Li, Jie Gao", + "update_time": "2024-11-21", + "abstract": "Deep learning-based segmentation methods are widely utilized for detecting lesions in ultrasound images. Throughout the imaging procedure, the attenuation and scattering of ultrasound waves cause contour blurring and the formation of artifacts, limiting the clarity of the acquired ultrasound images. To overcome this challenge, we propose a contour-based probabilistic segmentation model CP-UNet, which guides the segmentation network to enhance its focus on contour during decoding. We design a novel down-sampling module to enable the contour probability distribution modeling and encoding stages to acquire global-local features. Furthermore, the Gaussian Mixture Model utilizes optimized features to model the contour distribution, capturing the uncertainty of lesion boundaries. Extensive experiments with several state-of-the-art deep learning segmentation methods on three ultrasound image datasets show that our method performs better on breast and thyroid lesions segmentation." + }, + "2411.14249v1": { + "title": "Towards a Physics Engine to Simulate Robotic Laser Surgery: Finite Element Modeling of Thermal Laser-Tissue Interactions", + "url": "http://arxiv.org/abs/2411.14249v1", + "authors": "Nicholas E. Pacheco, Kang Zhang, Ashley S. Reyes, Christopher J. Pacheco, Lucas Burstein, Loris Fichera", + "update_time": "2024-11-21", + "abstract": "This paper presents a computational model, based on the Finite Element Method (FEM), that simulates the thermal response of laser-irradiated tissue. This model addresses a gap in the current ecosystem of surgical robot simulators, which generally lack support for lasers and other energy-based end effectors. In the proposed model, the thermal dynamics of the tissue are calculated as the solution to a heat conduction problem with appropriate boundary conditions. The FEM formulation allows the model to capture complex phenomena, such as convection, which is crucial for creating realistic simulations. The accuracy of the model was verified via benchtop laser-tissue interaction experiments using agar tissue phantoms and ex-vivo chicken muscle. The results revealed an average root-mean-square error (RMSE) of less than 2 degrees Celsius across most experimental conditions." + }, + "2411.14039v1": { + "title": "Uterine Ultrasound Image Captioning Using Deep Learning Techniques", + "url": "http://arxiv.org/abs/2411.14039v1", + "authors": "Abdennour Boulesnane, Boutheina Mokhtari, Oumnia Rana Segueni, Slimane Segueni", + "update_time": "2024-11-21", + "abstract": "Medical imaging has significantly revolutionized medical diagnostics and treatment planning, progressing from early X-ray usage to sophisticated methods like MRIs, CT scans, and ultrasounds. This paper investigates the use of deep learning for medical image captioning, with a particular focus on uterine ultrasound images. These images are vital in obstetrics and gynecology for diagnosing and monitoring various conditions across different age groups. However, their interpretation is often challenging due to their complexity and variability. To address this, a deep learning-based medical image captioning system was developed, integrating Convolutional Neural Networks with a Bidirectional Gated Recurrent Unit network. This hybrid model processes both image and text features to generate descriptive captions for uterine ultrasound images. Our experimental results demonstrate the effectiveness of this approach over baseline methods, with the proposed model achieving superior performance in generating accurate and informative captions, as indicated by higher BLEU and ROUGE scores. By enhancing the interpretation of uterine ultrasound images, our research aims to assist medical professionals in making timely and accurate diagnoses, ultimately contributing to improved patient care." + }, + "2411.14012v1": { + "title": "Logic Augmented Generation", + "url": "http://arxiv.org/abs/2411.14012v1", + "authors": "Aldo Gangemi, Andrea Giovanni Nuzzolese", + "update_time": "2024-11-21", + "abstract": "Semantic Knowledge Graphs (SKG) face challenges with scalability, flexibility, contextual understanding, and handling unstructured or ambiguous information. However, they offer formal and structured knowledge enabling highly interpretable and reliable results by means of reasoning and querying. Large Language Models (LLMs) overcome those limitations making them suitable in open-ended tasks and unstructured environments. Nevertheless, LLMs are neither interpretable nor reliable. To solve the dichotomy between LLMs and SKGs we envision Logic Augmented Generation (LAG) that combines the benefits of the two worlds. LAG uses LLMs as Reactive Continuous Knowledge Graphs that can generate potentially infinite relations and tacit knowledge on-demand. SKGs are key for injecting a discrete heuristic dimension with clear logical and factual boundaries. We exemplify LAG in two tasks of collective intelligence, i.e., medical diagnostics and climate projections. Understanding the properties and limitations of LAG, which are still mostly unknown, is of utmost importance for enabling a variety of tasks involving tacit knowledge in order to provide interpretable and effective results." + }, + "2411.14001v1": { + "title": "Graph Domain Adaptation with Dual-branch Encoder and Two-level Alignment for Whole Slide Image-based Survival Prediction", + "url": "http://arxiv.org/abs/2411.14001v1", + "authors": "Yuntao Shou, Peiqiang Yan, Xingjian Yuan, Xiangyong Cao, Qian Zhao, Deyu Meng", + "update_time": "2024-11-21", + "abstract": "In recent years, histopathological whole slide image (WSI)- based survival analysis has attracted much attention in medical image analysis. In practice, WSIs usually come from different hospitals or laboratories, which can be seen as different domains, and thus may have significant differences in imaging equipment, processing procedures, and sample sources. These differences generally result in large gaps in distribution between different WSI domains, and thus the survival analysis models trained on one domain may fail to transfer to another. To address this issue, we propose a Dual-branch Encoder and Two-level Alignment (DETA) framework to explore both feature and category-level alignment between different WSI domains. Specifically, we first formulate the concerned problem as graph domain adaptation (GDA) by virtue the graph representation of WSIs. Then we construct a dual-branch graph encoder, including the message passing branch and the shortest path branch, to explicitly and implicitly extract semantic information from the graph-represented WSIs. To realize GDA, we propose a two-level alignment approach: at the category level, we develop a coupling technique by virtue of the dual-branch structure, leading to reduced divergence between the category distributions of the two domains; at the feature level, we introduce an adversarial perturbation strategy to better augment source domain feature, resulting in improved alignment in feature distribution. To the best of our knowledge, our work is the first attempt to alleviate the domain shift issue for WSI data analysis. Extensive experiments on four TCGA datasets have validated the effectiveness of our proposed DETA framework and demonstrated its superior performance in WSI-based survival analysis." + }, + "2411.13902v1": { + "title": "PIORS: Personalized Intelligent Outpatient Reception based on Large Language Model with Multi-Agents Medical Scenario Simulation", + "url": "http://arxiv.org/abs/2411.13902v1", + "authors": "Zhijie Bao, Qingyun Liu, Ying Guo, Zhengqiang Ye, Jun Shen, Shirong Xie, Jiajie Peng, Xuanjing Huang, Zhongyu Wei", + "update_time": "2024-11-21", + "abstract": "In China, receptionist nurses face overwhelming workloads in outpatient settings, limiting their time and attention for each patient and ultimately reducing service quality. In this paper, we present the Personalized Intelligent Outpatient Reception System (PIORS). This system integrates an LLM-based reception nurse and a collaboration between LLM and hospital information system (HIS) into real outpatient reception setting, aiming to deliver personalized, high-quality, and efficient reception services. Additionally, to enhance the performance of LLMs in real-world healthcare scenarios, we propose a medical conversational data generation framework named Service Flow aware Medical Scenario Simulation (SFMSS), aiming to adapt the LLM to the real-world environments and PIORS settings. We evaluate the effectiveness of PIORS and SFMSS through automatic and human assessments involving 15 users and 15 clinical experts. The results demonstrate that PIORS-Nurse outperforms all baselines, including the current state-of-the-art model GPT-4o, and aligns with human preferences and clinical needs. Further details and demo can be found at https://github.com/FudanDISC/PIORS" + } + } +} \ No newline at end of file