@comment{NOTE(review): the second author is spelled "Shahabzi" in this record; this may be a transposition of "Shahbazi" -- confirm against the publisher page (see the url field) before citing. The abstract fields quote the published text verbatim, so wording is intentionally left untouched; only LaTeX-unsafe or mis-encoded characters were normalised.}
@article{mousavi2020application,
  author      = {Mousavi, A. and Shahabzi, F. and Oustan, Sh. and Jafarzadeh, A. A. and Minasny, B.},
  title       = {Application of Two Data Mining Techniques for Mapping the Spatial Distribution of Soil Organic Carbon (Case Study: East Shore of {Urmia Lake})},
  journal     = {Water and Soil},
  volume      = {34},
  number      = {3},
  pages       = {689--705},
  year        = {2020},
  publisher   = {Ferdowsi University of Mashhad},
  issn        = {2008-4757},
  eissn       = {2423-396X},
  doi         = {10.22067/jsw.v34i3.84154},
  abstract    = {Introduction: Soils are considered as one of the most important parameters to be achieved the sustainable agriculture at any place in the world. Additionally, the digital environment needs to have a soil continuous maps at local and regional scales. However, such information are always not available at the required scale and mapping with high accuracy. Digital soil mapping (DSM) is a key for quantifying and assessing the variation of soil properties such as organic carbon (OC) especially in un-sampled and scarcely sampled areas. Using remotely sensed indices as an important auxiliary information relevant to the study area and data mining techniques were the pathway to create digital maps. Previous studies showed that digital elevation model (DEM) and remotely sensed data are the most commonly useful ancillary data for soil organic carbon prediction. the importance of DEM and derivative data in soil spatial modelling, it was not carried out in our research because there were no sharp differences in relief, and climate for that matter, across the study area. This research aims to investigate the spatial distribution of soil organic carbon (SOC) in a study area in north-western Iran using 21 remotely sensed indices as well as two data mining techniques namely Random Forests (RF) and Cubist. Materials and Methods: This study was performed on the east shore of Urmia Lake located in the east Azerbaijan province, Iran. The area extension is about 500 km2. 
Based on the synoptic meteorological station report, the average annual precipitation and temperature of the study area is 345.37 mm and 10.83°C, respectively. Soil moisture and temperature regimes are Xeric and Mesic, respectively. Using stratified random soil sampling method, 131 soil samples (for the depth of 0-10 cm) were collected. Soil organic carbon (SOC) were then measured. The next step was to gather a suite of auxiliary data or environmental covariates thought to be useful (and available) for predicting SOC within a DSM framework for the region studied. Then, a number of remotely sensed imagery scenes from the Landsat 8-OLI acquired were collected in July 2017. The RF and Cubist models were applied to establish a relationship between soil organic carbon and auxiliary data. Both reflectance of the individual bands and indices derived from combinations of the individual bands were used. Fourteen spectral indices relevant to four types of data including: i) vegetation and soil; ii) water; iii) landscape; and iv) geology were gathered. Three different statistics was used for evaluating the performance of model in predicting SOC, namely the coefficient of determination (R2), mean error or bias (ME) and root mean square error (RMSE). Results and Discussion: The results of the descriptive statistics of determined and calculated SOC for 131 soil samples showed that the mean and median values for SOC were 2.52\% and 2.11\%, respectively. Also, the CVs was recorded 57.94\%. Minimum and maximum recorded values for SOC were 0.83\% and 5.22\%, respectively. The contents of SOC was left-skewed in the data set. The RF model prediction was quite good with calibration (R2= 0.89, MSE = 0.16 and ME = 0.01). While, in the Cubist calibration data set, the Valve of RMSE and ME were increased (R2= 0.85, MSE = 0.21 and ME = 0.03). In terms of R2, The RF model showed the higher value (0.89) compared with the Cubist model (0.85) for the validation dataset. 
Generally, the remote sensing (RS) spectral indices can successfully predict various SOC across the study area. The covariate importance rankings showed that VARI, NDVI, CRI2 and CRI1 were the four important covariates to predict SOC in the study area. Accordingly, the changes in SOC over space were not uniform across the study area and also it means that the study area is very dynamic and evolved over time. Conclusion: The results of this study showed that although variables and auxiliary data had different importance in predicting the distribution of SOC, in general it can be found by modelling the relationship between them and SOC through the model. The results revealed that the RF model was suitable for the target variable. Accordingly, the auxiliary variables had different importance in predicting the spatial distribution of SOC. Remote sensing imagery, particularly those encompassing the combined indices played an important role in the prediction of SOC. The obtained results also indicated that the Visible Atmospherically Resistant Index (VARI) and Normalized Difference Vegetation Index (NDVI) were important to predict SOC. The current study revealed that DSM using important environmental covariates can be successfully used in Iran which there is no sufficient soil databases. This research also provided a pathway to start further works in the future such as DSM relevant to the soil erosion, soil ripening, trace elements and so on.},
  keywords    = {Cubist,Digital soil mapping,Environmental covariates,Modelling,random forest},
  title_fa    = {کاربرد دو تکنیک داده‌کاوی برای تهیه نقشه پراکنش مکانی کربن آلی خاک (مطالعه موردی: کرانه شرقی دریاچه ارومیه)},
  abstract_fa = {در این پژوهش، از دو مدل جنگل تصادفی و کیوبیست به عنوان یکی از پرکاربردترین تکنیک‌های نوین داده‌کاوی برای تهیه نقشه رقومی کربن آلی خاک در ساحل شرقی دریاچه ارومیه استفاده شد. 
بدین منظور با استفاده از روش نمونه‌برداری تصادفی مرتب شده در منطقه‌ای به وسعت 500 کیلومتر‌مربع تعداد 131 نمونه خاک سطحی (عمق 10-0 سانتی‌متری) و از دوسایت جداگانه برداشت شد. متغیرهای کمکی مورداستفاده در این تحقیق شامل شش باند مستقل برگرفته از تصویر OLI ماهواره لندست 8 (باندهای 2 تا 7)، تجزیه به مؤلفه‌های اصلی (PCA) باندها و همچنین تعداد 14 شاخص ترکیبی مربوط به تیرماه سال 1396می‌باشد. نتایج پیش‌بینی مدل در مرحله آزمون (25 درصد داده‌ها) نشان داد که مدل جنگل تصادفی با مقادیر (89/0 R2 =، 16/0RMSE = و 01/0ME =) صحت و کارایی بالاتری نسبت به مدل کیوبیست (85/0 R2 =، 21/0RMSE = و 03/0ME =) دارد. همچنین نتایج رتبه‌بندی اهمیت متغیرهای کمکی برای پیش‌بینی کربن آلی خاک نشان داد که پارامترهای شاخص مرئی مقاومت اتمسفریک (VARI)، شاخص گیاهی نرمال شده (NDVI)، شاخص سنگی شده آهک دو (CRI2) و شاخص سنگی شده آهک یک (CRI1) دارای بیشترین تأثیر و شاخص گچ (GI) و برخی باند‌های مستقل از جمله باند 5 (B5) و باند 3 (B3) اهمیت کمتری نسبت به سایر شاخص‌ها دارند. به‌طور کلی نتایج نشان داد که مدل جنگل تصادفی در مقایسه با مدل کیوبیست به نحو مطلوبی قادر به مدل‌سازی و پیش‌بینی پراکنش مکانی کربن آلی خاک در منطقه مورد مطالعه بوده است.},
  keywords_fa = {جنگل تصادفی,کیوبیست,متغیرهای کمکی,مدل‌سازی,نقشه‌برداری رقومی},
  url         = {https://jsw.um.ac.ir/article_38828.html},
  eprint      = {https://jsw.um.ac.ir/article_38828_5a284e39c7e5c6eeac61cd11ab9738d7.pdf},
}