Reading in a specific type of text file


/ Published in: C++
Save to your folder(s)

This code creates a program to read in incremental payment data from a comma-separated text file, to accumulate the data and to output the results to a different comma-separated text file.
EXAMPLE INPUT DATA
A short input file might contain the following:
Product, Origin Year, Development Year, Incremental Value
Comp, 1992, 1992, 110.0
Comp, 1992, 1993, 170.0
Comp, 1993, 1993, 200.0
Non-Comp, 1990, 1990, 45.2
Non-Comp, 1990, 1991, 64.8
Non-Comp, 1990, 1993, 37.0
Non-Comp, 1991, 1991, 50.0
Non-Comp, 1991, 1992, 75.0
Non-Comp, 1991, 1993, 25.0
Non-Comp, 1992, 1992, 55.0
Non-Comp, 1992, 1993, 85.0
Non-Comp, 1993, 1993, 100.0


Copy this code and paste it in your HTML
  1. //PRODUCT.HPP FILE
  2. #pragma once
  3. #include <iostream>
  4. #include <vector>
  5.  
  6. #include <boost/fusion/include/vector.hpp>
  7.  
  8. typedef boost::fusion::vector4<std::string, unsigned int, unsigned int, double> bfVector4;
  9.  
  10. class Product
  11. {
  12. public:
  13. Product(const bfVector4& line);
  14.  
  15. void findMinAndMaxOriginYears();
  16. void addData(const bfVector4& currentBFVector);
  17. const std::string getProductName() const { return m_productName; }
  18. const unsigned int getMinOriginYear() const { return m_minOriginYear; }
  19. void setMinOriginYear(unsigned int minOriginYear) { m_minOriginYear = minOriginYear; }
  20. void setMaxOriginYear(unsigned int maxOriginYear) { m_maxOriginYear = maxOriginYear; }
  21. const unsigned int getMaxOriginYear() const { return m_maxOriginYear; }
  22. const std::vector<bfVector4> getData() const { return m_data; }
  23. private:
  24. std::string m_productName;
  25. unsigned int m_minOriginYear;
  26. unsigned int m_maxOriginYear;
  27. std::vector<bfVector4> m_data;
  28. };
  29.  
  30.  
  31. //END PRODUCT.HPP FILE
  32.  
  33. //PRODUCT.CPP FILE
  34. #include "Product.hpp"
  35.  
  36. #include <boost/fusion/sequence.hpp>
  37. #include <boost/foreach.hpp>
  38.  
  39. Product::Product(const bfVector4& line)
  40. {
  41. m_productName = boost::fusion::at_c<0>(line);
  42. m_data.push_back(line);
  43. m_minOriginYear = std::numeric_limits<unsigned int>::max();
  44. m_maxOriginYear = std::numeric_limits<unsigned int>::min();
  45. }
  46.  
  47. void Product::findMinAndMaxOriginYears()
  48. {
  49. BOOST_FOREACH(const bfVector4& line, m_data)
  50. {
  51. unsigned int currentOriginYear = boost::fusion::at_c<1>(line);
  52.  
  53. if (currentOriginYear < m_minOriginYear)
  54. {
  55. m_minOriginYear = currentOriginYear;
  56. continue;
  57. }
  58. if (currentOriginYear > m_maxOriginYear)
  59. {
  60. m_maxOriginYear = currentOriginYear;
  61. continue;
  62. }
  63. }
  64. }
  65.  
  66. void Product::addData(const bfVector4& currentBFVector)
  67. {
  68. m_data.push_back(currentBFVector);
  69. }
  70. //END PRODUCT.CPP FILE
  71.  
  72. //MAIN.CPP FILE
  73. #define _SCL_SECURE_NO_WARNINGS
  74.  
  75. #include <iostream>
  76. #include <fstream>
  77. #include <vector>
  78. #include <map>
  79. #include <set>
  80.  
  81. #include <boost/filesystem.hpp>
  82. #include <boost/algorithm/string.hpp>
  83. #include <boost/fusion/sequence.hpp>
  84. #include <boost/lexical_cast.hpp>
  85. #include <boost/foreach.hpp>
  86.  
  87. #include "Product.hpp"
  88.  
  89. typedef boost::fusion::vector3<std::string, unsigned int, unsigned int> bfVector3;
  90.  
  91. void readFile(const std::string& inputFile, std::set<std::string>& productNames, std::vector<Product>& products)
  92. {
  93. std::ifstream inputFileStream(inputFile);
  94. std::string currentLine;
  95. std::map<std::string, unsigned int> headerPositions;
  96. //the order I want the headers to be in
  97. headerPositions.insert(std::make_pair("Product", 0));
  98. headerPositions.insert(std::make_pair("OriginYear", 1));
  99. headerPositions.insert(std::make_pair("DevelopmentYear", 2));
  100. headerPositions.insert(std::make_pair("IncrementalValue", 3));
  101.  
  102. //process first line first - the headers line, to determine what column goes where; reordering the columns in memory to my liking
  103. std::map<unsigned int, int> correctHeaderPositions;
  104. std::getline(inputFileStream, currentLine);
  105. currentLine.erase(std::remove_if(currentLine.begin(), currentLine.end(), isspace), currentLine.end());
  106. std::vector<std::string> headers;
  107. boost::split(headers, currentLine, boost::is_any_of(","));
  108.  
  109. if (headers.size() != 4)
  110. std::cout << "There should only be 4 headers in your text file. Any extra incorrect columns will be ignored." << std::endl;
  111.  
  112. for (int i = 0; i < (int)headers.size(); ++i)
  113. {
  114. if (!headerPositions.count(headers[i]))
  115. {
  116. correctHeaderPositions.insert(std::make_pair(i, -(i + 1)));
  117. std::cout << "The header \'" << headers[i] << "\' is incorrect. This column will be ignored." << std::endl;
  118. continue;
  119. }
  120.  
  121. correctHeaderPositions.insert(std::make_pair(i, headerPositions[headers[i]]));
  122. }
  123.  
  124. //now read all the other lines
  125. while (std::getline(inputFileStream, currentLine))
  126. {
  127. bool skip = false;
  128. std::vector<std::string> currentData;
  129. bfVector4 currentBFVector;
  130. currentLine.erase(std::remove_if(currentLine.begin(), currentLine.end(), isspace), currentLine.end());
  131. boost::split(currentData, currentLine, boost::is_any_of(","));
  132. for (unsigned int i = 0; i < currentData.size(); ++i)
  133. {
  134. if (correctHeaderPositions[i] < 0 || skip)
  135. continue;
  136.  
  137. unsigned int realPosition = correctHeaderPositions[i];
  138. int currentOriginYear = 0;
  139. int currentDevelopmentYear = 0;
  140. double currentIncrementalValue = 0.0;
  141. switch (realPosition)
  142. {
  143. case 0:
  144. try
  145. {
  146. boost::fusion::at_c<0>(currentBFVector) = boost::lexical_cast<std::string>(currentData[correctHeaderPositions[0]]);
  147. }
  148. catch (boost::bad_lexical_cast)
  149. {
  150. std::cout << "The Product name of the line: " << std::endl << currentLine << std::endl << "is not a valid string. Discarding line."
  151. << std::endl;
  152. skip = true;
  153. break;
  154. }
  155. break;
  156.  
  157. case 1:
  158. try
  159. {
  160. currentOriginYear = boost::lexical_cast<int>(currentData[correctHeaderPositions[1]]);
  161. }
  162. catch (boost::bad_lexical_cast)
  163. {
  164. std::cout << "The Origin Year of the line:" << std::endl << currentLine << std::endl << "is not a valid integer. Discarding line."
  165. << std::endl;
  166. skip = true;
  167. break;
  168. }
  169. //check if the current origin year is positive and non-zero
  170. if (currentOriginYear <= 0)
  171. {
  172. std::cout << "The Origin Year of the line:" << std::endl << currentLine << std::endl << "is zero or negative. Discarding line."
  173. << std::endl;
  174. skip = true;
  175. break;
  176. }
  177. boost::fusion::at_c<1>(currentBFVector) = currentOriginYear;
  178. break;
  179.  
  180. case 2:
  181. try
  182. {
  183. currentDevelopmentYear = boost::lexical_cast<unsigned int>(currentData[correctHeaderPositions[2]]);
  184. }
  185. catch (boost::bad_lexical_cast)
  186. {
  187. std::cout << "The Development Year of the line:" << std::endl << currentLine << std::endl << "is not a valid integer. Discarding line."
  188. << std::endl;
  189. skip = true;
  190. break;
  191. }
  192. //check if the current development year is positive and non-zero
  193. if (currentDevelopmentYear <= 0)
  194. {
  195. std::cout << "The Development Year of the line:" << std::endl << currentLine << std::endl << "is zero or negative. Discarding line."
  196. << std::endl;
  197. skip = true;
  198. break;
  199. }
  200. //check if the current development year is less than it's respective origin year
  201. if (currentDevelopmentYear < (int)boost::fusion::at_c<1>(currentBFVector))
  202. {
  203. std::cout << "The Development Year of the line:" << std::endl << currentLine << std::endl << "is less than it's corresponding origin year. "
  204. << "Discarding line." << std::endl;
  205. skip = true;
  206. break;
  207. }
  208. boost::fusion::at_c<2>(currentBFVector) = currentDevelopmentYear;
  209. break;
  210.  
  211. case 3:
  212. try
  213. {
  214. currentIncrementalValue = boost::lexical_cast<double>(currentData[correctHeaderPositions[3]]);
  215. }
  216. catch (boost::bad_lexical_cast)
  217. {
  218. std::cout << "The Incremental Value of the line:" << std::endl << currentLine << std::endl << "is not a valid real number. "
  219. << "Discarding line." << std::endl;
  220. skip = true;
  221. break;
  222. }
  223. //check if the current incremental value is positive and non-zero
  224. if (currentIncrementalValue <= 0.0)
  225. {
  226. std::cout << "The Incremental Value of the line:" << std::endl << currentLine << std::endl << "zero or negative. Discarding line."
  227. << std::endl;
  228. skip = true;
  229. break;
  230. }
  231. boost::fusion::at_c<3>(currentBFVector) = currentIncrementalValue;
  232. break;
  233. }
  234. }
  235. //if this is a new product, add it to the products vector
  236. if (!productNames.count(boost::fusion::at_c<0>(currentBFVector)))
  237. {
  238. products.push_back(Product(currentBFVector));
  239. productNames.insert(boost::fusion::at_c<0>(currentBFVector));
  240. continue;
  241. }
  242.  
  243. //if this is an existing product, then add the data to the relevant product
  244. for (unsigned int i = 0; i < products.size(); ++i)
  245. if (products[i].getProductName() == boost::fusion::at_c<0>(currentBFVector))
  246. products[i].addData(currentBFVector);
  247. }
  248. }
  249.  
  250. bool findGlobalMinAndMaxOriginYears(std::vector<Product>& products, unsigned int& minOriginYear, unsigned int& maxOriginYear)
  251. {
  252. BOOST_FOREACH(Product& product, products)
  253. {
  254. product.findMinAndMaxOriginYears();
  255. unsigned int currentMinOriginYear = product.getMinOriginYear();
  256. unsigned int currentMaxOriginYear = product.getMaxOriginYear();
  257.  
  258. if (currentMinOriginYear < minOriginYear)
  259. minOriginYear = currentMinOriginYear;
  260. if (currentMaxOriginYear > maxOriginYear)
  261. maxOriginYear = currentMaxOriginYear;
  262. }
  263.  
  264. if (minOriginYear < std::numeric_limits<unsigned int>::max() && maxOriginYear > std::numeric_limits<unsigned int>::min())
  265. return true;
  266.  
  267. return false;
  268. }
  269.  
  270. void fillInBlanks(std::vector<Product>& products, const unsigned int minOriginYear, const unsigned int maxOriginYear)
  271. {
  272. BOOST_FOREACH(Product& product, products)
  273. {
  274. unsigned int productMinOriginYear = product.getMinOriginYear();
  275. unsigned int productMaxOriginYear = product.getMaxOriginYear();
  276. std::string productName = product.getProductName();
  277. //fill in blanks in between min and max product origin years
  278. for (unsigned int i = productMinOriginYear; i <= productMaxOriginYear; ++i)
  279. {
  280. for (unsigned int j = i; j <= productMaxOriginYear; ++j)
  281. {
  282. bool needsInserting = true;
  283. BOOST_FOREACH(const bfVector4& line, product.getData())
  284. {
  285. if (bfVector3(boost::fusion::at_c<0>(line), boost::fusion::at_c<1>(line), boost::fusion::at_c<2>(line))
  286. == bfVector3(productName, i, j))
  287. {
  288. needsInserting = false;
  289. break;
  290. }
  291. }
  292.  
  293. if (needsInserting)
  294. product.addData(bfVector4(productName, i, j, 0.0));
  295. }
  296. }
  297. //fill in missing origin years before the current product min origin year
  298. if (minOriginYear < productMinOriginYear)
  299. {
  300. for (unsigned int i = minOriginYear; i < productMinOriginYear; ++i)
  301. for (unsigned int j = i; j <= maxOriginYear; ++j)
  302. product.addData(bfVector4(productName, i, j, 0.0));
  303.  
  304. product.setMinOriginYear(minOriginYear);
  305. }
  306. //fill in missing origin years after the current product max origin year
  307. if (maxOriginYear > productMaxOriginYear)
  308. {
  309. for (unsigned int i = productMaxOriginYear + 1; i <= maxOriginYear; ++i)
  310. for (unsigned int j = i; j <= maxOriginYear; ++j)
  311. product.addData(bfVector4(productName, i, j, 0.0));
  312.  
  313. product.setMaxOriginYear(maxOriginYear);
  314. }
  315. }
  316. }
  317.  
  318. void outputFile(std::ofstream& output, const std::vector<Product> products, const unsigned int minOriginYear, const unsigned int developmentYears)
  319. {
  320. output << minOriginYear << ", " << developmentYears << std::endl;
  321. BOOST_FOREACH(const Product& product, products)
  322. {
  323. output << product.getProductName();
  324. std::vector<double> productOutput((developmentYears * (developmentYears + 1)) / 2, 0.0); //vector of incremental values to be summed
  325. //go through each origin year and workout where in the productOutput array the corresponding incremental value for each line should go
  326. for (unsigned int i = product.getMinOriginYear(); i <= product.getMaxOriginYear(); ++i)
  327. {
  328. BOOST_FOREACH(bfVector4 line, product.getData())
  329. {
  330. if (boost::fusion::at_c<1>(line) != i)
  331. continue;
  332.  
  333. unsigned int currentOriginYearFromMin = i - product.getMinOriginYear();
  334. //the following index was worked out using the fact that the number of incremental values decreases by 1 each time each time
  335. //the origin year increases, and the difference between the development year and the origin year, amongst other things.
  336. //used pen and paper to work out this index
  337. int index = (currentOriginYearFromMin * ((2 * (developmentYears - 1)) + 3 - currentOriginYearFromMin)) / 2
  338. + (boost::fusion::at_c<2>(line) - i);
  339. productOutput.at(index) = boost::fusion::at_c<3>(line);
  340. }
  341. }
  342. //now go through vector just constructed and sum relevant values
  343. for (unsigned int i = developmentYears; i > 0; --i)
  344. {
  345. for (unsigned int j = 0; j < i; ++j)
  346. {
  347. double sum = 0.0;
  348. for (int k = j; k >= 0; --k)
  349. {
  350. if (productOutput[k] != 0)
  351. sum += productOutput[k];
  352. }
  353.  
  354. output << ", " << sum;
  355. }
  356.  
  357. productOutput.erase(productOutput.begin(), productOutput.begin() + i);
  358. }
  359.  
  360. output << std::endl;
  361. }
  362.  
  363. output.close();
  364. }
  365.  
  366. void main()
  367. {
  368. boost::filesystem::path inputFile, outputFileDir;
  369. std::cout << "Please enter the path of the file you wish to examine: " << std::endl;
  370. std::cin >> inputFile;
  371. std::cout << "Note: The headers in your text file must be exactly the following, and nothing more:" << std::endl
  372. << "Product" << std::endl << "Origin Year" << std::endl << "Development Year" << std::endl << "Incremental Value" << std::endl;
  373.  
  374. //do some checks on the file specified first
  375. if (!boost::filesystem::exists(inputFile))
  376. {
  377. std::cout << "The file specified doesn't exist." << std::endl;
  378. return;
  379. }
  380.  
  381. if (inputFile.extension() != ".txt")
  382. {
  383. std::cout << " The file specified is not a .txt file. Please specify a path to a .txt file." << std::endl;
  384. return;
  385. }
  386.  
  387. std::cout << "Please enter the path of the directory that you wish the output file to be saved in: " << std::endl;
  388. std::cin >> outputFileDir;
  389.  
  390. if (!boost::filesystem::exists(outputFileDir))
  391. boost::filesystem::create_directories(outputFileDir);
  392.  
  393. //start reading file
  394. std::set<std::string> productNames;
  395. std::vector<Product> products;
  396. readFile(inputFile.string(), productNames, products);
  397. unsigned int minOriginYear = std::numeric_limits<unsigned int>::max();
  398. unsigned int maxOriginYear = std::numeric_limits<unsigned int>::min();
  399. unsigned int developmentYears = std::numeric_limits<unsigned int>::max();
  400. if (findGlobalMinAndMaxOriginYears(products, minOriginYear, maxOriginYear))
  401. developmentYears = maxOriginYear - minOriginYear + 1;
  402.  
  403. fillInBlanks(products, minOriginYear, maxOriginYear);
  404. //output results
  405. std::ofstream output((outputFileDir / "TriangleOutput.txt").string().c_str());
  406. outputFile(output, products, minOriginYear, developmentYears);
  407. }
  408. //END MAIN.CPP FILE

Report this snippet


Comments

RSS Icon Subscribe to comments

You need to login to post a comment.