{ "data_id": "43849", "name": "2018-Airplane-Flights", "exact_name": "2018-Airplane-Flights", "version": 1, "version_label": "v1.0", "description": "Dataset Description Story\nView the ReadMe file in my Github repo for this project.\nCheck out all the info on my portfolio's webpage for this project.\nAs I write this, I'm a Data Science student. To add to my portfolio, I wanted to build a web app to predict the prices of airline flight prices: the user would be able to select an origin and a destination. I found a database from The Bureau of Transportation Statistics.\nI downloaded their data from Q1, Q2, Q3, and Q4 from 2018 a total of 27M+ rows and 42 columns.\nFor my pricing prediction purposes, I eliminated unnecessary columns, renamed some columns, and refined it for consistency, bringing it to a new total of 9M+ rows and 13 columns.\nHave fun and share your kernels, please!\nColumn Descriptions\n1. Unnamed: drop this column (it's a duplicate index column)\n2-3. ItinID, MktID: vaguely demonstrates the order in which tickets were ordered (lower IDs being ordered first)\n4. MktCoupons: the number of coupons in the market for that flight\n5. Quarter: 1, 2, 3, or 4, all of which are in 2018\n6. Origin: the city out of which the flight begins\n7. OriginWac: USA State\/Territory World Area Code\n8. Dest: the city in which the flight ends\n9. DestWac: USA State\/Territory World Area Code\n10. Miles: the number of miles traveled\n11. ContiguousUSA: binary column -- (2) meaning flight is in the contiguous (48) USA states, and (1) meaning it is not (ie: Hawaii, Alaska, off-shore territories)\n12. NumTicketsOrdered: number of tickets that were purchased by the user\n13. Airline Company: the two-letter airline company code that the user used from start to finish (key codes below)\n14. 
PricePerTicket: target prediction column\nAirline Company Codes (in order of frequency for this dataset)\nWN -- Southwest Airlines Co.\nDL -- Delta Air Lines Inc.\nAA -- American Airlines Inc.\nUA -- United Air Lines Inc.\nB6 -- JetBlue Airways\nAS -- Alaska Airlines Inc.\nNK -- Spirit Air Lines\nG4 -- Allegiant Air\nF9 -- Frontier Airlines Inc.\nHA -- Hawaiian Airlines Inc.\nSY -- Sun Country Airlines d\/b\/a MN Airlines\nVX -- Virgin America\nUSA State\/Territory World Area Codes\n1 Alaska\n2 Hawaii\n3 Puerto Rico\n4 U.S. Virgin Islands\n5 U.S. Pacific Trust Territories and Possessions\n11 Connecticut\n12 Maine\n13 Massachusetts\n14 New Hampshire\n15 Rhode Island\n16 Vermont\n21 New Jersey\n22 New York\n23 Pennsylvania\n31 Delaware\n32 District of Columbia\n33 Florida\n34 Georgia\n35 Maryland\n36 North Carolina\n37 South Carolina\n38 Virginia\n39 West Virginia\n41 Illinois\n42 Indiana\n43 Michigan\n44 Ohio\n45 Wisconsin\n51 Alabama\n52 Kentucky\n53 Mississippi\n54 Tennessee\n61 Iowa\n62 Kansas\n63 Minnesota\n64 Missouri\n65 Nebraska\n66 North Dakota\n67 South Dakota\n71 Arkansas\n72 Louisiana\n73 Oklahoma\n74 Texas\n81 Arizona\n82 Colorado\n83 Idaho\n84 Montana\n85 Nevada\n86 New Mexico\n87 Utah\n88 Wyoming\n91 California\n92 Oregon\n93 Washington", "format": "arff", "uploader": "Elif Ceren Gok", "uploader_id": 30125, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-24 15:44:33", "update_comment": null, "last_update": "2022-03-24 15:44:33", "licence": "CC0: Public Domain", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102674\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "2018-Airplane-Flights", "Dataset Description Story View the ReadMe file in my Github repo for this project. Check out all the info on my portfolio's webpage for this project. As I write this, I'm a Data Science student. 
To add to my portfolio, I wanted to build a web app to predict the prices of airline flight prices: the user would be able to select an origin and a destination. I found a database from The Bureau of Transportation Statistics. I downloaded their data from Q1, Q2, Q3, and Q4 from 2018 a total of 27M+ rows " ], "weight": 5 }, "qualities": { "NumberOfInstances": 9534417, "NumberOfFeatures": 14, "NumberOfClasses": null, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 11, "NumberOfSymbolicFeatures": 0, "Dimensionality": 1.4683645575812343e-6, "PercentageOfNumericFeatures": 78.57142857142857, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0, "AutoCorrelation": null, "PercentageOfMissingValues": 0 }, "tags": [ { "uploader": "38960", "tag": "Machine Learning" }, { "uploader": "38960", "tag": "Statistics" } ], "features": [ { "name": "Unnamed:_0", "index": "0", "type": "numeric", "distinct": "9534417", "missing": "0", "min": "0", "max": "9534416", "mean": "4767208", "stdev": "2752349" }, { "name": "ItinID", "index": "1", "type": "numeric", "distinct": "6201347", "missing": "0", "min": "2147483647", "max": "2147483647", "mean": "2147483647", "stdev": "2147483647" }, { "name": "MktID", "index": "2", "type": "numeric", "distinct": "9534417", "missing": "0", "min": "2147483647", "max": "2147483647", "mean": "2147483647", "stdev": "2147483647" }, { "name": "MktCoupons", "index": "3", "type": "numeric", "distinct": "3", "missing": "0", "min": "1", "max": "3", "mean": "1", "stdev": "0" }, { "name": "Quarter", "index": "4", "type": "numeric", "distinct": "4", "missing": "0", "min": "1", "max": "4", "mean": "3", "stdev": "1" }, { "name": "Origin", "index": "5", "type": "string", "distinct": "263", "missing": "0" }, { "name": 
"OriginWac", "index": "6", "type": "numeric", "distinct": "52", "missing": "0", "min": "1", "max": "93", "mean": "55", "stdev": "28" }, { "name": "Dest", "index": "7", "type": "string", "distinct": "260", "missing": "0" }, { "name": "DestWac", "index": "8", "type": "numeric", "distinct": "52", "missing": "0", "min": "1", "max": "93", "mean": "55", "stdev": "28" }, { "name": "Miles", "index": "9", "type": "numeric", "distinct": "2117", "missing": "0", "min": "11", "max": "5095", "mean": "1202", "stdev": "697" }, { "name": "ContiguousUSA", "index": "10", "type": "numeric", "distinct": "2", "missing": "0", "min": "1", "max": "2", "mean": "2", "stdev": "0" }, { "name": "NumTicketsOrdered", "index": "11", "type": "numeric", "distinct": "20", "missing": "0", "min": "1", "max": "20", "mean": "2", "stdev": "3" }, { "name": "AirlineCompany", "index": "12", "type": "string", "distinct": "12", "missing": "0" }, { "name": "PricePerTicket", "index": "13", "type": "numeric", "distinct": "71834", "missing": "0", "min": "50", "max": "1000", "mean": "232", "stdev": "143" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }