index.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405
  1. #!/usr/bin/python3
  2. import mysql.connector
  3. import requests
  4. from bs4 import BeautifulSoup
  5. import urllib.parse
  6. import re
  7. from sys import exit as exit
  8. import json
  9. import datetime
  10. import custom_email
  11. from tabulate import tabulate
  12. ### TO DO ###
  13. #
  14. # email results
  15. # allow this script to be called and work by itself (if __name__ == __main__)
  16. # Print useful reports (land only, house and land, etc)
  17. # Check if db entries no longer appear online (mark expired)
  18. # When checking online from various sites, check if address already exists in db
  19. # - if so, warn user and do not add
  20. # Add date_added to initial entries
  21. # Check results against database for changes
  22. # - update and add/change date_modified
  23. # Add argument to run update query when results.py is calles
  24. # Add database column to hold parcel number. Make links to GIS servers
  25. #
  26. # IDENTIFY NEW PROPERTIES!!
  27. #############
  28. class Property:
  29. """Description of a proerty"""
  30. def __init__ (self, site_name, type, MLS, address, city, st, zip, \
  31. county, price, acres, title='', sqft=0, bedrooms=0, baths=0, description='', link=''):
  32. self.site_name = site_name
  33. self.type = type
  34. self.MLS = MLS
  35. self.title = title
  36. self.sqft = sqft
  37. self.bedrooms = bedrooms
  38. self.baths = baths
  39. self.address = address
  40. self.city = city
  41. self.st = st
  42. self.zip = zip
  43. self.county = county
  44. self.price = price
  45. self.acres = acres
  46. self.description = description
  47. self.link = link
  48. # self.distance_to_work = 0
  49. # self.time_to_work = 0
  50. # self.distance_to_school = 0
  51. # self.time_to_school = 0
  52. class Search:
  53. """Universal Search Criteria"""
  54. def __init__(self, county: list, lower_price=0, upper_price=500000, \
  55. lower_acres=5, upper_acres=35, type=['farm','land','home'], lower_sqft='', upper_sqft='', \
  56. lower_bedrooms='', upper_bedrooms=''):
  57. self.types=['land', 'farm', 'home', 'house']
  58. self.county = county
  59. self.lower_price = lower_price
  60. self.upper_price = upper_price
  61. self.lower_acres = lower_acres
  62. self.upper_acres = upper_acres
  63. self.type = type ##accept list!
  64. self.lower_sqft = lower_sqft
  65. self.upper_sqft = upper_sqft
  66. self.lower_bedrooms = lower_bedrooms
  67. self.upper_bedrooms = upper_bedrooms
  68. for property_type in type:
  69. assert property_type in self.types, ("Unknown type '" + property_type + "'. Property Type must be of type: " + str(self.types))
  70. class ImproperSearchError(Exception):
  71. def __init__ (self, search, message="Improper Search. Must use instance of Search class"):
  72. self.search = search
  73. self.message = message
  74. super().__init__(self.message)
  75. class MLSDATA:
  76. """Fetches and stores MLS Data
  77. Currently only supports GeorgiaMLS.com (GMLS)"""
  78. counties=['Gwinnett', 'Barrow', 'Hall', 'Jackson', 'Walton']
  79. GoogleAPIKey = 'AIzaSyAXAnpBtjv760W8YIPqKZ0dFXpwAaZN7Es'
  80. live_google = True
  81. def __init__ (self, mlstype):
  82. self.help = "This is a class that will retrieve MLS data from various sources, store the info in a database, and run queries on the data."
  83. self.mlstype = mlstype.lower() ## Determines what kind of data is to be retreieve (gmls, Zillow, etc)
  84. self.cursor = ''
  85. self.cnx = ''
  86. self.new_listings = []
  87. def stringbuilder(self, search: Search):
  88. """ Takes Search class and build appropriate URL query based on mlstype. Currently only supports gmls."""
  89. if self.mlstype == 'gmls':
  90. base_addr = 'https://www.georgiamls.com/real-estate/search-action.cfm?'
  91. params = [('cnty', search.county), \
  92. ('lpl', search.lower_price), ('lph', search.upper_price), \
  93. ('acresL', search.lower_acres), ('acresH', search.upper_acres), \
  94. ('orderBy', 'b'), \
  95. ('scat', '1'), \
  96. ('sdsp', 'g')]
  97. for type in search.type:
  98. if 'land' in type.lower():
  99. params.append(('typ', 'll'))
  100. if 'farm' in type.lower():
  101. params.append(('typ', 'af'))
  102. if 'home' in type.lower():
  103. params.append(('typ', 'sd'))
  104. if 'house' in type.lower():
  105. params.append(('typ', 'sd'))
  106. search_string = base_addr + urllib.parse.urlencode(params)
  107. print(search_string)
  108. return search_string
  109. def break_address(self, address):
  110. """Takes an address string in the form 'street address|city, state zip' and returns a list"""
  111. street = address[:address.find('|')]
  112. csz = address[address.find('|')+1:]
  113. city = csz[:csz.find(',')]
  114. st = csz[csz.find(',')+1:].split(' ')[1]
  115. zip = csz[csz.find(',')+1:].split(' ')[2]
  116. split_address = [street, city, st, zip]
  117. return split_address
  118. def gmlsparser(self, URL, county, pages=''):
  119. """ Retrieve the website for georgiamls.com and returns a list of Property objects.
  120. UNIQUE TO GEORGIAMLS.COM ONLY!!"""
  121. properties_list = []
  122. r = requests.get(URL)
  123. soup = BeautifulSoup(r.content, 'html5lib')
  124. if pages == '':
  125. try:
  126. pages = soup.find("div", {'class':'small listing-pagination-count'}).getText().strip().split(" ")[-1]
  127. current_page = soup.find("div", {'class':'small listing-pagination-count'}).getText().strip().split(" ")[-3]
  128. except AttributeError as err:
  129. print("No Results Found.")
  130. return
  131. else:
  132. print('pages already set to: ' + str(pages))
  133. for page in range(0, int(pages)):
  134. print('Processing Page: ' + str(page + 1) + ' of ' + str(pages))
  135. if not page == 0:
  136. next_URL = URL + '&start=' + str(((12*page)+1))
  137. soup = BeautifulSoup(requests.get(next_URL).content, 'html5lib')
  138. raw_listings = soup.findAll("div", {'class':'col-xs-12 col-sm-6 col-lg-4 text-center listing-gallery'})
  139. for listing in raw_listings:
  140. items = listing.findAll("p") ##
  141. site_name = self.mlstype
  142. MLS = " ".join(items[3].getText().strip()[6:15].split()) ## MLS NUMBER
  143. title = '' ## Listing Title (address if no title)
  144. price = items[0].string.strip() ## Price
  145. if self.mlstype == 'gmls':
  146. link = 'https://www.georgiamls.com' + listing.a['href']
  147. detail_request = requests.get(link)
  148. detail_soup = BeautifulSoup(detail_request.content, 'html5lib')
  149. details = detail_soup.findAll('tr')
  150. bedbath = details[1].findAll('td')[1].getText().strip().split('/')
  151. br = bedbath[0][:-3]
  152. ba = bedbath[1][:-3]
  153. baths = ba ## IF House is present
  154. bedrooms = br ## IF House is present
  155. address = ''
  156. for element in details:
  157. if 'sqft' in element.getText():
  158. sqft = element.findAll('td')[1].getText().strip()[:-5].replace(',','')
  159. if 'lot size' in element.getText().lower():
  160. acres = element.findAll('td')[1].getText().strip()[:-6]
  161. if 'Property Type' in element.getText():
  162. ptype = element.findAll('td')[1].getText().strip()
  163. if 'acreage' in ptype.lower():
  164. type = 'af'
  165. elif 'land lot' in ptype.lower():
  166. type = 'll'
  167. elif 'single family home' in ptype.lower():
  168. type = 'sf'
  169. else:
  170. type = 'unknown'
  171. if 'Address' in element.getText():
  172. if not address: #Prevents finding the word 'address' elsewhere in the listings
  173. address = element.findAll('td')[1]
  174. #7 print("TEST ADDRESS: ", element)
  175. street_address = list(address)[0].strip()
  176. csz = list(address)[2].strip()
  177. split_address = self.break_address(street_address + '|' + csz)
  178. description = detail_soup.find('div', {'id':'listing-remarks'}).getText().strip().replace('\t','')
  179. data = Property(site_name = self.mlstype, \
  180. type = type, \
  181. MLS = MLS, \
  182. bedrooms = bedrooms, \
  183. baths = baths, \
  184. sqft = sqft, \
  185. address = split_address[0], \
  186. city = split_address[1].title(), \
  187. st = split_address[2].upper(), \
  188. zip = split_address[3], \
  189. county = county.title(), \
  190. price = price.replace('$','').replace(',',''), \
  191. acres = acres, \
  192. description = description, \
  193. link = link)
  194. properties_list.append(data)
  195. print('Scanned: ' + data.address)
  196. return properties_list
  197. def getmlsdata(self, search: Search):
  198. """This is the main entrypoint. Takes arguments to pass to stringbuilder to create the URL.
  199. Selects appropriate parser based on self.mlstype from class intance.
  200. Needs any modifications from the standard search ($0 to $500,000, 5 to 15 acres, etc)
  201. See class search for more information.
  202. --> 9/1/20 - takes Search class as argument. All properties are handled by the class <--"""
  203. if isinstance(search, Search):
  204. if not search.county.capitalize() in self.counties: ### FIX for lower()
  205. print("County " + search.county + " not regognized. Exiting")
  206. else:
  207. print("Scanning for results in " + search.county + " using the " + self.mlstype.upper() + " database on: " + str(datetime.datetime.now()))
  208. if self.mlstype == 'gmls':
  209. list = self.gmlsparser(self.stringbuilder(search), search.county)
  210. return list
  211. else:
  212. raise ImproperSearchError(search)
  213. def checkdb(self, criteria_dict):
  214. """Check dictionary of critera against database.
  215. Currently accepts keys: MLS, title, address (street number/name, not city/state/zip).
  216. Returns True if records exists."""
  217. if self.cursor: ## Check if DB is connected
  218. for criteria in criteria_dict:
  219. ## Determine criteria passed, and execute queries for each
  220. if criteria == 'MLS':
  221. self.cursor.execute("SELECT COUNT(*) FROM properties WHERE MLS = %(MLS)s GROUP BY id", {criteria:criteria_dict[criteria]})
  222. if self.cursor.rowcount > 0: return self.cursor.rowcount # stop for loop if match already found.
  223. elif criteria == 'title':
  224. self.cursor.execute("SELECT COUNT(*) FROM properties WHERE title = %(title)s GROUP BY id", {criteria:criteria_dict[criteria]})
  225. if self.cursor.rowcount > 0: return self.cursor.rowcount # stop for loop if match already found.
  226. elif criteria == 'address':
  227. self.cursor.execute("SELECT COUNT(*) FROM properties WHERE address = %(address)s GROUP BY id", {criteria:criteria_dict[criteria]})
  228. if self.cursor.rowcount > 0: return self.cursor.rowcount # stop for loop if match already found.
  229. else:
  230. print("Cannot search on parameter: " + criteria)
  231. return self.cursor.rowcount
  232. else:
  233. print("Database is not connected or cursor not filled. Use function 'connectdb()' to establish")
  234. def getGoogle(self, property):
  235. """Supplies date from Google Distance Matrix API to populate
  236. distance_to_work
  237. time_to_work
  238. distance_to_school
  239. time_to_school
  240. Costs money, so it should only be called when inserting a new db record.
  241. Returns distance in METERS (1m = 0.000621371 mi) and time in SECONDS
  242. returns fully populated Propery object."""
  243. print("Fetching live Google Data. $$")
  244. # Build Request
  245. destination1 = 'Hebron Christian Acadamy' ## Working query for Hebron Christian Acadamy
  246. destination2 = 'JHRJ+FJ Atlanta, Georgia' ## Plus code for Hourly parking at Int'l Terminal, KATL
  247. params = {}
  248. params['units'] = 'imperial'
  249. params['origins'] = property.address + ', ' + property.city + ' ' + property.st
  250. params['destinations'] = 'Hebron Christian Acadamy|JHRJ+FJ Atlanta, Georgia'
  251. params['key'] = self.GoogleAPIKey
  252. baseURL = 'https://maps.googleapis.com/maps/api/distancematrix/json?'
  253. API_URL = baseURL + urllib.parse.urlencode(params)
  254. # print(API_URL)
  255. # Send Request and capture result as json
  256. try:
  257. google_result = requests.get(API_URL).json()
  258. if google_result['status'] == 'OK':
  259. property.distance_to_school = google_result['rows'][0]['elements'][0]['distance']['value']
  260. property.time_to_school = google_result['rows'][0]['elements'][0]['duration']['value']
  261. property.distance_to_work = google_result['rows'][0]['elements'][1]['distance']['value']
  262. property.time_to_work = google_result['rows'][0]['elements'][1]['duration']['value']
  263. except:
  264. print("ERROR: Failed to obtain Google API data")
  265. #Load sample data for testing:
  266. # with open('complex.json') as f:
  267. # data = json.load(f)
  268. # google_result = data
  269. ### end testing json ###
  270. def insertrecord(self, property, work_address=None, school_address=None):
  271. """Inserts record into database. Takes argument Property class object.
  272. FUTURE - add date_added field to insert operation."""
  273. if self.cursor:
  274. criteria_dict = property.__dict__
  275. criteria_dict['Date_Added'] = str(datetime.date.today())
  276. placeholder_columns = ", ".join(criteria_dict.keys())
  277. placeholder_values = ", ".join([":{0}".format(col) for col in criteria_dict.keys()])
  278. qry = "INSERT INTO properties ({placeholder_columns}) VALUES {placeholder_values}".format(placeholder_columns=placeholder_columns, placeholder_values=tuple(criteria_dict.values()))
  279. self.cursor.execute(qry)
  280. self.cnx.commit()
  281. print("Inserted " + criteria_dict['MLS'] + " | " + criteria_dict['address'] + " into database.")
  282. else:
  283. print("Database is not connected or cursor not filled. Use function 'connectdb()' to establish")
  284. def connectdb(self, host='192.168.100.26', user='landsearchuser', password='1234', database='landsearch'):
  285. """Connects to database and returns a cursor object"""
  286. self.cnx = mysql.connector.connect(host=host, user=user, password=password, database=database, buffered=True)
  287. self.cursor = self.cnx.cursor()
  288. return self.cursor
  289. def closedb(self):
  290. """Cleanly close the db."""
  291. self.cursor.close()
  292. self.cnx.close()
  293. def dbinsert(self, properties: list):
  294. """Inserts records into database. Takes list of Property class objects"""
  295. if not properties == None:
  296. if not isinstance(properties, list):
  297. raise TypeError('type list required')
  298. for property in properties:
  299. if not self.checkdb({'MLS': property.MLS, 'address': property.address}):
  300. if self.live_google: self.getGoogle(property) ## <- This will populate distance and time fields if set TRUE
  301. self.insertrecord(property)
  302. self.new_listings.append(property)
  303. else:
  304. print(property.MLS + ' | ' + property.address + ' is already in db. Not inserted.')
  305. ##REMOVE FOR TESTING###
  306. # self.new_listings.append(property)
  307. #######################
  308. else:
  309. print("Empty dataset. No records to insert.")
  310. def alerts(self):
  311. pass
  312. def email(self):
  313. body = ''
  314. data = []
  315. subj = str(len(self.new_listings)) + " New Real Estate Listings for " + str(datetime.date.today())
  316. for listing in self.new_listings:
  317. row = []
  318. body += listing.MLS + " | " + listing.address + " | " + listing.acres + " | " + listing.price + " | " + listing.link + "\n"
  319. row.append(listing.MLS)
  320. row.append(listing.address)
  321. row.append('{:0,.2f}'.format(float(listing.acres)))
  322. row.append(listing.sqft)
  323. row.append('${:0,.0f}'.format(int(listing.price)))
  324. row.append(listing.time_to_school/60 if hasattr(listing, 'time_to_school') else 'NA')
  325. row.append(listing.link)
  326. data.append(row)
  327. body = """\
  328. Daily Real Estate Search Report\n
  329. The following properties have been found which may be of interest.\n
  330. """
  331. results = tabulate(data, headers=['MLS', 'Address', 'Acres', 'sqft', 'Price', 'Time to School', 'link'])
  332. body += results
  333. sendto = ['stagl.mike@gmail.com', 'M_Stagl@hotmail.com']
  334. mymail = custom_email.simplemail(subj, body, sendto)
  335. if len(self.new_listings) > 0:
  336. try:
  337. mymail.sendmail()
  338. except Exception as e:
  339. print("Error sending email. " + e)
  340. else:
  341. print("No new listings. Email not sent")
  342. # REMOVE AFTER TESTING #
  343. mymail.sendmail()
  344. ########################
  345. ########### BEGIN CODE ###############33
  346. if __name__ == '__main__':
  347. gmls = MLSDATA('GMLS')
  348. #new_properties = []
  349. # for county in ['Walton']: ### FIX
  350. for county in gmls.counties: ### FIX
  351. # mysearch = Search(county, type=['farm', 'house', 'land'], upper_price=100000) ### FIX
  352. mysearch = Search(county, type=['farm', 'house', 'land'], upper_price=525000) ### FIX
  353. mydata = gmls.getmlsdata(mysearch)
  354. gmls.connectdb()
  355. gmls.dbinsert(mydata)
  356. gmls.closedb()
  357. gmls.email()
  358. print()
  359. print(str(len(gmls.new_listings)) + " new properties found! " + str(datetime.datetime.now()))
  360. print()
  361. for listing in gmls.new_listings:
  362. print(listing.MLS, listing.address)