Package pygeoip
[hide private]
[frames | no frames]

Source Code for Package pygeoip

  1  # -*- coding: utf-8 -*- 
  2  """ 
  3  Pure Python GeoIP API 
  4   
  5  The API is based on MaxMind's C-based Python API, but the code itself is 
  6  ported from the Pure PHP GeoIP API by Jim Winstead and Hans Lellelid. 
  7   
  8  @author: Jennifer Ennis <zaylea@gmail.com> 
  9   
 10  @license: Copyright(C) 2004 MaxMind LLC 
 11   
 12  This program is free software: you can redistribute it and/or modify 
 13  it under the terms of the GNU Lesser General Public License as published by 
 14  the Free Software Foundation, either version 3 of the License, or 
 15  (at your option) any later version. 
 16   
 17  This program is distributed in the hope that it will be useful, 
 18  but WITHOUT ANY WARRANTY; without even the implied warranty of 
 19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 20  GNU General Public License for more details. 
 21   
 22  You should have received a copy of the GNU Lesser General Public License 
 23  along with this program.  If not, see <http://www.gnu.org/licenses/lgpl.txt>. 
 24  """ 
 25   
 26  import os 
 27  import math 
 28  import socket 
 29  import mmap 
 30  import codecs 
 31  from threading import Lock 
 32   
 33  try: 
 34      from StringIO import StringIO 
 35  except ImportError: 
 36      from io import StringIO, BytesIO 
 37   
 38  import pygeoip.const 
 39  from pygeoip import util 
 40  from pygeoip.const import PY2, PY3 
 41  from pygeoip.timezone import time_zone_by_country_and_region 
 42   
 43   
# Module-level aliases for the cache-mode flags that GeoIP() accepts in its
# ``flags`` argument, so callers do not have to import pygeoip.const.
STANDARD = const.STANDARD
MMAP_CACHE = const.MMAP_CACHE
MEMORY_CACHE = const.MEMORY_CACHE

# Codec used throughout this module to decode raw database bytes into text
# before individual characters are passed to ord().
ENCODING = const.ENCODING
 49   
 50   
class GeoIPError(Exception):
    """Raised when a lookup fails or the database type does not fit the call."""
53 54
class GeoIPMetaclass(type):
    """
    Metaclass that hands out one shared instance per database file.

    The cache is applied in ``__call__`` (instance creation).  Hooking
    ``__new__`` on a metaclass would only run when a *class* is being
    defined, so the cache key there is the class name, not the filename.
    """

    # Maps (class, filename) -> instance.  Keying on the class as well keeps
    # different classes built from this metaclass from sharing instances.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """
        Singleton method to get an instance without reparsing the db. Unique
        instances are instantiated based on the filename of the db. Flags are
        ignored for this, i.e. if you initialize one with STANDARD
        flag (default) and then try later to initialize with MEMORY_CACHE, it
        will still return the STANDARD one.

        The filename is taken from the first positional argument or from the
        ``filename`` keyword.  Cached instances are kept for the lifetime of
        the process.

        @return: the cached or newly created instance of C{cls}
        """
        if args:
            filename = args[0]
        elif 'filename' in kwargs:
            filename = kwargs['filename']
        else:
            # Nothing to key the cache on: construct normally so that the
            # class's own __init__ reports the missing argument.
            return super(GeoIPMetaclass, cls).__call__(*args, **kwargs)

        cache = GeoIPMetaclass._instances
        key = (cls, filename)
        if key not in cache:
            cache[key] = super(GeoIPMetaclass, cls).__call__(*args, **kwargs)

        return cache[key]
# Base class created by calling the metaclass directly (name, bases, dict).
# NOTE(review): presumably spelled this way because the call form works on
# both Python 2 and Python 3, unlike ``__metaclass__`` / ``metaclass=``.
GeoIPBase = GeoIPMetaclass('GeoIPBase', (object,), {})
81 -class GeoIP(GeoIPBase):
82 - def __init__(self, filename, flags=0):
83 """ 84 Initialize the class. 85 86 @param filename: Path to a geoip database. 87 @type filename: str 88 @param flags: Flags that affect how the database is processed. 89 Currently supported flags are STANDARD (the default), 90 MEMORY_CACHE (preload the whole file into memory) and 91 MMAP_CACHE (access the file via mmap). 92 @type flags: int 93 """ 94 self._filename = filename 95 self._flags = flags 96 97 if self._flags & const.MMAP_CACHE: 98 f = open(filename, 'rb') 99 access = mmap.ACCESS_READ 100 self._filehandle = mmap.mmap(f.fileno(), 0, access=access) 101 f.close() 102 103 elif self._flags & const.MEMORY_CACHE: 104 f = open(filename, 'rb') 105 self._memoryBuffer = f.read() 106 iohandle = BytesIO if PY3 else StringIO 107 self._filehandle = iohandle(self._memoryBuffer) 108 f.close() 109 110 else: 111 self._filehandle = codecs.open(filename, 'rb', ENCODING) 112 113 self._lock = Lock() 114 self._setup_segments()
115
116 - def _setup_segments(self):
117 """ 118 Parses the database file to determine what kind of database is 119 being used and setup segment sizes and start points that will 120 be used by the seek*() methods later. 121 122 Supported databases: 123 124 * COUNTRY_EDITION 125 * COUNTRY_EDITION_V6 126 * REGION_EDITION_REV0 127 * REGION_EDITION_REV1 128 * CITY_EDITION_REV0 129 * CITY_EDITION_REV1 130 * ORG_EDITION 131 * ISP_EDITION 132 * ASNUM_EDITION 133 134 """ 135 self._databaseType = const.COUNTRY_EDITION 136 self._recordLength = const.STANDARD_RECORD_LENGTH 137 self._databaseSegments = const.COUNTRY_BEGIN 138 139 self._lock.acquire() 140 filepos = self._filehandle.tell() 141 self._filehandle.seek(-3, os.SEEK_END) 142 143 for i in range(const.STRUCTURE_INFO_MAX_SIZE): 144 chars = chr(255) * 3 145 delim = self._filehandle.read(3) 146 147 if PY3 and type(delim) is bytes: 148 delim = delim.decode(ENCODING) 149 150 if PY2: 151 chars = chars.decode(ENCODING) 152 if type(delim) is str: 153 delim = delim.decode(ENCODING) 154 155 if delim == chars: 156 byte = self._filehandle.read(1) 157 self._databaseType = ord(byte) 158 159 # Compatibility with databases from April 2003 and earlier 160 if (self._databaseType >= 106): 161 self._databaseType -= 105 162 163 if self._databaseType == const.REGION_EDITION_REV0: 164 self._databaseSegments = const.STATE_BEGIN_REV0 165 166 elif self._databaseType == const.REGION_EDITION_REV1: 167 self._databaseSegments = const.STATE_BEGIN_REV1 168 169 elif self._databaseType in (const.CITY_EDITION_REV0, 170 const.CITY_EDITION_REV1, 171 const.ORG_EDITION, 172 const.ISP_EDITION, 173 const.ASNUM_EDITION): 174 self._databaseSegments = 0 175 buf = self._filehandle.read(const.SEGMENT_RECORD_LENGTH) 176 177 if PY3 and type(buf) is bytes: 178 buf = buf.decode(ENCODING) 179 180 for j in range(const.SEGMENT_RECORD_LENGTH): 181 self._databaseSegments += (ord(buf[j]) << (j * 8)) 182 183 LONG_RECORDS = (const.ORG_EDITION, const.ISP_EDITION) 184 if self._databaseType in LONG_RECORDS: 185 
self._recordLength = const.ORG_RECORD_LENGTH 186 break 187 else: 188 self._filehandle.seek(-4, os.SEEK_CUR) 189 190 self._filehandle.seek(filepos, os.SEEK_SET) 191 self._lock.release()
192
193 - def _seek_country(self, ipnum):
194 """ 195 Using the record length and appropriate start points, seek to the 196 country that corresponds to the converted IP address integer. 197 198 @param ipnum: result of ip2long conversion 199 @type ipnum: int 200 @return: offset of start of record 201 @rtype: int 202 """ 203 offset = 0 204 seek_depth = 127 if len(str(ipnum)) > 10 else 31 205 206 for depth in range(seek_depth, -1, -1): 207 if self._flags & const.MEMORY_CACHE: 208 startIndex = 2 * self._recordLength * offset 209 endIndex = startIndex + (2 * self._recordLength) 210 buf = self._memoryBuffer[startIndex:endIndex] 211 else: 212 startIndex = 2 * self._recordLength * offset 213 readLength = 2 * self._recordLength 214 self._lock.acquire() 215 self._filehandle.seek(startIndex, os.SEEK_SET) 216 buf = self._filehandle.read(readLength) 217 self._lock.release() 218 219 if PY3 and type(buf) is bytes: 220 buf = buf.decode(ENCODING) 221 222 x = [0, 0] 223 for i in range(2): 224 for j in range(self._recordLength): 225 byte = buf[self._recordLength * i + j] 226 x[i] += ord(byte) << (j * 8) 227 if ipnum & (1 << depth): 228 if x[1] >= self._databaseSegments: 229 return x[1] 230 offset = x[1] 231 else: 232 if x[0] >= self._databaseSegments: 233 return x[0] 234 offset = x[0] 235 236 raise GeoIPError('Corrupt database')
237
238 - def _get_org(self, ipnum):
239 """ 240 Seek and return organization or ISP name for ipnum. 241 @param ipnum: Converted IP address 242 @type ipnum: int 243 @return: org/isp name 244 @rtype: str 245 """ 246 seek_org = self._seek_country(ipnum) 247 if seek_org == self._databaseSegments: 248 return None 249 250 read_length = (2 * self._recordLength - 1) * self._databaseSegments 251 self._lock.acquire() 252 self._filehandle.seek(seek_org + read_length, os.SEEK_SET) 253 buf = self._filehandle.read(const.MAX_ORG_RECORD_LENGTH) 254 self._lock.release() 255 256 if PY3 and type(buf) is bytes: 257 buf = buf.decode(ENCODING) 258 259 return buf[:buf.index(chr(0))]
260
261 - def _get_region(self, ipnum):
262 """ 263 Seek and return the region info (dict containing country_code 264 and region_name). 265 266 @param ipnum: Converted IP address 267 @type ipnum: int 268 @return: dict containing country_code and region_name 269 @rtype: dict 270 """ 271 region = '' 272 country_code = '' 273 seek_country = self._seek_country(ipnum) 274 275 def get_region_name(offset): 276 region1 = chr(offset // 26 + 65) 277 region2 = chr(offset % 26 + 65) 278 return ''.join([region1, region2])
279 280 if self._databaseType == const.REGION_EDITION_REV0: 281 seek_region = seek_country - const.STATE_BEGIN_REV0 282 if seek_region >= 1000: 283 country_code = 'US' 284 region = get_region_name(seek_region - 1000) 285 else: 286 country_code = const.COUNTRY_CODES[seek_region] 287 elif self._databaseType == const.REGION_EDITION_REV1: 288 seek_region = seek_country - const.STATE_BEGIN_REV1 289 if seek_region < const.US_OFFSET: 290 pass 291 elif seek_region < const.CANADA_OFFSET: 292 country_code = 'US' 293 region = get_region_name(seek_region - const.US_OFFSET) 294 elif seek_region < const.WORLD_OFFSET: 295 country_code = 'CA' 296 region = get_region_name(seek_region - const.CANADA_OFFSET) 297 else: 298 index = (seek_region - const.WORLD_OFFSET) // const.FIPS_RANGE 299 if index in const.COUNTRY_CODES: 300 country_code = const.COUNTRY_CODES[index] 301 elif self._databaseType in const.CITY_EDITIONS: 302 rec = self._get_record(ipnum) 303 country_code = rec['country_code'] if 'country_code' in rec else '' 304 region = rec['region_name'] if 'region_name' in rec else '' 305 306 return {'country_code': country_code, 'region_name': region}
307
308 - def _get_record(self, ipnum):
309 """ 310 Populate location dict for converted IP. 311 312 @param ipnum: Converted IP address 313 @type ipnum: int 314 @return: dict with country_code, country_code3, country_name, 315 region, city, postal_code, latitude, longitude, 316 dma_code, metro_code, area_code, region_name, time_zone 317 @rtype: dict 318 """ 319 seek_country = self._seek_country(ipnum) 320 if seek_country == self._databaseSegments: 321 return None 322 323 read_length = (2 * self._recordLength - 1) * self._databaseSegments 324 self._lock.acquire() 325 self._filehandle.seek(seek_country + read_length, os.SEEK_SET) 326 buf = self._filehandle.read(const.FULL_RECORD_LENGTH) 327 self._lock.release() 328 329 if PY3 and type(buf) is bytes: 330 buf = buf.decode(ENCODING) 331 332 record = { 333 'dma_code': 0, 334 'area_code': 0, 335 'metro_code': '', 336 'postal_code': '' 337 } 338 339 latitude = 0 340 longitude = 0 341 buf_pos = 0 342 343 # Get country 344 char = ord(buf[buf_pos]) 345 record['country_code'] = const.COUNTRY_CODES[char] 346 record['country_code3'] = const.COUNTRY_CODES3[char] 347 record['country_name'] = const.COUNTRY_NAMES[char] 348 buf_pos += 1 349 350 def get_data(buf, buf_pos): 351 offset = buf_pos 352 char = ord(buf[offset]) 353 while (char != 0): 354 offset += 1 355 char = ord(buf[offset]) 356 if offset > buf_pos: 357 return (offset, buf[buf_pos:offset]) 358 return (offset, '')
359 360 offset, record['region_name'] = get_data(buf, buf_pos) 361 offset, record['city'] = get_data(buf, offset + 1) 362 offset, record['postal_code'] = get_data(buf, offset + 1) 363 buf_pos = offset + 1 364 365 for j in range(3): 366 char = ord(buf[buf_pos]) 367 buf_pos += 1 368 latitude += (char << (j * 8)) 369 370 for j in range(3): 371 char = ord(buf[buf_pos]) 372 buf_pos += 1 373 longitude += (char << (j * 8)) 374 375 record['latitude'] = (latitude / 10000.0) - 180.0 376 record['longitude'] = (longitude / 10000.0) - 180.0 377 378 if self._databaseType == const.CITY_EDITION_REV1: 379 dmaarea_combo = 0 380 if record['country_code'] == 'US': 381 for j in range(3): 382 char = ord(buf[buf_pos]) 383 dmaarea_combo += (char << (j * 8)) 384 buf_pos += 1 385 386 record['dma_code'] = int(math.floor(dmaarea_combo / 1000)) 387 record['area_code'] = dmaarea_combo % 1000 388 389 if record['dma_code'] in const.DMA_MAP: 390 record['metro_code'] = const.DMA_MAP[record['dma_code']] 391 392 params = (record['country_code'], record['region_name']) 393 record['time_zone'] = time_zone_by_country_and_region(*params) 394 395 return record 396
397 - def _gethostbyname(self, hostname):
398 if self._databaseType in const.IPV6_EDITIONS: 399 try: 400 response = socket.getaddrinfo(hostname, 0, socket.AF_INET6) 401 family, socktype, proto, canonname, sockaddr = response[0] 402 address, port, flow, scope = sockaddr 403 return address 404 except socket.gaierror: 405 return '' 406 else: 407 return socket.gethostbyname(hostname)
408
409 - def id_by_addr(self, addr):
410 """ 411 Get the country index. 412 Looks up the index for the country which is the key for 413 the code and name. 414 415 @param addr: The IP address 416 @type addr: str 417 @return: network byte order 32-bit integer 418 @rtype: int 419 """ 420 ipnum = util.ip2long(addr) 421 if not ipnum: 422 raise ValueError("Invalid IP address: %s" % addr) 423 424 COUNTY_EDITIONS = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6) 425 if self._databaseType not in COUNTY_EDITIONS: 426 message = 'Invalid database type, expected Country' 427 raise GeoIPError(message) 428 429 return self._seek_country(ipnum) - const.COUNTRY_BEGIN
430
431 - def country_code_by_addr(self, addr):
432 """ 433 Returns 2-letter country code (e.g. 'US') for specified IP address. 434 Use this method if you have a Country, Region, or City database. 435 436 @param addr: IP address 437 @type addr: str 438 @return: 2-letter country code 439 @rtype: str 440 """ 441 try: 442 VALID_EDITIONS = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6) 443 if self._databaseType in VALID_EDITIONS: 444 ipv = 6 if addr.find(':') >= 0 else 4 445 446 if ipv == 4 and self._databaseType != const.COUNTRY_EDITION: 447 message = 'Invalid database type; expected IPv6 address' 448 raise ValueError(message) 449 if ipv == 6 and self._databaseType != const.COUNTRY_EDITION_V6: 450 message = 'Invalid database type; expected IPv4 address' 451 raise ValueError(message) 452 453 country_id = self.id_by_addr(addr) 454 455 return const.COUNTRY_CODES[country_id] 456 elif self._databaseType in const.REGION_CITY_EDITIONS: 457 return self.region_by_addr(addr)['country_code'] 458 459 message = 'Invalid database type, expected Country, City or Region' 460 raise GeoIPError(message) 461 except ValueError: 462 raise GeoIPError('Failed to lookup address %s' % addr)
463
464 - def country_code_by_name(self, hostname):
465 """ 466 Returns 2-letter country code (e.g. 'US') for specified hostname. 467 Use this method if you have a Country, Region, or City database. 468 469 @param hostname: Hostname 470 @type hostname: str 471 @return: 2-letter country code 472 @rtype: str 473 """ 474 addr = self._gethostbyname(hostname) 475 return self.country_code_by_addr(addr)
476
477 - def country_name_by_addr(self, addr):
478 """ 479 Returns full country name for specified IP address. 480 Use this method if you have a Country or City database. 481 482 @param addr: IP address 483 @type addr: str 484 @return: country name 485 @rtype: str 486 """ 487 try: 488 VALID_EDITIONS = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6) 489 if self._databaseType in VALID_EDITIONS: 490 return const.COUNTRY_NAMES[self.id_by_addr(addr)] 491 elif self._databaseType in const.CITY_EDITIONS: 492 return self.record_by_addr(addr)['country_name'] 493 else: 494 message = 'Invalid database type, expected Country or City' 495 raise GeoIPError(message) 496 except ValueError: 497 raise GeoIPError('Failed to lookup address %s' % addr)
498
499 - def country_name_by_name(self, hostname):
500 """ 501 Returns full country name for specified hostname. 502 Use this method if you have a Country database. 503 504 @param hostname: Hostname 505 @type hostname: str 506 @return: country name 507 @rtype: str 508 """ 509 addr = self._gethostbyname(hostname) 510 return self.country_name_by_addr(addr)
511
512 - def org_by_addr(self, addr):
513 """ 514 Lookup Organization, ISP or ASNum for given IP address. 515 Use this method if you have an Organization, ISP or ASNum database. 516 517 @param addr: IP address 518 @type addr: str 519 @return: organization or ISP name 520 @rtype: str 521 """ 522 try: 523 ipnum = util.ip2long(addr) 524 if not ipnum: 525 raise ValueError('Invalid IP address') 526 527 valid = (const.ORG_EDITION, const.ISP_EDITION, const.ASNUM_EDITION) 528 if self._databaseType not in valid: 529 message = 'Invalid database type, expected Org, ISP or ASNum' 530 raise GeoIPError(message) 531 532 return self._get_org(ipnum) 533 except ValueError: 534 raise GeoIPError('Failed to lookup address %s' % addr)
535
536 - def org_by_name(self, hostname):
537 """ 538 Lookup the organization (or ISP) for hostname. 539 Use this method if you have an Organization/ISP database. 540 541 @param hostname: Hostname 542 @type hostname: str 543 @return: Organization or ISP name 544 @rtype: str 545 """ 546 addr = self._gethostbyname(hostname) 547 return self.org_by_addr(addr)
548
549 - def record_by_addr(self, addr):
550 """ 551 Look up the record for a given IP address. 552 Use this method if you have a City database. 553 554 @param addr: IP address 555 @type addr: str 556 @return: Dictionary with country_code, country_code3, country_name, 557 region, city, postal_code, latitude, longitude, dma_code, 558 metro_code, area_code, region_name, time_zone 559 @rtype: dict 560 """ 561 try: 562 ipnum = util.ip2long(addr) 563 if not ipnum: 564 raise ValueError('Invalid IP address') 565 566 if self._databaseType not in const.CITY_EDITIONS: 567 message = 'Invalid database type, expected City' 568 raise GeoIPError(message) 569 570 return self._get_record(ipnum) 571 except ValueError: 572 raise GeoIPError('Failed to lookup address %s' % addr)
573
574 - def record_by_name(self, hostname):
575 """ 576 Look up the record for a given hostname. 577 Use this method if you have a City database. 578 579 @param hostname: Hostname 580 @type hostname: str 581 @return: Dictionary with country_code, country_code3, country_name, 582 region, city, postal_code, latitude, longitude, dma_code, 583 metro_code, area_code, region_name, time_zone 584 @rtype: dict 585 """ 586 addr = self._gethostbyname(hostname) 587 return self.record_by_addr(addr)
588
589 - def region_by_addr(self, addr):
590 """ 591 Lookup the region for given IP address. 592 Use this method if you have a Region database. 593 594 @param addr: IP address 595 @type addr: str 596 @return: Dictionary containing country_code, region and region_name 597 @rtype: dict 598 """ 599 try: 600 ipnum = util.ip2long(addr) 601 if not ipnum: 602 raise ValueError('Invalid IP address') 603 604 if self._databaseType not in const.REGION_CITY_EDITIONS: 605 message = 'Invalid database type, expected Region or City' 606 raise GeoIPError(message) 607 608 return self._get_region(ipnum) 609 except ValueError: 610 raise GeoIPError('Failed to lookup address %s' % addr)
611
612 - def region_by_name(self, hostname):
613 """ 614 Lookup the region for given hostname. 615 Use this method if you have a Region database. 616 617 @param hostname: Hostname 618 @type hostname: str 619 @return: Dictionary containing country_code, region, and region_name 620 @rtype: dict 621 """ 622 addr = self._gethostbyname(hostname) 623 return self.region_by_addr(addr)
624
625 - def time_zone_by_addr(self, addr):
626 """ 627 Look up the time zone for a given IP address. 628 Use this method if you have a Region or City database. 629 630 @param addr: IP address 631 @type addr: str 632 @return: Time zone 633 @rtype: str 634 """ 635 try: 636 ipnum = util.ip2long(addr) 637 if not ipnum: 638 raise ValueError('Invalid IP address') 639 640 if self._databaseType not in const.CITY_EDITIONS: 641 message = 'Invalid database type, expected City' 642 raise GeoIPError(message) 643 644 return self._get_record(ipnum)['time_zone'] 645 except ValueError: 646 raise GeoIPError('Failed to lookup address %s' % addr)
647
648 - def time_zone_by_name(self, hostname):
649 """ 650 Look up the time zone for a given hostname. 651 Use this method if you have a Region or City database. 652 653 @param hostname: Hostname 654 @type hostname: str 655 @return: Time zone 656 @rtype: str 657 """ 658 addr = self._gethostbyname(hostname) 659 return self.time_zone_by_addr(addr)
660